9 9 9 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 // SPDX-License-Identifier: GPL-2.0 /* Builtin firmware support */ #include <linux/firmware.h> #include "../firmware.h" /* Only if FW_LOADER=y */ #ifdef CONFIG_FW_LOADER struct builtin_fw { char *name; void *data; unsigned long size; }; extern struct builtin_fw __start_builtin_fw[]; extern struct builtin_fw __end_builtin_fw[]; static bool fw_copy_to_prealloc_buf(struct firmware *fw, void *buf, size_t size) { if (!buf) return true; if (size < fw->size) return false; memcpy(buf, fw->data, fw->size); return true; } /** * firmware_request_builtin() - load builtin firmware * @fw: pointer to firmware struct * @name: name of firmware file * * Some use cases in the kernel have a requirement so that no memory allocator * is involved as these calls take place early in boot process. An example is * the x86 CPU microcode loader. In these cases all the caller wants is to see * if the firmware was built-in and if so use it right away. This can be used * for such cases. * * This looks for the firmware in the built-in kernel. Only if the kernel was * built-in with the firmware you are looking for will this return successfully. * * Callers of this API do not need to use release_firmware() as the pointer to * the firmware is expected to be provided locally on the stack of the caller. 
**/ bool firmware_request_builtin(struct firmware *fw, const char *name) { struct builtin_fw *b_fw; if (!fw) return false; for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) { if (strcmp(name, b_fw->name) == 0) { fw->size = b_fw->size; fw->data = b_fw->data; return true; } } return false; } EXPORT_SYMBOL_NS_GPL(firmware_request_builtin, TEST_FIRMWARE); /** * firmware_request_builtin_buf() - load builtin firmware into optional buffer * @fw: pointer to firmware struct * @name: name of firmware file * @buf: If set this lets you use a pre-allocated buffer so that the built-in * firmware into is copied into. This field can be NULL. It is used by * callers such as request_firmware_into_buf() and * request_partial_firmware_into_buf() * @size: if buf was provided, the max size of the allocated buffer available. * If the built-in firmware does not fit into the pre-allocated @buf this * call will fail. * * This looks for the firmware in the built-in kernel. Only if the kernel was * built-in with the firmware you are looking for will this call possibly * succeed. If you passed a @buf the firmware will be copied into it *iff* the * built-in firmware fits into the pre-allocated buffer size specified in * @size. * * This caller is to be used internally by the firmware_loader only. **/ bool firmware_request_builtin_buf(struct firmware *fw, const char *name, void *buf, size_t size) { if (!firmware_request_builtin(fw, name)) return false; return fw_copy_to_prealloc_buf(fw, buf, size); } bool firmware_is_builtin(const struct firmware *fw) { struct builtin_fw *b_fw; for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) if (fw->data == b_fw->data) return true; return false; } #endif
158 86 158 23 41 33 25 117 8 26 2 1 1 35 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 /* SPDX-License-Identifier: GPL-2.0-only */ /* * * Copyright (C) 2011 Novell Inc. 
*/ #include <linux/kernel.h> #include <linux/uuid.h> #include <linux/fs.h> #include <linux/fsverity.h> #include <linux/namei.h> #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> #include "ovl_entry.h" #undef pr_fmt #define pr_fmt(fmt) "overlayfs: " fmt enum ovl_path_type { __OVL_PATH_UPPER = (1 << 0), __OVL_PATH_MERGE = (1 << 1), __OVL_PATH_ORIGIN = (1 << 2), }; #define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER) #define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE) #define OVL_TYPE_ORIGIN(type) ((type) & __OVL_PATH_ORIGIN) #define OVL_XATTR_NAMESPACE "overlay." #define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE #define OVL_XATTR_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) #define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE #define OVL_XATTR_USER_PREFIX_LEN (sizeof(OVL_XATTR_USER_PREFIX) - 1) #define OVL_XATTR_ESCAPE_PREFIX OVL_XATTR_NAMESPACE #define OVL_XATTR_ESCAPE_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_PREFIX) - 1) #define OVL_XATTR_ESCAPE_TRUSTED_PREFIX OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_ESCAPE_PREFIX #define OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_TRUSTED_PREFIX) - 1) #define OVL_XATTR_ESCAPE_USER_PREFIX OVL_XATTR_USER_PREFIX OVL_XATTR_ESCAPE_PREFIX #define OVL_XATTR_ESCAPE_USER_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_USER_PREFIX) - 1) enum ovl_xattr { OVL_XATTR_OPAQUE, OVL_XATTR_REDIRECT, OVL_XATTR_ORIGIN, OVL_XATTR_IMPURE, OVL_XATTR_NLINK, OVL_XATTR_UPPER, OVL_XATTR_UUID, OVL_XATTR_METACOPY, OVL_XATTR_PROTATTR, OVL_XATTR_XWHITEOUT, }; enum ovl_inode_flag { /* Pure upper dir that may contain non pure upper entries */ OVL_IMPURE, /* Non-merge dir that may contain whiteout entries */ OVL_WHITEOUTS, OVL_INDEX, OVL_UPPERDATA, /* Inode number will remain constant over copy up. 
*/ OVL_CONST_INO, OVL_HAS_DIGEST, OVL_VERIFIED_DIGEST, }; enum ovl_entry_flag { OVL_E_UPPER_ALIAS, OVL_E_OPAQUE, OVL_E_CONNECTED, /* Lower stack may contain xwhiteout entries */ OVL_E_XWHITEOUTS, }; enum { OVL_REDIRECT_OFF, /* "off" mode is never used. In effect */ OVL_REDIRECT_FOLLOW, /* ...it translates to either "follow" */ OVL_REDIRECT_NOFOLLOW, /* ...or "nofollow". */ OVL_REDIRECT_ON, }; enum { OVL_UUID_OFF, OVL_UUID_NULL, OVL_UUID_AUTO, OVL_UUID_ON, }; enum { OVL_XINO_OFF, OVL_XINO_AUTO, OVL_XINO_ON, }; enum { OVL_VERITY_OFF, OVL_VERITY_ON, OVL_VERITY_REQUIRE, }; /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, * where: * origin.fh - exported file handle of the lower file * origin.uuid - uuid of the lower filesystem */ #define OVL_FH_VERSION 0 #define OVL_FH_MAGIC 0xfb /* CPU byte order required for fid decoding: */ #define OVL_FH_FLAG_BIG_ENDIAN (1 << 0) #define OVL_FH_FLAG_ANY_ENDIAN (1 << 1) /* Is the real inode encoded in fid an upper inode? */ #define OVL_FH_FLAG_PATH_UPPER (1 << 2) #define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN | \ OVL_FH_FLAG_PATH_UPPER) #if defined(__LITTLE_ENDIAN) #define OVL_FH_FLAG_CPU_ENDIAN 0 #elif defined(__BIG_ENDIAN) #define OVL_FH_FLAG_CPU_ENDIAN OVL_FH_FLAG_BIG_ENDIAN #else #error Endianness not defined #endif /* The type used to be returned by overlay exportfs for misaligned fid */ #define OVL_FILEID_V0 0xfb /* The type returned by overlay exportfs for 32bit aligned fid */ #define OVL_FILEID_V1 0xf8 /* On-disk format for "origin" file handle */ struct ovl_fb { u8 version; /* 0 */ u8 magic; /* 0xfb */ u8 len; /* size of this header + size of fid */ u8 flags; /* OVL_FH_FLAG_* */ u8 type; /* fid_type of fid */ uuid_t uuid; /* uuid of filesystem */ u32 fid[]; /* file identifier should be 32bit aligned in-memory */ } __packed; /* In-memory and on-wire format for overlay file handle */ struct ovl_fh { u8 padding[3]; /* make sure fb.fid is 32bit aligned */ union { struct 
ovl_fb fb; DECLARE_FLEX_ARRAY(u8, buf); }; } __packed; #define OVL_FH_WIRE_OFFSET offsetof(struct ovl_fh, fb) #define OVL_FH_LEN(fh) (OVL_FH_WIRE_OFFSET + (fh)->fb.len) #define OVL_FH_FID_OFFSET (OVL_FH_WIRE_OFFSET + \ offsetof(struct ovl_fb, fid)) /* On-disk format for "metacopy" xattr (if non-zero size) */ struct ovl_metacopy { u8 version; /* 0 */ u8 len; /* size of this header + used digest bytes */ u8 flags; u8 digest_algo; /* FS_VERITY_HASH_ALG_* constant, 0 for no digest */ u8 digest[FS_VERITY_MAX_DIGEST_SIZE]; /* Only the used part on disk */ } __packed; #define OVL_METACOPY_MAX_SIZE (sizeof(struct ovl_metacopy)) #define OVL_METACOPY_MIN_SIZE (OVL_METACOPY_MAX_SIZE - FS_VERITY_MAX_DIGEST_SIZE) #define OVL_METACOPY_INIT { 0, OVL_METACOPY_MIN_SIZE } static inline int ovl_metadata_digest_size(const struct ovl_metacopy *metacopy) { if (metacopy->len < OVL_METACOPY_MIN_SIZE) return 0; return (int)metacopy->len - OVL_METACOPY_MIN_SIZE; } extern const char *const ovl_xattr_table[][2]; static inline const char *ovl_xattr(struct ovl_fs *ofs, enum ovl_xattr ox) { return ovl_xattr_table[ox][ofs->config.userxattr]; } /* * When changing ownership of an upper object map the intended ownership * according to the upper layer's idmapping. When an upper mount idmaps files * that are stored on-disk as owned by id 1001 to id 1000 this means stat on * this object will report it as being owned by id 1000 when calling stat via * the upper mount. * In order to change ownership of an object so stat reports id 1000 when * called on an idmapped upper mount the value written to disk - i.e., the * value stored in ia_*id - must 1001. The mount mapping helper will thus take * care to map 1000 to 1001. * The mnt idmapping helpers are nops if the upper layer isn't idmapped. 
*/ static inline int ovl_do_notify_change(struct ovl_fs *ofs, struct dentry *upperdentry, struct iattr *attr) { return notify_change(ovl_upper_mnt_idmap(ofs), upperdentry, attr, NULL); } static inline int ovl_do_rmdir(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { int err = vfs_rmdir(ovl_upper_mnt_idmap(ofs), dir, dentry); pr_debug("rmdir(%pd2) = %i\n", dentry, err); return err; } static inline int ovl_do_unlink(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { int err = vfs_unlink(ovl_upper_mnt_idmap(ofs), dir, dentry, NULL); pr_debug("unlink(%pd2) = %i\n", dentry, err); return err; } static inline int ovl_do_link(struct ovl_fs *ofs, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) { int err = vfs_link(old_dentry, ovl_upper_mnt_idmap(ofs), dir, new_dentry, NULL); pr_debug("link(%pd2, %pd2) = %i\n", old_dentry, new_dentry, err); return err; } static inline int ovl_do_create(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode) { int err = vfs_create(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, true); pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); return err; } static inline int ovl_do_mkdir(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode) { int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); return err; } static inline int ovl_do_mknod(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { int err = vfs_mknod(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, dev); pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err); return err; } static inline int ovl_do_symlink(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, const char *oldname) { int err = vfs_symlink(ovl_upper_mnt_idmap(ofs), dir, dentry, oldname); pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err); return err; } static inline ssize_t ovl_do_getxattr(const struct 
path *path, const char *name, void *value, size_t size) { int err, len; WARN_ON(path->dentry->d_sb != path->mnt->mnt_sb); err = vfs_getxattr(mnt_idmap(path->mnt), path->dentry, name, value, size); len = (value && err > 0) ? err : 0; pr_debug("getxattr(%pd2, \"%s\", \"%*pE\", %zu, 0) = %i\n", path->dentry, name, min(len, 48), value, size, err); return err; } static inline ssize_t ovl_getxattr_upper(struct ovl_fs *ofs, struct dentry *upperdentry, enum ovl_xattr ox, void *value, size_t size) { struct path upperpath = { .dentry = upperdentry, .mnt = ovl_upper_mnt(ofs), }; return ovl_do_getxattr(&upperpath, ovl_xattr(ofs, ox), value, size); } static inline ssize_t ovl_path_getxattr(struct ovl_fs *ofs, const struct path *path, enum ovl_xattr ox, void *value, size_t size) { return ovl_do_getxattr(path, ovl_xattr(ofs, ox), value, size); } static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { int err = vfs_setxattr(ovl_upper_mnt_idmap(ofs), dentry, name, value, size, flags); pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, %d) = %i\n", dentry, name, min((int)size, 48), value, size, flags, err); return err; } static inline int ovl_setxattr(struct ovl_fs *ofs, struct dentry *dentry, enum ovl_xattr ox, const void *value, size_t size) { return ovl_do_setxattr(ofs, dentry, ovl_xattr(ofs, ox), value, size, 0); } static inline int ovl_do_removexattr(struct ovl_fs *ofs, struct dentry *dentry, const char *name) { int err = vfs_removexattr(ovl_upper_mnt_idmap(ofs), dentry, name); pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err); return err; } static inline int ovl_removexattr(struct ovl_fs *ofs, struct dentry *dentry, enum ovl_xattr ox) { return ovl_do_removexattr(ofs, dentry, ovl_xattr(ofs, ox)); } static inline int ovl_do_set_acl(struct ovl_fs *ofs, struct dentry *dentry, const char *acl_name, struct posix_acl *acl) { return vfs_set_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name, 
acl); } static inline int ovl_do_remove_acl(struct ovl_fs *ofs, struct dentry *dentry, const char *acl_name) { return vfs_remove_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name); } static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir, struct dentry *olddentry, struct inode *newdir, struct dentry *newdentry, unsigned int flags) { int err; struct renamedata rd = { .old_mnt_idmap = ovl_upper_mnt_idmap(ofs), .old_dir = olddir, .old_dentry = olddentry, .new_mnt_idmap = ovl_upper_mnt_idmap(ofs), .new_dir = newdir, .new_dentry = newdentry, .flags = flags, }; pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags); err = vfs_rename(&rd); if (err) { pr_debug("...rename(%pd2, %pd2, ...) = %i\n", olddentry, newdentry, err); } return err; } static inline int ovl_do_whiteout(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { int err = vfs_whiteout(ovl_upper_mnt_idmap(ofs), dir, dentry); pr_debug("whiteout(%pd2) = %i\n", dentry, err); return err; } static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs, struct dentry *dentry, umode_t mode) { struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = dentry }; struct file *file = kernel_tmpfile_open(ovl_upper_mnt_idmap(ofs), &path, mode, O_LARGEFILE | O_WRONLY, current_cred()); int err = PTR_ERR_OR_ZERO(file); pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err); return file; } static inline struct dentry *ovl_lookup_upper(struct ovl_fs *ofs, const char *name, struct dentry *base, int len) { return lookup_one(ovl_upper_mnt_idmap(ofs), name, base, len); } static inline bool ovl_open_flags_need_copy_up(int flags) { if (!flags) return false; return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)); } static inline int ovl_do_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { if (flags & AT_GETATTR_NOSEC) return vfs_getattr_nosec(path, stat, request_mask, flags); return vfs_getattr(path, stat, request_mask, flags); } /* util.c */ int 
ovl_get_write_access(struct dentry *dentry); void ovl_put_write_access(struct dentry *dentry); void ovl_start_write(struct dentry *dentry); void ovl_end_write(struct dentry *dentry); int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); static inline const struct cred *ovl_creds(struct super_block *sb) { return OVL_FS(sb)->creator_cred; } int ovl_can_decode_fh(struct super_block *sb); struct dentry *ovl_indexdir(struct super_block *sb); bool ovl_index_all(struct super_block *sb); bool ovl_verify_lower(struct super_block *sb); struct ovl_path *ovl_stack_alloc(unsigned int n); void ovl_stack_cpy(struct ovl_path *dst, struct ovl_path *src, unsigned int n); void ovl_stack_put(struct ovl_path *stack, unsigned int n); void ovl_stack_free(struct ovl_path *stack, unsigned int n); struct ovl_entry *ovl_alloc_entry(unsigned int numlower); void ovl_free_entry(struct ovl_entry *oe); bool ovl_dentry_remote(struct dentry *dentry); void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *realdentry); void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry, struct ovl_entry *oe); void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry, struct ovl_entry *oe, unsigned int mask); bool ovl_dentry_weird(struct dentry *dentry); enum ovl_path_type ovl_path_type(struct dentry *dentry); void ovl_path_upper(struct dentry *dentry, struct path *path); void ovl_path_lower(struct dentry *dentry, struct path *path); void ovl_path_lowerdata(struct dentry *dentry, struct path *path); struct inode *ovl_i_path_real(struct inode *inode, struct path *path); enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path); struct dentry *ovl_dentry_upper(struct dentry *dentry); struct dentry *ovl_dentry_lower(struct 
dentry *dentry); struct dentry *ovl_dentry_lowerdata(struct dentry *dentry); int ovl_dentry_set_lowerdata(struct dentry *dentry, struct ovl_path *datapath); const struct ovl_layer *ovl_i_layer_lower(struct inode *inode); const struct ovl_layer *ovl_layer_lower(struct dentry *dentry); struct dentry *ovl_dentry_real(struct dentry *dentry); struct dentry *ovl_i_dentry_upper(struct inode *inode); struct inode *ovl_inode_upper(struct inode *inode); struct inode *ovl_inode_lower(struct inode *inode); struct inode *ovl_inode_lowerdata(struct inode *inode); struct inode *ovl_inode_real(struct inode *inode); struct inode *ovl_inode_realdata(struct inode *inode); const char *ovl_lowerdata_redirect(struct inode *inode); struct ovl_dir_cache *ovl_dir_cache(struct inode *inode); void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache); void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry); void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry); bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry); bool ovl_dentry_is_opaque(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); bool ovl_dentry_has_xwhiteouts(struct dentry *dentry); void ovl_dentry_set_xwhiteouts(struct dentry *dentry); void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs, const struct ovl_layer *layer); bool ovl_dentry_has_upper_alias(struct dentry *dentry); void ovl_dentry_set_upper_alias(struct dentry *dentry); bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags); bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags); bool ovl_has_upperdata(struct inode *inode); void ovl_set_upperdata(struct inode *inode); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); void ovl_inode_update(struct inode *inode, struct dentry *upperdentry); void ovl_dir_modified(struct dentry 
*dentry, bool impurity); u64 ovl_inode_version_get(struct inode *inode); bool ovl_is_whiteout(struct dentry *dentry); bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path); struct file *ovl_path_open(const struct path *path, int flags); int ovl_copy_up_start(struct dentry *dentry, int flags); void ovl_copy_up_end(struct dentry *dentry); bool ovl_already_copied_up(struct dentry *dentry, int flags); char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path, enum ovl_xattr ox); bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path); bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path); bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs, const struct path *upperpath); static inline bool ovl_upper_is_whiteout(struct ovl_fs *ofs, struct dentry *upperdentry) { struct path upperpath = { .dentry = upperdentry, .mnt = ovl_upper_mnt(ofs), }; return ovl_path_is_whiteout(ofs, &upperpath); } static inline bool ovl_check_origin_xattr(struct ovl_fs *ofs, struct dentry *upperdentry) { struct path upperpath = { .dentry = upperdentry, .mnt = ovl_upper_mnt(ofs), }; return ovl_path_check_origin_xattr(ofs, &upperpath); } int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, enum ovl_xattr ox, const void *value, size_t size, int xerr); int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry); bool ovl_inuse_trylock(struct dentry *dentry); void ovl_inuse_unlock(struct dentry *dentry); bool ovl_is_inuse(struct dentry *dentry); bool ovl_need_index(struct dentry *dentry); int ovl_nlink_start(struct dentry *dentry); void ovl_nlink_end(struct dentry *dentry); int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir); int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path, struct ovl_metacopy *data); int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d, struct ovl_metacopy *metacopy); bool ovl_is_metacopy_dentry(struct 
dentry *dentry); char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding); int ovl_ensure_verity_loaded(struct path *path); int ovl_get_verity_xattr(struct ovl_fs *ofs, const struct path *path, u8 *digest_buf, int *buf_length); int ovl_validate_verity(struct ovl_fs *ofs, struct path *metapath, struct path *datapath); int ovl_get_verity_digest(struct ovl_fs *ofs, struct path *src, struct ovl_metacopy *metacopy); int ovl_sync_status(struct ovl_fs *ofs); static inline void ovl_set_flag(unsigned long flag, struct inode *inode) { set_bit(flag, &OVL_I(inode)->flags); } static inline void ovl_clear_flag(unsigned long flag, struct inode *inode) { clear_bit(flag, &OVL_I(inode)->flags); } static inline bool ovl_test_flag(unsigned long flag, struct inode *inode) { return test_bit(flag, &OVL_I(inode)->flags); } static inline bool ovl_is_impuredir(struct super_block *sb, struct dentry *upperdentry) { struct ovl_fs *ofs = OVL_FS(sb); struct path upperpath = { .dentry = upperdentry, .mnt = ovl_upper_mnt(ofs), }; return ovl_get_dir_xattr_val(ofs, &upperpath, OVL_XATTR_IMPURE) == 'y'; } static inline char ovl_get_opaquedir_val(struct ovl_fs *ofs, const struct path *path) { return ovl_get_dir_xattr_val(ofs, path, OVL_XATTR_OPAQUE); } static inline bool ovl_redirect_follow(struct ovl_fs *ofs) { return ofs->config.redirect_mode != OVL_REDIRECT_NOFOLLOW; } static inline bool ovl_redirect_dir(struct ovl_fs *ofs) { return ofs->config.redirect_mode == OVL_REDIRECT_ON; } static inline bool ovl_origin_uuid(struct ovl_fs *ofs) { return ofs->config.uuid != OVL_UUID_OFF; } static inline bool ovl_has_fsid(struct ovl_fs *ofs) { return ofs->config.uuid == OVL_UUID_ON || ofs->config.uuid == OVL_UUID_AUTO; } /* * With xino=auto, we do best effort to keep all inodes on same st_dev and * d_ino consistent with st_ino. * With xino=on, we do the same effort but we warn if we failed. 
*/ static inline bool ovl_xino_warn(struct ovl_fs *ofs) { return ofs->config.xino == OVL_XINO_ON; } /* * To avoid regressions in existing setups with overlay lower offline changes, * we allow lower changes only if none of the new features are used. */ static inline bool ovl_allow_offline_changes(struct ovl_fs *ofs) { return (!ofs->config.index && !ofs->config.metacopy && !ovl_redirect_dir(ofs) && !ovl_xino_warn(ofs)); } /* All layers on same fs? */ static inline bool ovl_same_fs(struct ovl_fs *ofs) { return ofs->xino_mode == 0; } /* All overlay inodes have same st_dev? */ static inline bool ovl_same_dev(struct ovl_fs *ofs) { return ofs->xino_mode >= 0; } static inline unsigned int ovl_xino_bits(struct ovl_fs *ofs) { return ovl_same_dev(ofs) ? ofs->xino_mode : 0; } static inline void ovl_inode_lock(struct inode *inode) { mutex_lock(&OVL_I(inode)->lock); } static inline int ovl_inode_lock_interruptible(struct inode *inode) { return mutex_lock_interruptible(&OVL_I(inode)->lock); } static inline void ovl_inode_unlock(struct inode *inode) { mutex_unlock(&OVL_I(inode)->lock); } /* namei.c */ int ovl_check_fb_len(struct ovl_fb *fb, int fb_len); static inline int ovl_check_fh_len(struct ovl_fh *fh, int fh_len) { if (fh_len < sizeof(struct ovl_fh)) return -EINVAL; return ovl_check_fb_len(&fh->fb, fh_len - OVL_FH_WIRE_OFFSET); } struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, struct vfsmount *mnt, bool connected); int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, struct dentry *upperdentry, struct ovl_path **stackp); int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, enum ovl_xattr ox, const struct ovl_fh *fh, bool is_upper, bool set); int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, enum ovl_xattr ox, struct dentry *real, bool is_upper, bool set); struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index, bool connected); int ovl_verify_index(struct ovl_fs *ofs, struct 
dentry *index); int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name); int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, struct qstr *name); struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, struct dentry *origin, bool verify); int ovl_path_next(int idx, struct dentry *dentry, struct path *path, const struct ovl_layer **layer); int ovl_verify_lowerdata(struct dentry *dentry); struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); bool ovl_lower_positive(struct dentry *dentry); static inline int ovl_verify_origin_fh(struct ovl_fs *ofs, struct dentry *upper, const struct ovl_fh *fh, bool set) { return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, fh, false, set); } static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper, struct dentry *origin, bool set) { return ovl_verify_origin_xattr(ofs, upper, OVL_XATTR_ORIGIN, origin, false, set); } static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index, struct dentry *upper, bool set) { return ovl_verify_origin_xattr(ofs, index, OVL_XATTR_UPPER, upper, true, set); } /* readdir.c */ extern const struct file_operations ovl_dir_operations; struct file *ovl_dir_real_file(const struct file *file, bool want_upper); int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper, struct list_head *list); void ovl_cache_free(struct list_head *list); void ovl_dir_cache_free(struct inode *inode); int ovl_check_d_type_supported(const struct path *realpath); int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir, struct vfsmount *mnt, struct dentry *dentry, int level); int ovl_indexdir_cleanup(struct ovl_fs *ofs); /* * Can we iterate real dir directly? 
* * Non-merge dir may contain whiteouts from a time it was a merge upper, before * lower dir was removed under it and possibly before it was rotated from upper * to lower layer. */ static inline bool ovl_dir_is_real(struct inode *dir) { return !ovl_test_flag(OVL_WHITEOUTS, dir); } /* inode.c */ int ovl_set_nlink_upper(struct dentry *dentry); int ovl_set_nlink_lower(struct dentry *dentry); unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry, struct dentry *upperdentry, unsigned int fallback); int ovl_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap, struct inode *inode, int type, bool rcu, bool noperm); static inline struct posix_acl *ovl_get_inode_acl(struct inode *inode, int type, bool rcu) { return do_ovl_get_acl(&nop_mnt_idmap, inode, type, rcu, true); } static inline struct posix_acl *ovl_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type) { return do_ovl_get_acl(idmap, d_inode(dentry), type, false, false); } int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); struct posix_acl *ovl_get_acl_path(const struct path *path, const char *acl_name, bool noperm); #else #define ovl_get_inode_acl NULL #define ovl_get_acl NULL #define ovl_set_acl NULL static inline struct posix_acl *ovl_get_acl_path(const struct path *path, const char *acl_name, bool noperm) { return NULL; } #endif int ovl_update_time(struct inode *inode, int flags); bool ovl_is_private_xattr(struct super_block *sb, const char *name); struct ovl_inode_params { struct inode *newinode; struct dentry *upperdentry; struct ovl_entry *oe; bool index; char *redirect; char *lowerdata_redirect; }; void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip, unsigned long ino, int fsid); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); struct inode *ovl_lookup_inode(struct super_block *sb, 
struct dentry *real, bool is_upper);
bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir);
struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir);
struct inode *ovl_get_inode(struct super_block *sb,
			    struct ovl_inode_params *oip);
void ovl_copyattr(struct inode *to);

/* vfs inode flags copied from real to ovl inode */
#define OVL_COPY_I_FLAGS_MASK	(S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE)
/* vfs inode flags read from overlay.protattr xattr to ovl inode */
#define OVL_PROT_I_FLAGS_MASK	(S_APPEND | S_IMMUTABLE)

/*
 * fileattr flags copied from lower to upper inode on copy up.
 * We cannot copy up immutable/append-only flags, because that would prevent
 * linking temp inode to upper dir, so we store them in xattr instead.
 */
#define OVL_COPY_FS_FLAGS_MASK	(FS_SYNC_FL | FS_NOATIME_FL)
#define OVL_COPY_FSX_FLAGS_MASK	(FS_XFLAG_SYNC | FS_XFLAG_NOATIME)
#define OVL_PROT_FS_FLAGS_MASK	(FS_APPEND_FL | FS_IMMUTABLE_FL)
#define OVL_PROT_FSX_FLAGS_MASK	(FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE)

void ovl_check_protattr(struct inode *inode, struct dentry *upper);
int ovl_set_protattr(struct inode *inode, struct dentry *upper,
		      struct fileattr *fa);

/*
 * Propagate the OVL_COPY_I_FLAGS_MASK subset of @from->i_flags to @to.
 * The same mask is passed to inode_set_flags() both to select the bits
 * taken from @from and as its mask argument.
 */
static inline void ovl_copyflags(struct inode *from, struct inode *to)
{
	unsigned int mask = OVL_COPY_I_FLAGS_MASK;

	inode_set_flags(to, from->i_flags & mask, mask);
}

/* dir.c */
extern const struct inode_operations ovl_dir_inode_operations;
int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
			     struct dentry *dentry);

/* Creation attributes passed to ovl_create_real() / ovl_create_temp(). */
struct ovl_cattr {
	dev_t rdev;
	umode_t mode;
	const char *link;
	struct dentry *hardlink;
};

/*
 * Pointer to an anonymous compound-literal struct ovl_cattr with only .mode
 * set to @m; the remaining members are zero-initialized by the designated
 * initializer.
 */
#define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) })

int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir,
		   struct dentry **newdentry, umode_t mode);
struct dentry *ovl_create_real(struct ovl_fs *ofs,
			       struct inode *dir, struct dentry *newdentry,
			       struct ovl_cattr *attr);
int ovl_cleanup(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry);
struct dentry
*ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir);
struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
			       struct ovl_cattr *attr);

/* file.c */
extern const struct file_operations ovl_file_operations;
int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa);
int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa);
int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa);
int ovl_fileattr_set(struct mnt_idmap *idmap,
		     struct dentry *dentry, struct fileattr *fa);

/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_with_data(struct dentry *dentry);
int ovl_maybe_copy_up(struct dentry *dentry, int flags);
int ovl_copy_xattr(struct super_block *sb, const struct path *path,
		   struct dentry *new);
int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat);
struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
				  bool is_upper);
struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin);
int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
		      struct dentry *upper);

/* export.c */
extern const struct export_operations ovl_export_operations;
extern const struct export_operations ovl_export_fid_operations;

/* super.c */
int ovl_fill_super(struct super_block *sb, struct fs_context *fc);

/*
 * Will this overlay be forced to mount/remount ro?
 *
 * True when ovl_upper_mnt(@ofs) is NULL or @ofs->workdir is NULL.
 */
static inline bool ovl_force_readonly(struct ovl_fs *ofs)
{
	return (!ovl_upper_mnt(ofs) || !ofs->workdir);
}

/* xattr.c */

const struct xattr_handler * const *ovl_xattr_handlers(struct ovl_fs *ofs);
int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
		struct iattr *attr);
int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
		struct kstat *stat, u32 request_mask, unsigned int flags);
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
1651 404 336 22 1839 1840 1841 1828 52 22 56 56 56 56 150 150 2073 2074 2075 120 120 120 120 2301 2298 2300 680 681 663 664 625 623 45 35 150 150 150 150 56 56 56 149 138 140 2 21 1 1 1 191 190 191 636 635 635 49 47 49 17 16 1 109 109 2478 2478 2478 126 126 126 10 116 124 2 2 11 120 25 109 126 319 234 102 318 318 1 1 120 17 1 21 21 21 21 21 21 21 3410 3412 3240 79 3247 1111 2260 3250 1326 2395 2341 656 2244 3 336 2196 672 1824 52 1836 1661 403 657 487 149 148 149 7 7 194 35 226 226 226 1 1 245 120 149 149 251 251 71 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 
897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 
1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 
1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 
2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 
2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 // SPDX-License-Identifier: GPL-2.0-only /* * fs/fs-writeback.c * * Copyright (C) 2002, Linus Torvalds. * * Contains all the functions related to writing back and waiting * upon dirty inodes against superblocks, and writing back dirty * pages against inodes. ie: data writeback. Writeout of the * inode itself is not handled here. 
*
 * 10Apr2002 Andrew Morton
 * Split out of fs/inode.c
 * Additions for address_space-based writeback
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/kthread.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/tracepoint.h>
#include <linux/device.h>
#include <linux/memcontrol.h>
#include "internal.h"

/*
 * 4MB minimal write chunk size
 *
 * Expressed in pages: 4096 is the size in KiB, and shifting right by
 * (PAGE_SHIFT - 10) divides by the page size in KiB.
 */
#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_SHIFT - 10))

/*
 * Passed into wb_writeback(), essentially a subset of writeback_control
 */
struct wb_writeback_work {
	long nr_pages;
	struct super_block *sb;
	enum writeback_sync_modes sync_mode;
	unsigned int tagged_writepages:1;
	unsigned int for_kupdate:1;
	unsigned int range_cyclic:1;
	unsigned int for_background:1;
	unsigned int for_sync:1;	/* sync(2) WB_SYNC_ALL writeback */
	unsigned int auto_free:1;	/* free on completion */
	enum wb_reason reason;		/* why was writeback initiated? */

	struct list_head list;		/* pending work list */
	struct wb_completion *done;	/* set if the caller waits */
};

/*
 * If an inode is constantly having its pages dirtied, but then the
 * updates stop dirtytime_expire_interval seconds in the past, it's
 * possible for the worst case time between when an inode has its
 * timestamps updated and when they finally get written out to be two
 * dirtytime_expire_intervals.  We set the default to 12 hours (in
 * seconds), which means most of the time inodes will have their
 * timestamps written to disk after 12 hours, but in the worst case a
 * few inodes might not their timestamps updated for 24 hours.
*/ unsigned int dirtytime_expire_interval = 12 * 60 * 60; static inline struct inode *wb_inode(struct list_head *head) { return list_entry(head, struct inode, i_io_list); } /* * Include the creation of the trace points after defining the * wb_writeback_work structure and inline functions so that the definition * remains local to this file. */ #define CREATE_TRACE_POINTS #include <trace/events/writeback.h> EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage); static bool wb_io_lists_populated(struct bdi_writeback *wb) { if (wb_has_dirty_io(wb)) { return false; } else { set_bit(WB_has_dirty_io, &wb->state); WARN_ON_ONCE(!wb->avg_write_bandwidth); atomic_long_add(wb->avg_write_bandwidth, &wb->bdi->tot_write_bandwidth); return true; } } static void wb_io_lists_depopulated(struct bdi_writeback *wb) { if (wb_has_dirty_io(wb) && list_empty(&wb->b_dirty) && list_empty(&wb->b_io) && list_empty(&wb->b_more_io)) { clear_bit(WB_has_dirty_io, &wb->state); WARN_ON_ONCE(atomic_long_sub_return(wb->avg_write_bandwidth, &wb->bdi->tot_write_bandwidth) < 0); } } /** * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list * @inode: inode to be moved * @wb: target bdi_writeback * @head: one of @wb->b_{dirty|io|more_io|dirty_time} * * Move @inode->i_io_list to @list of @wb and set %WB_has_dirty_io. * Returns %true if @inode is the first occupant of the !dirty_time IO * lists; otherwise, %false. 
*/ static bool inode_io_list_move_locked(struct inode *inode, struct bdi_writeback *wb, struct list_head *head) { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); WARN_ON_ONCE(inode->i_state & I_FREEING); list_move(&inode->i_io_list, head); /* dirty_time doesn't count as dirty_io until expiration */ if (head != &wb->b_dirty_time) return wb_io_lists_populated(wb); wb_io_lists_depopulated(wb); return false; } static void wb_wakeup(struct bdi_writeback *wb) { spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) mod_delayed_work(bdi_wq, &wb->dwork, 0); spin_unlock_irq(&wb->work_lock); } static void finish_writeback_work(struct bdi_writeback *wb, struct wb_writeback_work *work) { struct wb_completion *done = work->done; if (work->auto_free) kfree(work); if (done) { wait_queue_head_t *waitq = done->waitq; /* @done can't be accessed after the following dec */ if (atomic_dec_and_test(&done->cnt)) wake_up_all(waitq); } } static void wb_queue_work(struct bdi_writeback *wb, struct wb_writeback_work *work) { trace_writeback_queue(wb, work); if (work->done) atomic_inc(&work->done->cnt); spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) { list_add_tail(&work->list, &wb->work_list); mod_delayed_work(bdi_wq, &wb->dwork, 0); } else finish_writeback_work(wb, work); spin_unlock_irq(&wb->work_lock); } /** * wb_wait_for_completion - wait for completion of bdi_writeback_works * @done: target wb_completion * * Wait for one or more work items issued to @bdi with their ->done field * set to @done, which should have been initialized with * DEFINE_WB_COMPLETION(). This function returns after all such work items * are completed. Work items which are waited upon aren't freed * automatically on completion. 
*/
void wb_wait_for_completion(struct wb_completion *done)
{
	atomic_dec(&done->cnt);		/* put down the initial count */
	/* sleep on @done's waitqueue until the count has dropped to zero */
	wait_event(*done->waitq, !atomic_read(&done->cnt));
}

#ifdef CONFIG_CGROUP_WRITEBACK

/*
 * Parameters for foreign inode detection, see wbc_detach_inode() to see
 * how they're used.
 *
 * These parameters are inherently heuristic as the detection target
 * itself is fuzzy.  All we want to do is to detach an inode from the
 * current owner if it's being written to by some other cgroups too much.
 *
 * The current cgroup writeback is built on the assumption that multiple
 * cgroups writing to the same inode concurrently is very rare and a mode
 * of operation which isn't well supported.  As such, the goal is not
 * taking too long when a different cgroup takes over an inode while
 * avoiding too aggressive flip-flops from occasional foreign writes.
 *
 * We record, very roughly, 2s worth of IO time history and if more than
 * half of that is foreign, trigger the switch.  The recording is quantized
 * to 16 slots.  To avoid tiny writes from swinging the decision too much,
 * writes smaller than 1/8 of avg size are ignored.
 */
#define WB_FRN_TIME_SHIFT	13	/* 1s = 2^13, up to 8 secs w/ 16bit */
#define WB_FRN_TIME_AVG_SHIFT	3	/* avg = avg * 7/8 + new * 1/8 */
#define WB_FRN_TIME_CUT_DIV	8	/* ignore rounds < avg / 8 */
#define WB_FRN_TIME_PERIOD	(2 * (1 << WB_FRN_TIME_SHIFT))	/* 2s */

#define WB_FRN_HIST_SLOTS	16	/* inode->i_wb_frn_history is 16bit */
#define WB_FRN_HIST_UNIT	(WB_FRN_TIME_PERIOD / WB_FRN_HIST_SLOTS)
					/* each slot's duration is 2s / 16 */
#define WB_FRN_HIST_THR_SLOTS	(WB_FRN_HIST_SLOTS / 2)
					/* if foreign slots >= 8, switch */
#define WB_FRN_HIST_MAX_SLOTS	(WB_FRN_HIST_THR_SLOTS / 2 + 1)
					/* one round can affect up to 5 slots */
#define WB_FRN_MAX_IN_FLIGHT	1024	/* don't queue too many concurrently */

/*
 * Maximum inodes per isw.  A specific value has been chosen to make
 * struct inode_switch_wbs_context fit into 1024 bytes kmalloc.
*/ #define WB_MAX_INODES_PER_ISW ((1024UL - sizeof(struct inode_switch_wbs_context)) \ / sizeof(struct inode *)) static atomic_t isw_nr_in_flight = ATOMIC_INIT(0); static struct workqueue_struct *isw_wq; void __inode_attach_wb(struct inode *inode, struct folio *folio) { struct backing_dev_info *bdi = inode_to_bdi(inode); struct bdi_writeback *wb = NULL; if (inode_cgwb_enabled(inode)) { struct cgroup_subsys_state *memcg_css; if (folio) { memcg_css = mem_cgroup_css_from_folio(folio); wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); } else { /* must pin memcg_css, see wb_get_create() */ memcg_css = task_get_css(current, memory_cgrp_id); wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); css_put(memcg_css); } } if (!wb) wb = &bdi->wb; /* * There may be multiple instances of this function racing to * update the same inode. Use cmpxchg() to tell the winner. */ if (unlikely(cmpxchg(&inode->i_wb, NULL, wb))) wb_put(wb); } EXPORT_SYMBOL_GPL(__inode_attach_wb); /** * inode_cgwb_move_to_attached - put the inode onto wb->b_attached list * @inode: inode of interest with i_lock held * @wb: target bdi_writeback * * Remove the inode from wb's io lists and if necessarily put onto b_attached * list. Only inodes attached to cgwb's are kept on this list. */ static void inode_cgwb_move_to_attached(struct inode *inode, struct bdi_writeback *wb) { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); WARN_ON_ONCE(inode->i_state & I_FREEING); inode->i_state &= ~I_SYNC_QUEUED; if (wb != &wb->bdi->wb) list_move(&inode->i_io_list, &wb->b_attached); else list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); } /** * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it * @inode: inode of interest with i_lock held * * Returns @inode's wb with its list_lock held. @inode->i_lock must be * held on entry and is released on return. The returned wb is guaranteed * to stay @inode's associated wb until its list_lock is released. 
*/
static struct bdi_writeback *
locked_inode_to_wb_and_lock_list(struct inode *inode)
	__releases(&inode->i_lock)
	__acquires(&wb->list_lock)
{
	/* retry until the wb we locked is still the one @inode points at */
	while (true) {
		struct bdi_writeback *wb = inode_to_wb(inode);

		/*
		 * inode_to_wb() association is protected by both
		 * @inode->i_lock and @wb->list_lock but list_lock nests
		 * outside i_lock.  Drop i_lock and verify that the
		 * association hasn't changed after acquiring list_lock.
		 */
		wb_get(wb);	/* temporary ref held across the i_lock drop */
		spin_unlock(&inode->i_lock);
		spin_lock(&wb->list_lock);

		/* i_wb may have changed in between, can't use inode_to_wb() */
		if (likely(wb == inode->i_wb)) {
			wb_put(wb);	/* @inode already has ref */
			return wb;
		}

		/* association changed: undo everything and start over */
		spin_unlock(&wb->list_lock);
		wb_put(wb);
		cpu_relax();
		spin_lock(&inode->i_lock);
	}
}

/**
 * inode_to_wb_and_lock_list - determine an inode's wb and lock it
 * @inode: inode of interest
 *
 * Same as locked_inode_to_wb_and_lock_list() but @inode->i_lock isn't held
 * on entry.
 */
static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
	__acquires(&wb->list_lock)
{
	/* i_lock is taken here and released by the callee */
	spin_lock(&inode->i_lock);
	return locked_inode_to_wb_and_lock_list(inode);
}

/* Context of one asynchronous wb switch, carried by an rcu_work item. */
struct inode_switch_wbs_context {
	struct rcu_work		work;

	/*
	 * Multiple inodes can be switched at once.  The switching procedure
	 * consists of two parts, separated by a RCU grace period.  To make
	 * sure that the second part is executed for each inode gone through
	 * the first part, all inode pointers are placed into a NULL-terminated
	 * array embedded into struct inode_switch_wbs_context.  Otherwise
	 * an inode could be left in a non-consistent state.
*/
	struct bdi_writeback	*new_wb;	/* wb the inodes are switched to */
	struct inode		*inodes[];	/* NULL-terminated inode array */
};

/* Take @bdi->wb_switch_rwsem for writing. */
static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
{
	down_write(&bdi->wb_switch_rwsem);
}

/* Release the write side of @bdi->wb_switch_rwsem. */
static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
{
	up_write(&bdi->wb_switch_rwsem);
}

/*
 * Move a single @inode from @old_wb to @new_wb: transfer the per-wb dirty
 * and writeback page counts, re-home the inode on @new_wb's lists and reset
 * the foreign-detection history.  Runs under @inode->i_lock and the
 * i_pages lock, both taken here.
 *
 * Returns true if the inode was switched, false if it was skipped because
 * it is being freed.  I_WB_SWITCH is cleared on both paths.
 */
static bool inode_do_switch_wbs(struct inode *inode,
				struct bdi_writeback *old_wb,
				struct bdi_writeback *new_wb)
{
	struct address_space *mapping = inode->i_mapping;
	XA_STATE(xas, &mapping->i_pages, 0);
	struct folio *folio;
	bool switched = false;

	spin_lock(&inode->i_lock);
	xa_lock_irq(&mapping->i_pages);

	/*
	 * Once I_FREEING or I_WILL_FREE are visible under i_lock, the eviction
	 * path owns the inode and we shouldn't modify ->i_io_list.
	 */
	if (unlikely(inode->i_state & (I_FREEING | I_WILL_FREE)))
		goto skip_switch;

	trace_inode_switch_wbs(inode, old_wb, new_wb);

	/*
	 * Count and transfer stats.  Note that PAGECACHE_TAG_DIRTY points
	 * to possibly dirty folios while PAGECACHE_TAG_WRITEBACK points to
	 * folios actually under writeback.
	 */
	xas_for_each_marked(&xas, folio, ULONG_MAX, PAGECACHE_TAG_DIRTY) {
		/* the tag is only a hint, so re-check the folio itself */
		if (folio_test_dirty(folio)) {
			long nr = folio_nr_pages(folio);
			wb_stat_mod(old_wb, WB_RECLAIMABLE, -nr);
			wb_stat_mod(new_wb, WB_RECLAIMABLE, nr);
		}
	}

	/* rewind the iterator to index 0 for the second pass */
	xas_set(&xas, 0);
	xas_for_each_marked(&xas, folio, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) {
		long nr = folio_nr_pages(folio);
		WARN_ON_ONCE(!folio_test_writeback(folio));
		wb_stat_mod(old_wb, WB_WRITEBACK, -nr);
		wb_stat_mod(new_wb, WB_WRITEBACK, nr);
	}

	/* the inode itself counts as under writeback on exactly one wb */
	if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
		atomic_dec(&old_wb->writeback_inodes);
		atomic_inc(&new_wb->writeback_inodes);
	}

	/*
	 * NOTE(review): presumably the reference backing the new ->i_wb
	 * pointer; the matching old_wb puts are done by the caller.
	 */
	wb_get(new_wb);

	/*
	 * Transfer to @new_wb's IO list if necessary.  If the @inode is dirty,
	 * the specific list @inode was on is ignored and the @inode is put on
	 * ->b_dirty which is always correct including from ->b_dirty_time.
	 * The transfer preserves @inode->dirtied_when ordering.
If the @inode
	 * was clean, it means it was on the b_attached list, so move it onto
	 * the b_attached list of @new_wb.
	 */
	if (!list_empty(&inode->i_io_list)) {
		inode->i_wb = new_wb;

		if (inode->i_state & I_DIRTY_ALL) {
			struct inode *pos;

			/*
			 * Stop at the first entry whose dirtied_when is not
			 * after @inode's and insert @inode right before it.
			 * If the loop runs to completion, pos->i_io_list is
			 * the list head itself (per list_for_each_entry()),
			 * so @inode lands at the tail.
			 */
			list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
				if (time_after_eq(inode->dirtied_when,
						  pos->dirtied_when))
					break;
			inode_io_list_move_locked(inode, new_wb,
						  pos->i_io_list.prev);
		} else {
			inode_cgwb_move_to_attached(inode, new_wb);
		}
	} else {
		/* not on any IO list: only the association changes */
		inode->i_wb = new_wb;
	}

	/* ->i_wb_frn updates may race wbc_detach_inode() but doesn't matter */
	inode->i_wb_frn_winner = 0;
	inode->i_wb_frn_avg_time = 0;
	inode->i_wb_frn_history = 0;
	switched = true;
skip_switch:
	/*
	 * Paired with load_acquire in unlocked_inode_to_wb_begin() and
	 * ensures that the new wb is visible if they see !I_WB_SWITCH.
	 */
	smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH);

	xa_unlock_irq(&mapping->i_pages);
	spin_unlock(&inode->i_lock);

	return switched;
}

/*
 * RCU work function that runs the actual switch for every inode recorded
 * in the inode_switch_wbs_context embedding @work.  The old wb is read
 * from the first inode in the array.
 */
static void inode_switch_wbs_work_fn(struct work_struct *work)
{
	struct inode_switch_wbs_context *isw =
		container_of(to_rcu_work(work), struct inode_switch_wbs_context, work);
	struct backing_dev_info *bdi = inode_to_bdi(isw->inodes[0]);
	struct bdi_writeback *old_wb = isw->inodes[0]->i_wb;
	struct bdi_writeback *new_wb = isw->new_wb;
	unsigned long nr_switched = 0;
	struct inode **inodep;

	/*
	 * If @inode switches cgwb membership while sync_inodes_sb() is
	 * being issued, sync_inodes_sb() might miss it.  Synchronize.
	 */
	down_read(&bdi->wb_switch_rwsem);

	/*
	 * By the time control reaches here, RCU grace period has passed
	 * since I_WB_SWITCH assertion and all wb stat update transactions
	 * between unlocked_inode_to_wb_begin/end() are guaranteed to be
	 * synchronizing against the i_pages lock.
	 *
	 * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
	 * gives us exclusion against all wb related operations on @inode
	 * including IO list manipulations and stat updates.
*/
	/* take the two list_locks in address order, lower address first */
	if (old_wb < new_wb) {
		spin_lock(&old_wb->list_lock);
		spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&new_wb->list_lock);
		spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
	}

	/* the inode array is NULL-terminated */
	for (inodep = isw->inodes; *inodep; inodep++) {
		WARN_ON_ONCE((*inodep)->i_wb != old_wb);
		if (inode_do_switch_wbs(*inodep, old_wb, new_wb))
			nr_switched++;
	}

	spin_unlock(&new_wb->list_lock);
	spin_unlock(&old_wb->list_lock);

	up_read(&bdi->wb_switch_rwsem);

	if (nr_switched) {
		wb_wakeup(new_wb);
		/* one old_wb reference is dropped per inode actually switched */
		wb_put_many(old_wb, nr_switched);
	}

	/* drop the inode references taken by __iget() when the switch was prepared */
	for (inodep = isw->inodes; *inodep; inodep++)
		iput(*inodep);
	wb_put(new_wb);
	kfree(isw);
	atomic_dec(&isw_nr_in_flight);
}

/*
 * Try to claim @inode for a switch to @new_wb.
 *
 * Returns true with I_WB_SWITCH set in @inode->i_state and an extra inode
 * reference taken via __iget().  Returns false, leaving the inode
 * untouched, if the inode is DAX, its superblock lacks SB_ACTIVE, the inode
 * is already switching or being freed, or it is already associated with
 * @new_wb.
 */
static bool inode_prepare_wbs_switch(struct inode *inode,
				     struct bdi_writeback *new_wb)
{
	/*
	 * Paired with smp_mb() in cgroup_writeback_umount().
	 * isw_nr_in_flight must be increased before checking SB_ACTIVE and
	 * grabbing an inode, otherwise isw_nr_in_flight can be observed as 0
	 * in cgroup_writeback_umount() and the isw_wq will not be flushed.
	 */
	smp_mb();

	if (IS_DAX(inode))
		return false;

	/* while holding I_WB_SWITCH, no one else can update the association */
	spin_lock(&inode->i_lock);
	if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
	    inode->i_state & (I_WB_SWITCH | I_FREEING | I_WILL_FREE) ||
	    inode_to_wb(inode) == new_wb) {
		spin_unlock(&inode->i_lock);
		return false;
	}
	inode->i_state |= I_WB_SWITCH;
	__iget(inode);
	spin_unlock(&inode->i_lock);

	return true;
}

/**
 * inode_switch_wbs - change the wb association of an inode
 * @inode: target inode
 * @new_wb_id: ID of the new wb
 *
 * Switch @inode's wb association to the wb identified by @new_wb_id.  The
 * switching is performed asynchronously and may fail silently.
*/
static void inode_switch_wbs(struct inode *inode, int new_wb_id)
{
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct cgroup_subsys_state *memcg_css;
	struct inode_switch_wbs_context *isw;

	/* noop if seems to be already in progress */
	if (inode->i_state & I_WB_SWITCH)
		return;

	/* avoid queueing a new switch if too many are already in flight */
	if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT)
		return;

	/*
	 * Two zeroed slots: inodes[0] for @inode, inodes[1] stays NULL and
	 * terminates the array walked by inode_switch_wbs_work_fn().
	 */
	isw = kzalloc(struct_size(isw, inodes, 2), GFP_ATOMIC);
	if (!isw)
		return;

	atomic_inc(&isw_nr_in_flight);

	/* find and pin the new wb */
	rcu_read_lock();
	memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys);
	if (memcg_css && !css_tryget(memcg_css))
		memcg_css = NULL;
	rcu_read_unlock();
	if (!memcg_css)
		goto out_free;

	/* the css reference is only needed for the lookup/creation itself */
	isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
	css_put(memcg_css);
	if (!isw->new_wb)
		goto out_free;

	if (!inode_prepare_wbs_switch(inode, isw->new_wb))
		goto out_free;

	isw->inodes[0] = inode;

	/*
	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
	 * the RCU protected stat update paths to grab the i_pages
	 * lock so that stat transfer can synchronize against them.
	 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
	 */
	INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn);
	queue_rcu_work(isw_wq, &isw->work);
	return;

out_free:
	/* undo the in-flight accounting and release whatever was acquired */
	atomic_dec(&isw_nr_in_flight);
	if (isw->new_wb)
		wb_put(isw->new_wb);
	kfree(isw);
}

/*
 * Walk @list and call inode_prepare_wbs_switch() on each inode for a switch
 * to isw->new_wb.  Inodes for which that succeeds are stored in
 * isw->inodes[] at index *@nr, which is then incremented.
 *
 * Returns %true if the walk stopped early because *@nr reached
 * WB_MAX_INODES_PER_ISW - 1, %false if the whole list was walked.
 */
static bool isw_prepare_wbs_switch(struct inode_switch_wbs_context *isw,
				   struct list_head *list, int *nr)
{
	struct inode *inode;

	list_for_each_entry(inode, list, i_io_list) {
		if (!inode_prepare_wbs_switch(inode, isw->new_wb))
			continue;

		isw->inodes[*nr] = inode;
		(*nr)++;

		/*
		 * NOTE(review): stopping at WB_MAX_INODES_PER_ISW - 1 appears
		 * to keep the last slot NULL as the array terminator - confirm.
		 */
		if (*nr >= WB_MAX_INODES_PER_ISW - 1)
			return true;
	}
	return false;
}

/** * cleanup_offline_cgwb - detach associated inodes * @wb: target wb * * Switch all inodes attached to @wb to a nearest living ancestor's wb in order * to eventually release the dying @wb. Returns %true if not all inodes were * switched and the function has to be restarted.
*/
bool cleanup_offline_cgwb(struct bdi_writeback *wb)
{
	struct cgroup_subsys_state *memcg_css;
	struct inode_switch_wbs_context *isw;
	int nr;
	bool restart = false;

	/* on allocation failure nothing is switched and %false is returned */
	isw = kzalloc(struct_size(isw, inodes, WB_MAX_INODES_PER_ISW),
		      GFP_KERNEL);
	if (!isw)
		return restart;

	atomic_inc(&isw_nr_in_flight);

	/* walk up the memcg hierarchy until a wb can be obtained */
	for (memcg_css = wb->memcg_css->parent; memcg_css;
	     memcg_css = memcg_css->parent) {
		isw->new_wb = wb_get_create(wb->bdi, memcg_css, GFP_KERNEL);
		if (isw->new_wb)
			break;
	}
	/* no ancestor wb available: fall back to the bdi's embedded wb */
	if (unlikely(!isw->new_wb))
		isw->new_wb = &wb->bdi->wb; /* wb_get() is noop for bdi's wb */

	nr = 0;
	spin_lock(&wb->list_lock);
	/*
	 * In addition to the inodes that have completed writeback, also switch
	 * cgwbs for those inodes only with dirty timestamps. Otherwise, those
	 * inodes won't be written back for a long time when lazytime is
	 * enabled, and thus pinning the dying cgwbs. It won't break the
	 * bandwidth restrictions, as writeback of inode metadata is not
	 * accounted for.
	 */
	restart = isw_prepare_wbs_switch(isw, &wb->b_attached, &nr);
	if (!restart)
		restart = isw_prepare_wbs_switch(isw, &wb->b_dirty_time, &nr);
	spin_unlock(&wb->list_lock);

	/* no attached inodes? bail out */
	if (nr == 0) {
		atomic_dec(&isw_nr_in_flight);
		wb_put(isw->new_wb);
		kfree(isw);
		return restart;
	}

	/*
	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
	 * the RCU protected stat update paths to grab the i_pages
	 * lock so that stat transfer can synchronize against them.
	 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
	 */
	INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn);
	queue_rcu_work(isw_wq, &isw->work);

	return restart;
}

/** * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it * @wbc: writeback_control of interest * @inode: target inode * * @inode is locked and about to be written back under the control of @wbc. * Record @inode's writeback context into @wbc and unlock the i_lock. On * writeback completion, wbc_detach_inode() should be called.
This is used * to track the cgroup writeback context. */
void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
				 struct inode *inode)
{
	/* cgroup writeback not enabled for @inode: record nothing, just unlock */
	if (!inode_cgwb_enabled(inode)) {
		spin_unlock(&inode->i_lock);
		return;
	}

	/*
	 * Snapshot the current owner and seed the per-round counters:
	 * wb_id is the current wb's memcg id, wb_lcand_id is the winner
	 * recorded on the inode, and the transient candidate starts empty.
	 */
	wbc->wb = inode_to_wb(inode);
	wbc->inode = inode;

	wbc->wb_id = wbc->wb->memcg_css->id;
	wbc->wb_lcand_id = inode->i_wb_frn_winner;
	wbc->wb_tcand_id = 0;
	wbc->wb_bytes = 0;
	wbc->wb_lcand_bytes = 0;
	wbc->wb_tcand_bytes = 0;

	/* pin the wb; wbc_detach_inode() drops it with wb_put() */
	wb_get(wbc->wb);
	spin_unlock(&inode->i_lock);

	/*
	 * A dying wb indicates that either the blkcg associated with the
	 * memcg changed or the associated memcg is dying.  In the first
	 * case, a replacement wb should already be available and we should
	 * refresh the wb immediately.  In the second case, trying to
	 * refresh will keep failing.
	 */
	if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css)))
		inode_switch_wbs(inode, wbc->wb_id);
}
EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode);

/** * wbc_detach_inode - disassociate wbc from inode and perform foreign detection * @wbc: writeback_control of the just finished writeback * * To be called after a writeback attempt of an inode finishes and undoes * wbc_attach_and_unlock_inode(). Can be called under any context. * * As concurrent write sharing of an inode is expected to be very rare and * memcg only tracks page ownership on first-use basis severely confining * the usefulness of such sharing, cgroup writeback tracks ownership * per-inode. While the support for concurrent write sharing of an inode * is deemed unnecessary, an inode being written to by different cgroups at * different points in time is a lot more common, and, more importantly, * charging only by first-use can too readily lead to grossly incorrect * behaviors (single foreign page can lead to gigabytes of writeback to be * incorrectly attributed). * * To resolve this issue, cgroup writeback detects the majority dirtier of * an inode and transfers the ownership to it.
To avoid unnecessary * oscillation, the detection mechanism keeps track of history and gives * out the switch verdict only if the foreign usage pattern is stable over * a certain amount of time and/or writeback attempts. * * On each writeback attempt, @wbc tries to detect the majority writer * using Boyer-Moore majority vote algorithm. In addition to the byte * count from the majority voting, it also counts the bytes written for the * current wb and the last round's winner wb (max of last round's current * wb, the winner from two rounds ago, and the last round's majority * candidate). Keeping track of the historical winner helps the algorithm * to semi-reliably detect the most active writer even when it's not the * absolute majority. * * Once the winner of the round is determined, whether the winner is * foreign or not and how much IO time the round consumed is recorded in * inode->i_wb_frn_history. If the amount of recorded foreign IO time is * over a certain threshold, the switch verdict is given. */
void wbc_detach_inode(struct writeback_control *wbc)
{
	struct bdi_writeback *wb = wbc->wb;
	struct inode *inode = wbc->inode;
	unsigned long avg_time, max_bytes, max_time;
	u16 history;
	int max_id;

	/*
	 * Nothing attached.  wbc->wb is reset to NULL at the end of this
	 * function and is not assigned when wbc_attach_and_unlock_inode()
	 * returns early.
	 */
	if (!wb)
		return;

	/* work on local copies; they are written back at the end */
	history = inode->i_wb_frn_history;
	avg_time = inode->i_wb_frn_avg_time;

	/*
	 * pick the winner of this round; ties go to the current wb first,
	 * then to the last round's candidate
	 */
	if (wbc->wb_bytes >= wbc->wb_lcand_bytes &&
	    wbc->wb_bytes >= wbc->wb_tcand_bytes) {
		max_id = wbc->wb_id;
		max_bytes = wbc->wb_bytes;
	} else if (wbc->wb_lcand_bytes >= wbc->wb_tcand_bytes) {
		max_id = wbc->wb_lcand_id;
		max_bytes = wbc->wb_lcand_bytes;
	} else {
		max_id = wbc->wb_tcand_id;
		max_bytes = wbc->wb_tcand_bytes;
	}

	/* * Calculate the amount of IO time the winner consumed and fold it * into the running average kept per inode. If the consumed IO * time is lower than avag / WB_FRN_TIME_CUT_DIV, ignore it for * deciding whether to switch or not. This is to prevent one-off * small dirtiers from skewing the verdict.
*/
	/* winner's bytes -> pages -> scaled time units, divided by bandwidth */
	max_time = DIV_ROUND_UP((max_bytes >> PAGE_SHIFT) << WB_FRN_TIME_SHIFT,
				wb->avg_write_bandwidth);
	/*
	 * Move avg_time towards max_time by a 1 / 2^WB_FRN_TIME_AVG_SHIFT
	 * fraction of each.
	 */
	if (avg_time)
		avg_time += (max_time >> WB_FRN_TIME_AVG_SHIFT) -
			    (avg_time >> WB_FRN_TIME_AVG_SHIFT);
	else
		avg_time = max_time;	/* immediate catch up on first run */

	if (max_time >= avg_time / WB_FRN_TIME_CUT_DIV) {
		int slots;

		/*
		 * The switch verdict is reached if foreign wb's consume
		 * more than a certain proportion of IO time in a
		 * WB_FRN_TIME_PERIOD.  This is loosely tracked by 16 slot
		 * history mask where each bit represents one sixteenth of
		 * the period.  Determine the number of slots to shift into
		 * history from @max_time.
		 */
		slots = min(DIV_ROUND_UP(max_time, WB_FRN_HIST_UNIT),
			    (unsigned long)WB_FRN_HIST_MAX_SLOTS);
		history <<= slots;
		/* the winner is foreign: set all of the newly shifted-in bits */
		if (wbc->wb_id != max_id)
			history |= (1U << slots) - 1;

		if (history)
			trace_inode_foreign_history(inode, wbc, history);

		/*
		 * Switch if the current wb isn't the consistent winner.
		 * If there are multiple closely competing dirtiers, the
		 * inode may switch across them repeatedly over time, which
		 * is okay.  The main goal is avoiding keeping an inode on
		 * the wrong wb for an extended period of time.
		 */
		if (hweight16(history) > WB_FRN_HIST_THR_SLOTS)
			inode_switch_wbs(inode, max_id);
	}

	/*
	 * Multiple instances of this function may race to update the
	 * following fields but we don't mind occasional inaccuracies.
	 * The stored average is clamped to U16_MAX.
	 */
	inode->i_wb_frn_winner = max_id;
	inode->i_wb_frn_avg_time = min(avg_time, (unsigned long)U16_MAX);
	inode->i_wb_frn_history = history;

	/* drop the reference taken by wbc_attach_and_unlock_inode() */
	wb_put(wbc->wb);
	wbc->wb = NULL;
}
EXPORT_SYMBOL_GPL(wbc_detach_inode);

/** * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership * @wbc: writeback_control of the writeback in progress * @page: page being written out * @bytes: number of bytes being written out * * @bytes from @page are about to written out during the writeback * controlled by @wbc. Keep the book for foreign inode detection. See * wbc_detach_inode().
*/
void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
			      size_t bytes)
{
	struct folio *folio;
	struct cgroup_subsys_state *css;
	int id;

	/*
	 * pageout() path doesn't attach @wbc to the inode being written
	 * out.  This is intentional as we don't want the function to block
	 * behind a slow cgroup.  Ultimately, we want pageout() to kick off
	 * regular writeback instead of writing things out itself.
	 */
	if (!wbc->wb || wbc->no_cgroup_owner)
		return;

	folio = page_folio(page);
	css = mem_cgroup_css_from_folio(folio);
	/* dead cgroups shouldn't contribute to inode ownership arbitration */
	if (!(css->flags & CSS_ONLINE))
		return;

	id = css->id;

	/* bytes from the current owner: count them and skip candidate tracking */
	if (id == wbc->wb_id) {
		wbc->wb_bytes += bytes;
		return;
	}

	/* also credit the last round's winner when it is the writer */
	if (id == wbc->wb_lcand_id)
		wbc->wb_lcand_bytes += bytes;

	/*
	 * Boyer-Moore majority vote algorithm: adopt @id as the candidate
	 * when the counter is empty, add to the counter on a match, and
	 * otherwise subtract from it without going below zero.
	 */
	if (!wbc->wb_tcand_bytes)
		wbc->wb_tcand_id = id;
	if (id == wbc->wb_tcand_id)
		wbc->wb_tcand_bytes += bytes;
	else
		wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
}
EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);

/**
 * wb_split_bdi_pages - split nr_pages to write according to bandwidth
 * @wb: target bdi_writeback to split @nr_pages to
 * @nr_pages: number of pages to write for the whole bdi
 *
 * Split @wb's portion of @nr_pages according to @wb's write bandwidth in
 * relation to the total write bandwidth of all wb's w/ dirty inodes on
 * @wb->bdi.  %LONG_MAX is returned unchanged.
 */
static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
{
	unsigned long this_bw = wb->avg_write_bandwidth;
	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);

	if (nr_pages == LONG_MAX)
		return LONG_MAX;

	/* * This may be called on clean wb's and proportional distribution * may not make sense, just use the original @nr_pages in those * cases. In general, we wanna err on the side of writing more.
*/
	/* otherwise scale by this_bw / tot_bw, rounding up, in 64-bit math */
	if (!tot_bw || this_bw >= tot_bw)
		return nr_pages;
	else
		return DIV_ROUND_UP_ULL((u64)nr_pages * this_bw, tot_bw);
}

/**
 * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi
 * @bdi: target backing_dev_info
 * @base_work: wb_writeback_work to issue
 * @skip_if_busy: skip wb's which already have writeback in progress
 *
 * Split and issue @base_work to all wb's (bdi_writeback's) of @bdi which
 * have dirty inodes.  If @base_work->nr_pages isn't %LONG_MAX, it's
 * distributed to the busy wbs according to each wb's proportion in the
 * total active write bandwidth of @bdi.
 */
static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
				  struct wb_writeback_work *base_work,
				  bool skip_if_busy)
{
	struct bdi_writeback *last_wb = NULL;
	/*
	 * The cursor starts out as the pseudo-entry containing the list head.
	 * NOTE(review): presumably so that the _continue_rcu() walk below
	 * begins with the first real entry - confirm.
	 */
	struct bdi_writeback *wb = list_entry(&bdi->wb_list,
					      struct bdi_writeback, bdi_node);

	might_sleep();
restart:
	rcu_read_lock();
	list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
		DEFINE_WB_COMPLETION(fallback_work_done, bdi);
		struct wb_writeback_work fallback_work;
		struct wb_writeback_work *work;
		long nr_pages;

		/* drop the pin taken before the previous synchronous fallback */
		if (last_wb) {
			wb_put(last_wb);
			last_wb = NULL;
		}

		/* SYNC_ALL writes out I_DIRTY_TIME too */
		if (!wb_has_dirty_io(wb) &&
		    (base_work->sync_mode == WB_SYNC_NONE ||
		     list_empty(&wb->b_dirty_time)))
			continue;
		if (skip_if_busy && writeback_in_progress(wb))
			continue;

		nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages);

		/* usual path: queue a heap copy of @base_work with auto_free set */
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
		if (work) {
			*work = *base_work;
			work->nr_pages = nr_pages;
			work->auto_free = 1;
			wb_queue_work(wb, work);
			continue;
		}

		/* * If wb_tryget fails, the wb has been shutdown, skip it. * * Pin @wb so that it stays on @bdi->wb_list. This allows * continuing iteration from @wb after dropping and * regrabbing rcu read lock.
*/
		if (!wb_tryget(wb))
			continue;

		/* alloc failed, execute synchronously using on-stack fallback */
		work = &fallback_work;
		*work = *base_work;
		work->nr_pages = nr_pages;
		work->auto_free = 0;
		work->done = &fallback_work_done;

		wb_queue_work(wb, work);
		/* remember the pinned wb; it is released on the next pass or at exit */
		last_wb = wb;

		/* leave the RCU section before waiting, then resume from @wb */
		rcu_read_unlock();
		wb_wait_for_completion(&fallback_work_done);
		goto restart;
	}
	rcu_read_unlock();

	if (last_wb)
		wb_put(last_wb);
}

/**
 * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
 * @bdi_id: target bdi id
 * @memcg_id: target memcg css id
 * @reason: reason why some writeback work initiated
 * @done: target wb_completion
 *
 * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
 * with the specified parameters.
 *
 * Returns 0 if the work was queued, -ENOENT if the bdi, the memcg or the
 * matching wb cannot be found, and -ENOMEM if the work item cannot be
 * allocated.
 */
int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
			   enum wb_reason reason, struct wb_completion *done)
{
	struct backing_dev_info *bdi;
	struct cgroup_subsys_state *memcg_css;
	struct bdi_writeback *wb;
	struct wb_writeback_work *work;
	unsigned long dirty;
	int ret;

	/* lookup bdi and memcg */
	bdi = bdi_get_by_id(bdi_id);
	if (!bdi)
		return -ENOENT;

	rcu_read_lock();
	memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys);
	if (memcg_css && !css_tryget(memcg_css))
		memcg_css = NULL;
	rcu_read_unlock();
	if (!memcg_css) {
		ret = -ENOENT;
		goto out_bdi_put;
	}

	/*
	 * And find the associated wb.  If the wb isn't there already
	 * there's nothing to flush, don't create one.
	 */
	wb = wb_get_lookup(bdi, memcg_css);
	if (!wb) {
		ret = -ENOENT;
		goto out_css_put;
	}

	/* * The caller is attempting to write out most of * the currently dirty pages. Let's take the current dirty page * count and inflate it by 25% which should be large enough to * flush out most dirty pages while avoiding getting livelocked by * concurrent dirtiers. * * BTW the memcg stats are flushed periodically and this is best-effort * estimation, so some potential error is ok.
*/
	dirty = memcg_page_state(mem_cgroup_from_css(memcg_css), NR_FILE_DIRTY);
	dirty = dirty * 10 / 8;	/* x1.25 */

	/* issue the writeback work */
	work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
	if (work) {
		work->nr_pages = dirty;
		work->sync_mode = WB_SYNC_NONE;
		work->range_cyclic = 1;
		work->reason = reason;
		work->done = done;
		work->auto_free = 1;
		wb_queue_work(wb, work);
		ret = 0;
	} else {
		ret = -ENOMEM;
	}

	/* release the references in reverse order of acquisition */
	wb_put(wb);
out_css_put:
	css_put(memcg_css);
out_bdi_put:
	bdi_put(bdi);
	return ret;
}

/**
 * cgroup_writeback_umount - flush inode wb switches for umount
 *
 * This function is called when a super_block is about to be destroyed and
 * flushes in-flight inode wb switches.  An inode wb switch goes through
 * RCU and then workqueue, so the two need to be flushed in order to ensure
 * that all previously scheduled switches are finished.  As wb switches are
 * rare occurrences and synchronize_rcu() can take a while, perform
 * flushing iff wb switches are in flight.
 */
void cgroup_writeback_umount(void)
{
	/*
	 * SB_ACTIVE should be reliably cleared before checking
	 * isw_nr_in_flight, see generic_shutdown_super().
	 */
	smp_mb();

	if (atomic_read(&isw_nr_in_flight)) {
		/* * Use rcu_barrier() to wait for all pending callbacks to * ensure that all in-flight wb switches are in the workqueue.
*/
		rcu_barrier();
		flush_workqueue(isw_wq);
	}
}

/*
 * Allocate isw_wq, the workqueue on which inode wb switch work is queued.
 * Returns 0 on success or -ENOMEM if the workqueue cannot be allocated.
 */
static int __init cgroup_writeback_init(void)
{
	isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
	if (!isw_wq)
		return -ENOMEM;
	return 0;
}
fs_initcall(cgroup_writeback_init);

#else	/* CONFIG_CGROUP_WRITEBACK */

/* Variants used when CONFIG_CGROUP_WRITEBACK is not set. */

/* No-op in this configuration. */
static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
/* No-op in this configuration. */
static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }

/*
 * Take @inode off its io list and clear I_SYNC_QUEUED.  Both wb->list_lock
 * and inode->i_lock must be held; warns once if the inode has I_FREEING set.
 */
static void inode_cgwb_move_to_attached(struct inode *inode,
					struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);
	assert_spin_locked(&inode->i_lock);
	WARN_ON_ONCE(inode->i_state & I_FREEING);

	inode->i_state &= ~I_SYNC_QUEUED;
	list_del_init(&inode->i_io_list);
	wb_io_lists_depopulated(wb);
}

/*
 * Look up @inode's wb, drop inode->i_lock, then take and return with
 * wb->list_lock held.
 */
static struct bdi_writeback *
locked_inode_to_wb_and_lock_list(struct inode *inode)
	__releases(&inode->i_lock)
	__acquires(&wb->list_lock)
{
	struct bdi_writeback *wb = inode_to_wb(inode);

	spin_unlock(&inode->i_lock);
	spin_lock(&wb->list_lock);
	return wb;
}

/* Look up @inode's wb and return it with wb->list_lock held. */
static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
	__acquires(&wb->list_lock)
{
	struct bdi_writeback *wb = inode_to_wb(inode);

	spin_lock(&wb->list_lock);
	return wb;
}

/* Nothing to split in this configuration: @nr_pages is returned as is. */
static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
{
	return nr_pages;
}

/*
 * Queue @base_work itself on @bdi's embedded wb, with auto_free cleared,
 * unless @skip_if_busy is set and writeback is already in progress there.
 */
static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
				  struct wb_writeback_work *base_work,
				  bool skip_if_busy)
{
	might_sleep();

	if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) {
		base_work->auto_free = 0;
		wb_queue_work(&bdi->wb, base_work);
	}
}

#endif	/* CONFIG_CGROUP_WRITEBACK */

/*
 * Add in the number of potentially dirty inodes, because each inode
 * write can dirty pagecache in the underlying blockdev.
 */
static unsigned long get_nr_dirty_pages(void)
{
	return global_node_page_state(NR_FILE_DIRTY) +
		get_nr_dirty_inodes();
}

/*
 * Request writeback of @wb for @reason: set WB_start_all, record the reason
 * in wb->start_all_reason and wake the wb.  Does nothing if @wb has no
 * dirty IO or if WB_start_all is already set.
 */
static void wb_start_writeback(struct bdi_writeback *wb, enum wb_reason reason)
{
	if (!wb_has_dirty_io(wb))
		return;

	/* * All callers of this function want to start writeback of all * dirty pages.
Places like vmscan can call this at a very * high frequency, causing pointless allocations of tons of * work items and keeping the flusher threads busy retrieving * that work. Ensure that we only allow one of them pending and * inflight at the time. */
	/*
	 * test_bit() is checked first, so test_and_set_bit() only runs when
	 * the flag was observed clear.
	 */
	if (test_bit(WB_start_all, &wb->state) ||
	    test_and_set_bit(WB_start_all, &wb->state))
		return;

	wb->start_all_reason = reason;
	wb_wakeup(wb);
}

/**
 * wb_start_background_writeback - start background writeback
 * @wb: bdi_writeback to write from
 *
 * Description:
 *   This makes sure WB_SYNC_NONE background writeback happens. When
 *   this function returns, it is only guaranteed that for given wb
 *   some IO is happening if we are over background dirty threshold.
 *   Caller need not hold sb s_umount semaphore.
 */
void wb_start_background_writeback(struct bdi_writeback *wb)
{
	/*
	 * We just wake up the flusher thread. It will perform background
	 * writeback as soon as there is no other work to do.
	 */
	trace_writeback_wake_background(wb);
	wb_wakeup(wb);
}

/* * Remove the inode from the writeback list it is on.
*/
void inode_io_list_del(struct inode *inode)
{
	struct bdi_writeback *wb;

	/* wb->list_lock is taken first, inode->i_lock nests inside it */
	wb = inode_to_wb_and_lock_list(inode);
	spin_lock(&inode->i_lock);

	inode->i_state &= ~I_SYNC_QUEUED;
	list_del_init(&inode->i_io_list);
	wb_io_lists_depopulated(wb);

	spin_unlock(&inode->i_lock);
	spin_unlock(&wb->list_lock);
}
EXPORT_SYMBOL(inode_io_list_del);

/*
 * mark an inode as under writeback on the sb
 */
void sb_mark_inode_writeback(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	unsigned long flags;

	/* unlocked check first; repeated under s_inode_wblist_lock */
	if (list_empty(&inode->i_wb_list)) {
		spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
		if (list_empty(&inode->i_wb_list)) {
			list_add_tail(&inode->i_wb_list, &sb->s_inodes_wb);
			trace_sb_mark_inode_writeback(inode);
		}
		spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
	}
}

/*
 * clear an inode as under writeback on the sb
 */
void sb_clear_inode_writeback(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	unsigned long flags;

	/* unlocked check first; repeated under s_inode_wblist_lock */
	if (!list_empty(&inode->i_wb_list)) {
		spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
		if (!list_empty(&inode->i_wb_list)) {
			list_del_init(&inode->i_wb_list);
			trace_sb_clear_inode_writeback(inode);
		}
		spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
	}
}

/*
 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
 * furthest end of its superblock's dirty-inode list.
 *
 * Before stamping the inode's ->dirtied_when, we check to see whether it is
 * already the most-recently-dirtied inode on the b_dirty list.  If that is
 * the case then the inode must have been redirtied while it was being written
 * out and we don't reset its dirtied_when.
 *
 * Must be called with inode->i_lock held.
 */
static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb)
{
	assert_spin_locked(&inode->i_lock);

	inode->i_state &= ~I_SYNC_QUEUED;
	/* * When the inode is being freed just don't bother with dirty list * tracking. Flush worker will ignore this inode anyway and it will * trigger assertions in inode_io_list_move_locked().
*/
	if (inode->i_state & I_FREEING) {
		list_del_init(&inode->i_io_list);
		wb_io_lists_depopulated(wb);
		return;
	}
	if (!list_empty(&wb->b_dirty)) {
		struct inode *tail;

		/* compare against the inode at the ->next end of b_dirty */
		tail = wb_inode(wb->b_dirty.next);
		if (time_before(inode->dirtied_when, tail->dirtied_when))
			inode->dirtied_when = jiffies;
	}
	inode_io_list_move_locked(inode, wb, &wb->b_dirty);
}

/* Wrapper that takes inode->i_lock around redirty_tail_locked(). */
static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
{
	spin_lock(&inode->i_lock);
	redirty_tail_locked(inode, wb);
	spin_unlock(&inode->i_lock);
}

/*
 * requeue inode for re-scanning after bdi->b_io list is exhausted.
 */
static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
{
	inode_io_list_move_locked(inode, wb, &wb->b_more_io);
}

/*
 * Clear I_SYNC on @inode, offer it to the LRU via inode_add_lru() and wake
 * anyone waiting on the __I_SYNC bit.
 */
static void inode_sync_complete(struct inode *inode)
{
	inode->i_state &= ~I_SYNC;
	/* If inode is clean and unused, put it into LRU now... */
	inode_add_lru(inode);
	/* Waiters must see I_SYNC cleared before being woken up */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_SYNC);
}

/*
 * Return true if @inode's dirtied_when is after @t.  When CONFIG_64BIT is
 * not set, dirtied_when must additionally be no later than jiffies.
 */
static bool inode_dirtied_after(struct inode *inode, unsigned long t)
{
	bool ret = time_after(inode->dirtied_when, t);
#ifndef CONFIG_64BIT
	/*
	 * For inodes being constantly redirtied, dirtied_when can get stuck.
	 * It _appears_ to be in the future, but is actually in distant past.
	 * This test is necessary to prevent such wrapped-around relative times
	 * from permanently stopping the whole bdi writeback.
	 */
	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
#endif
	return ret;
}

/* * Move expired (dirtied before dirtied_before) dirty inodes from * @delaying_queue to @dispatch_queue.
*/
static int move_expired_inodes(struct list_head *delaying_queue,
			       struct list_head *dispatch_queue,
			       unsigned long dirtied_before)
{
	LIST_HEAD(tmp);
	struct list_head *pos, *node;
	struct super_block *sb = NULL;
	struct inode *inode;
	int do_sb_sort = 0;
	int moved = 0;

	/*
	 * Pull inodes off the ->prev end of @delaying_queue onto the local
	 * list until one dirtied after @dirtied_before is found.  Each moved
	 * inode is marked I_SYNC_QUEUED and counted.
	 */
	while (!list_empty(delaying_queue)) {
		inode = wb_inode(delaying_queue->prev);
		if (inode_dirtied_after(inode, dirtied_before))
			break;
		spin_lock(&inode->i_lock);
		list_move(&inode->i_io_list, &tmp);
		moved++;
		inode->i_state |= I_SYNC_QUEUED;
		spin_unlock(&inode->i_lock);
		/* blockdev-sb inodes do not take part in the sb comparison */
		if (sb_is_blkdev_sb(inode->i_sb))
			continue;
		/* seeing a second, different sb means grouping is needed */
		if (sb && sb != inode->i_sb)
			do_sb_sort = 1;
		sb = inode->i_sb;
	}

	/* just one sb in list, splice to dispatch_queue and we're done */
	if (!do_sb_sort) {
		list_splice(&tmp, dispatch_queue);
		goto out;
	}

	/*
	 * Although inode's i_io_list is moved from 'tmp' to 'dispatch_queue',
	 * we don't take inode->i_lock here because it is just a pointless overhead.
	 * Inode is already marked as I_SYNC_QUEUED so writeback list handling is
	 * fully under our control.
	 */
	while (!list_empty(&tmp)) {
		/* take the sb of the last entry and move every inode of that sb */
		sb = wb_inode(tmp.prev)->i_sb;
		list_for_each_prev_safe(pos, node, &tmp) {
			inode = wb_inode(pos);
			if (inode->i_sb == sb)
				list_move(&inode->i_io_list, dispatch_queue);
		}
	}
out:
	/* number of inodes taken off @delaying_queue */
	return moved;
}

/* * Queue all expired dirty inodes for io, eldest first.
* Before * newly dirtied b_dirty b_io b_more_io * =============> gf edc BA * After * newly dirtied b_dirty b_io b_more_io * =============> g fBAedc * | * +--> dequeue for IO */
static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
		     unsigned long dirtied_before)
{
	int moved;
	unsigned long time_expire_jif = dirtied_before;

	assert_spin_locked(&wb->list_lock);
	/* b_more_io is folded back into b_io first */
	list_splice_init(&wb->b_more_io, &wb->b_io);
	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before);
	/*
	 * Unless this is for sync, b_dirty_time inodes expire on their own
	 * interval (dirtytime_expire_interval seconds) instead of
	 * @dirtied_before.
	 */
	if (!work->for_sync)
		time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
	moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
				     time_expire_jif);
	if (moved)
		wb_io_lists_populated(wb);
	trace_writeback_queue_io(wb, work, dirtied_before, moved);
}

/*
 * Call the filesystem's ->write_inode() for @inode, bracketed by the
 * writeback tracepoints, and return its result.  Returns 0 without calling
 * it when the superblock has no ->write_inode() or @inode is a bad inode.
 */
static int write_inode(struct inode *inode, struct writeback_control *wbc)
{
	int ret;

	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) {
		trace_writeback_write_inode_start(inode, wbc);
		ret = inode->i_sb->s_op->write_inode(inode, wbc);
		trace_writeback_write_inode(inode, wbc);
		return ret;
	}
	return 0;
}

/*
 * Wait for writeback on an inode to complete. Called with i_lock held.
 * Caller must make sure inode cannot go away when we drop i_lock.
 */
static void __inode_wait_for_writeback(struct inode *inode)
	__releases(inode->i_lock)
	__acquires(inode->i_lock)
{
	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
	wait_queue_head_t *wqh;

	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
	/* i_lock is dropped across each wait and I_SYNC re-tested under it */
	while (inode->i_state & I_SYNC) {
		spin_unlock(&inode->i_lock);
		__wait_on_bit(wqh, &wq, bit_wait,
			      TASK_UNINTERRUPTIBLE);
		spin_lock(&inode->i_lock);
	}
}

/*
 * Wait for writeback on an inode to complete. Caller must have inode pinned.
 */
void inode_wait_for_writeback(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	__inode_wait_for_writeback(inode);
	spin_unlock(&inode->i_lock);
}

/* * Sleep until I_SYNC is cleared. This function must be called with i_lock * held and drops it.
It is aimed for callers not holding any inode reference * so once i_lock is dropped, inode can go away. */
static void inode_sleep_on_writeback(struct inode *inode)
	__releases(inode->i_lock)
{
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
	int sleep;

	/*
	 * Queue on the waitqueue and sample I_SYNC while i_lock is still
	 * held; @inode is not dereferenced again after the unlock.
	 */
	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	sleep = inode->i_state & I_SYNC;
	spin_unlock(&inode->i_lock);
	if (sleep)
		schedule();
	finish_wait(wqh, &wait);
}

/*
 * Find proper writeback list for the inode depending on its current state and
 * possibly also change of its state while we were doing writeback.  Here we
 * handle things such as livelock prevention or fairness of writeback among
 * inodes. This function can be called only by flusher thread - no one else
 * processes all inodes in writeback lists and requeueing inodes behind flusher
 * thread's back can have unexpected consequences.
 */
static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
			  struct writeback_control *wbc)
{
	/* inodes being freed are left alone */
	if (inode->i_state & I_FREEING)
		return;

	/*
	 * Sync livelock prevention. Each inode is tagged and synced in one
	 * shot. If still dirty, it will be redirty_tail()'ed below.  Update
	 * the dirty time to prevent enqueue and sync it again.
	 */
	if ((inode->i_state & I_DIRTY) &&
	    (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
		inode->dirtied_when = jiffies;

	if (wbc->pages_skipped) {
		/*
		 * Writeback is not making progress due to locked buffers.
		 * Skip this inode for now. Although having skipped pages
		 * is odd for clean inodes, it can happen for some
		 * filesystems so handle that gracefully.
		 */
		if (inode->i_state & I_DIRTY_ALL)
			redirty_tail_locked(inode, wb);
		else
			inode_cgwb_move_to_attached(inode, wb);
		return;
	}

	if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
		/*
		 * We didn't write back all the pages.  nfs_writepages()
		 * sometimes bails out without doing anything.
		 */
		if (wbc->nr_to_write <= 0) {
			/* Slice used up. Queue for next turn.
*/
			requeue_io(inode, wb);
		} else {
			/*
			 * Writeback blocked by something other than
			 * congestion. Delay the inode for some time to
			 * avoid spinning on the CPU (100% iowait)
			 * retrying writeback of the dirty page/inode
			 * that cannot be performed immediately.
			 */
			redirty_tail_locked(inode, wb);
		}
	} else if (inode->i_state & I_DIRTY) {
		/*
		 * Filesystems can dirty the inode during writeback operations,
		 * such as delayed allocation during submission or metadata
		 * updates after data IO completion.
		 */
		redirty_tail_locked(inode, wb);
	} else if (inode->i_state & I_DIRTY_TIME) {
		/* only timestamps are dirty: restamp and park on b_dirty_time */
		inode->dirtied_when = jiffies;
		inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
		inode->i_state &= ~I_SYNC_QUEUED;
	} else {
		/* The inode is clean. Remove from writeback lists. */
		inode_cgwb_move_to_attached(inode, wb);
	}
}

/*
 * Write out an inode and its dirty pages (or some of its dirty pages, depending
 * on @wbc->nr_to_write), and clear the relevant dirty flags from i_state.
 *
 * This doesn't remove the inode from the writeback list it is on, except
 * potentially to move it from b_dirty_time to b_dirty due to timestamp
 * expiration.  The caller is otherwise responsible for writeback list handling.
 *
 * The caller is also responsible for setting the I_SYNC flag beforehand and
 * calling inode_sync_complete() to clear it afterwards.
 *
 * Returns the first error seen from do_writepages(), filemap_fdatawait()
 * or write_inode(), or 0 if none of them failed.
 */
static int
__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct address_space *mapping = inode->i_mapping;
	/* value on entry, only passed to the tracepoints below */
	long nr_to_write = wbc->nr_to_write;
	unsigned dirty;
	int ret;

	WARN_ON(!(inode->i_state & I_SYNC));

	trace_writeback_single_inode_start(inode, wbc, nr_to_write);

	ret = do_writepages(mapping, wbc);

	/* * Make sure to wait on the data before writing out the metadata. * This is important for filesystems that modify metadata on data * I/O completion. We don't do it for sync(2) writeback because it has a * separate, external IO completion path and ->sync_fs for guaranteeing * inode metadata is written back correctly.
*/
	if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) {
		int err = filemap_fdatawait(mapping);
		/* an earlier do_writepages() error takes precedence */
		if (ret == 0)
			ret = err;
	}

	/*
	 * If the inode has dirty timestamps and we need to write them, call
	 * mark_inode_dirty_sync() to notify the filesystem about it and to
	 * change I_DIRTY_TIME into I_DIRTY_SYNC.
	 */
	if ((inode->i_state & I_DIRTY_TIME) &&
	    (wbc->sync_mode == WB_SYNC_ALL ||
	     time_after(jiffies, inode->dirtied_time_when +
			dirtytime_expire_interval * HZ))) {
		trace_writeback_lazytime(inode);
		mark_inode_dirty_sync(inode);
	}

	/*
	 * Get and clear the dirty flags from i_state.  This needs to be done
	 * after calling writepages because some filesystems may redirty the
	 * inode during writepages due to delalloc.  It also needs to be done
	 * after handling timestamp expiration, as that may dirty the inode too.
	 */
	spin_lock(&inode->i_lock);
	/* remember which I_DIRTY bits were set before clearing them */
	dirty = inode->i_state & I_DIRTY;
	inode->i_state &= ~dirty;

	/* * Paired with smp_mb() in __mark_inode_dirty(). This allows * __mark_inode_dirty() to test i_state without grabbing i_lock - * either they see the I_DIRTY bits cleared or we see the dirtied * inode. * * I_DIRTY_PAGES is always cleared together above even if @mapping * still has dirty pages. The flag is reinstated after smp_mb() if * necessary. This guarantees that either __mark_inode_dirty() * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY.
*/
	smp_mb();

	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		inode->i_state |= I_DIRTY_PAGES;
	else if (unlikely(inode->i_state & I_PINNING_NETFS_WB)) {
		/*
		 * No dirty pages remain: drop I_PINNING_NETFS_WB and flag the
		 * unpin in @wbc for the write_inode() call below.
		 */
		if (!(inode->i_state & I_DIRTY_PAGES)) {
			inode->i_state &= ~I_PINNING_NETFS_WB;
			wbc->unpinned_netfs_wb = true;
			dirty |= I_PINNING_NETFS_WB; /* Cause write_inode */
		}
	}

	spin_unlock(&inode->i_lock);

	/* Don't write the inode if only I_DIRTY_PAGES was set */
	if (dirty & ~I_DIRTY_PAGES) {
		int err = write_inode(inode, wbc);
		/* keep the first error encountered */
		if (ret == 0)
			ret = err;
	}
	/* the flag is reset unconditionally once write_inode() has run */
	wbc->unpinned_netfs_wb = false;
	trace_writeback_single_inode(inode, wbc, nr_to_write);
	return ret;
}

/*
 * Write out an inode's dirty data and metadata on-demand, i.e. separately from
 * the regular batched writeback done by the flusher threads in
 * writeback_sb_inodes().  @wbc controls various aspects of the write, such as
 * whether it is a data-integrity sync (%WB_SYNC_ALL) or not (%WB_SYNC_NONE).
 *
 * To prevent the inode from going away, either the caller must have a reference
 * to the inode, or the inode must have I_WILL_FREE or I_FREEING set.
 *
 * Returns the result of __writeback_single_inode(), or 0 if no writeback
 * was attempted.
 */
static int writeback_single_inode(struct inode *inode,
				  struct writeback_control *wbc)
{
	struct bdi_writeback *wb;
	int ret = 0;

	spin_lock(&inode->i_lock);
	/* sanity-check the lifetime rule stated above */
	if (!atomic_read(&inode->i_count))
		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
	else
		WARN_ON(inode->i_state & I_WILL_FREE);

	if (inode->i_state & I_SYNC) {
		/*
		 * Writeback is already running on the inode.  For WB_SYNC_NONE,
		 * that's enough and we can just return.  For WB_SYNC_ALL, we
		 * must wait for the existing writeback to complete, then do
		 * writeback again if there's anything left.
		 */
		if (wbc->sync_mode != WB_SYNC_ALL)
			goto out;
		__inode_wait_for_writeback(inode);
	}
	WARN_ON(inode->i_state & I_SYNC);
	/* * If the inode is already fully clean, then there's nothing to do. * * For data-integrity syncs we also need to check whether any pages are * still under writeback, e.g. due to prior WB_SYNC_NONE writeback. If * there are any such pages, we'll need to wait for them.
*/ if (!(inode->i_state & I_DIRTY_ALL) && (wbc->sync_mode != WB_SYNC_ALL || !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) goto out; inode->i_state |= I_SYNC; wbc_attach_and_unlock_inode(wbc, inode); ret = __writeback_single_inode(inode, wbc); wbc_detach_inode(wbc); wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); /* * If the inode is freeing, its i_io_list shoudn't be updated * as it can be finally deleted at this moment. */ if (!(inode->i_state & I_FREEING)) { /* * If the inode is now fully clean, then it can be safely * removed from its writeback list (if any). Otherwise the * flusher threads are responsible for the writeback lists. */ if (!(inode->i_state & I_DIRTY_ALL)) inode_cgwb_move_to_attached(inode, wb); else if (!(inode->i_state & I_SYNC_QUEUED)) { if ((inode->i_state & I_DIRTY)) redirty_tail_locked(inode, wb); else if (inode->i_state & I_DIRTY_TIME) { inode->dirtied_when = jiffies; inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); } } } spin_unlock(&wb->list_lock); inode_sync_complete(inode); out: spin_unlock(&inode->i_lock); return ret; } static long writeback_chunk_size(struct bdi_writeback *wb, struct wb_writeback_work *work) { long pages; /* * WB_SYNC_ALL mode does livelock avoidance by syncing dirty * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX * here avoids calling into writeback_inodes_wb() more than once. 
* * The intended call sequence for WB_SYNC_ALL writeback is: * * wb_writeback() * writeback_sb_inodes() <== called only once * write_cache_pages() <== called once for each inode * (quickly) tag currently dirty pages * (maybe slowly) sync all tagged pages */ if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages) pages = LONG_MAX; else { pages = min(wb->avg_write_bandwidth / 2, global_wb_domain.dirty_limit / DIRTY_SCOPE); pages = min(pages, work->nr_pages); pages = round_down(pages + MIN_WRITEBACK_PAGES, MIN_WRITEBACK_PAGES); } return pages; } /* * Write a portion of b_io inodes which belong to @sb. * * Return the number of pages and/or inodes written. * * NOTE! This is called with wb->list_lock held, and will * unlock and relock that for each inode it ends up doing * IO for. */ static long writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, struct wb_writeback_work *work) { struct writeback_control wbc = { .sync_mode = work->sync_mode, .tagged_writepages = work->tagged_writepages, .for_kupdate = work->for_kupdate, .for_background = work->for_background, .for_sync = work->for_sync, .range_cyclic = work->range_cyclic, .range_start = 0, .range_end = LLONG_MAX, }; unsigned long start_time = jiffies; long write_chunk; long total_wrote = 0; /* count both pages and inodes */ while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); struct bdi_writeback *tmp_wb; long wrote; if (inode->i_sb != sb) { if (work->sb) { /* * We only want to write back data for this * superblock, move all inodes not belonging * to it back onto the dirty list. */ redirty_tail(inode, wb); continue; } /* * The inode belongs to a different superblock. * Bounce back to the caller to unpin this and * pin the next superblock. */ break; } /* * Don't bother with new inodes or inodes being freed, first * kind does not need periodic writeout yet, and for the latter * kind writeout is handled by the freer. 
*/ spin_lock(&inode->i_lock); if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { redirty_tail_locked(inode, wb); spin_unlock(&inode->i_lock); continue; } if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) { /* * If this inode is locked for writeback and we are not * doing writeback-for-data-integrity, move it to * b_more_io so that writeback can proceed with the * other inodes on s_io. * * We'll have another go at writing back this inode * when we completed a full scan of b_io. */ requeue_io(inode, wb); spin_unlock(&inode->i_lock); trace_writeback_sb_inodes_requeue(inode); continue; } spin_unlock(&wb->list_lock); /* * We already requeued the inode if it had I_SYNC set and we * are doing WB_SYNC_NONE writeback. So this catches only the * WB_SYNC_ALL case. */ if (inode->i_state & I_SYNC) { /* Wait for I_SYNC. This function drops i_lock... */ inode_sleep_on_writeback(inode); /* Inode may be gone, start again */ spin_lock(&wb->list_lock); continue; } inode->i_state |= I_SYNC; wbc_attach_and_unlock_inode(&wbc, inode); write_chunk = writeback_chunk_size(wb, work); wbc.nr_to_write = write_chunk; wbc.pages_skipped = 0; /* * We use I_SYNC to pin the inode in memory. While it is set * evict_inode() will wait so the inode cannot be freed. */ __writeback_single_inode(inode, &wbc); wbc_detach_inode(&wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped; wrote = wrote < 0 ? 0 : wrote; total_wrote += wrote; if (need_resched()) { /* * We're trying to balance between building up a nice * long list of IOs to improve our merge rate, and * getting those IOs out quickly for anyone throttling * in balance_dirty_pages(). cond_resched() doesn't * unplug, so get our IOs out the door before we * give up the CPU. */ blk_flush_plug(current->plug, false); cond_resched(); } /* * Requeue @inode if still dirty. Be careful as @inode may * have been switched to another wb in the meantime. 
*/ tmp_wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) total_wrote++; requeue_inode(inode, tmp_wb, &wbc); inode_sync_complete(inode); spin_unlock(&inode->i_lock); if (unlikely(tmp_wb != wb)) { spin_unlock(&tmp_wb->list_lock); spin_lock(&wb->list_lock); } /* * bail out to wb_writeback() often enough to check * background threshold and other termination conditions. */ if (total_wrote) { if (time_is_before_jiffies(start_time + HZ / 10UL)) break; if (work->nr_pages <= 0) break; } } return total_wrote; } static long __writeback_inodes_wb(struct bdi_writeback *wb, struct wb_writeback_work *work) { unsigned long start_time = jiffies; long wrote = 0; while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); struct super_block *sb = inode->i_sb; if (!super_trylock_shared(sb)) { /* * super_trylock_shared() may fail consistently due to * s_umount being grabbed by someone else. Don't use * requeue_io() to avoid busy retrying the inode/sb. */ redirty_tail(inode, wb); continue; } wrote += writeback_sb_inodes(sb, wb, work); up_read(&sb->s_umount); /* refer to the same tests at the end of writeback_sb_inodes */ if (wrote) { if (time_is_before_jiffies(start_time + HZ / 10UL)) break; if (work->nr_pages <= 0) break; } } /* Leave any unwritten inodes on b_io */ return wrote; } static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason) { struct wb_writeback_work work = { .nr_pages = nr_pages, .sync_mode = WB_SYNC_NONE, .range_cyclic = 1, .reason = reason, }; struct blk_plug plug; blk_start_plug(&plug); spin_lock(&wb->list_lock); if (list_empty(&wb->b_io)) queue_io(wb, &work, jiffies); __writeback_inodes_wb(wb, &work); spin_unlock(&wb->list_lock); blk_finish_plug(&plug); return nr_pages - work.nr_pages; } /* * Explicit flushing or periodic writeback of "old" data. 
*
 * Define "old": the first time one of an inode's pages is dirtied, we mark the
 * dirtying-time in the inode's address_space.  So this periodic writeback code
 * just walks the superblock inode list, writing back any inodes which are
 * older than a specific point in time.
 *
 * Try to run once per dirty_writeback_interval.  But if a writeback event
 * takes longer than a dirty_writeback_interval interval, then leave a
 * one-second gap.
 *
 * dirtied_before takes precedence over nr_to_write.  So we'll only write back
 * all dirty pages if they are all attached to "old" mappings.
 *
 * Return: the amount by which @work->nr_pages was reduced during this call.
 */
static long wb_writeback(struct bdi_writeback *wb,
			 struct wb_writeback_work *work)
{
	/* Remember the starting budget so the consumed amount can be returned. */
	long nr_pages = work->nr_pages;
	unsigned long dirtied_before = jiffies;
	struct inode *inode;
	long progress;
	struct blk_plug plug;

	blk_start_plug(&plug);
	for (;;) {
		/*
		 * Stop writeback when nr_pages has been consumed
		 */
		if (work->nr_pages <= 0)
			break;

		/*
		 * Background writeout and kupdate-style writeback may
		 * run forever. Stop them if there is other work to do
		 * so that e.g. sync can proceed. They'll be restarted
		 * after the other works are all done.
		 */
		if ((work->for_background || work->for_kupdate) &&
		    !list_empty(&wb->work_list))
			break;

		/*
		 * For background writeout, stop when we are below the
		 * background dirty threshold
		 */
		if (work->for_background && !wb_over_bg_thresh(wb))
			break;

		spin_lock(&wb->list_lock);

		/*
		 * Kupdate and background works are special and we want to
		 * include all inodes that need writing. Livelock avoidance is
		 * handled by these works yielding to any other work so we are
		 * safe.
		 */
		if (work->for_kupdate) {
			dirtied_before = jiffies -
				msecs_to_jiffies(dirty_expire_interval * 10);
		} else if (work->for_background)
			dirtied_before = jiffies;

		trace_writeback_start(wb, work);
		/* Refill b_io only once the previous batch has been drained. */
		if (list_empty(&wb->b_io))
			queue_io(wb, work, dirtied_before);
		if (work->sb)
			progress = writeback_sb_inodes(work->sb, wb, work);
		else
			progress = __writeback_inodes_wb(wb, work);
		trace_writeback_written(wb, work);

		/*
		 * Did we write something? Try for more
		 *
		 * Dirty inodes are moved to b_io for writeback in batches.
		 * The completion of the current batch does not necessarily
		 * mean the overall work is done. So we keep looping as long
		 * as we made some progress on cleaning pages or inodes.
		 */
		if (progress) {
			spin_unlock(&wb->list_lock);
			continue;
		}

		/*
		 * No more inodes for IO, bail
		 */
		if (list_empty(&wb->b_more_io)) {
			spin_unlock(&wb->list_lock);
			break;
		}

		/*
		 * Nothing written. Wait for some inode to
		 * become available for writeback. Otherwise
		 * we'll just busyloop.
		 */
		trace_writeback_wait(wb, work);
		inode = wb_inode(wb->b_more_io.prev);
		/* Take i_lock before letting go of list_lock. */
		spin_lock(&inode->i_lock);
		spin_unlock(&wb->list_lock);
		/* This function drops i_lock... */
		inode_sleep_on_writeback(inode);
	}
	blk_finish_plug(&plug);

	return nr_pages - work->nr_pages;
}

/*
 * Return the next wb_writeback_work struct that hasn't been processed yet.
*/ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb) { struct wb_writeback_work *work = NULL; spin_lock_irq(&wb->work_lock); if (!list_empty(&wb->work_list)) { work = list_entry(wb->work_list.next, struct wb_writeback_work, list); list_del_init(&work->list); } spin_unlock_irq(&wb->work_lock); return work; } static long wb_check_background_flush(struct bdi_writeback *wb) { if (wb_over_bg_thresh(wb)) { struct wb_writeback_work work = { .nr_pages = LONG_MAX, .sync_mode = WB_SYNC_NONE, .for_background = 1, .range_cyclic = 1, .reason = WB_REASON_BACKGROUND, }; return wb_writeback(wb, &work); } return 0; } static long wb_check_old_data_flush(struct bdi_writeback *wb) { unsigned long expired; long nr_pages; /* * When set to zero, disable periodic writeback */ if (!dirty_writeback_interval) return 0; expired = wb->last_old_flush + msecs_to_jiffies(dirty_writeback_interval * 10); if (time_before(jiffies, expired)) return 0; wb->last_old_flush = jiffies; nr_pages = get_nr_dirty_pages(); if (nr_pages) { struct wb_writeback_work work = { .nr_pages = nr_pages, .sync_mode = WB_SYNC_NONE, .for_kupdate = 1, .range_cyclic = 1, .reason = WB_REASON_PERIODIC, }; return wb_writeback(wb, &work); } return 0; } static long wb_check_start_all(struct bdi_writeback *wb) { long nr_pages; if (!test_bit(WB_start_all, &wb->state)) return 0; nr_pages = get_nr_dirty_pages(); if (nr_pages) { struct wb_writeback_work work = { .nr_pages = wb_split_bdi_pages(wb, nr_pages), .sync_mode = WB_SYNC_NONE, .range_cyclic = 1, .reason = wb->start_all_reason, }; nr_pages = wb_writeback(wb, &work); } clear_bit(WB_start_all, &wb->state); return nr_pages; } /* * Retrieve work items and do the writeback they describe */ static long wb_do_writeback(struct bdi_writeback *wb) { struct wb_writeback_work *work; long wrote = 0; set_bit(WB_writeback_running, &wb->state); while ((work = get_next_work_item(wb)) != NULL) { trace_writeback_exec(wb, work); wrote += wb_writeback(wb, work); 
finish_writeback_work(wb, work); } /* * Check for a flush-everything request */ wrote += wb_check_start_all(wb); /* * Check for periodic writeback, kupdated() style */ wrote += wb_check_old_data_flush(wb); wrote += wb_check_background_flush(wb); clear_bit(WB_writeback_running, &wb->state); return wrote; } /* * Handle writeback of dirty data for the device backed by this bdi. Also * reschedules periodically and does kupdated style flushing. */ void wb_workfn(struct work_struct *work) { struct bdi_writeback *wb = container_of(to_delayed_work(work), struct bdi_writeback, dwork); long pages_written; set_worker_desc("flush-%s", bdi_dev_name(wb->bdi)); if (likely(!current_is_workqueue_rescuer() || !test_bit(WB_registered, &wb->state))) { /* * The normal path. Keep writing back @wb until its * work_list is empty. Note that this path is also taken * if @wb is shutting down even when we're running off the * rescuer as work_list needs to be drained. */ do { pages_written = wb_do_writeback(wb); trace_writeback_pages_written(pages_written); } while (!list_empty(&wb->work_list)); } else { /* * bdi_wq can't get enough workers and we're running off * the emergency worker. Don't hog it. Hopefully, 1024 is * enough for efficient IO. */ pages_written = writeback_inodes_wb(wb, 1024, WB_REASON_FORKER_THREAD); trace_writeback_pages_written(pages_written); } if (!list_empty(&wb->work_list)) wb_wakeup(wb); else if (wb_has_dirty_io(wb) && dirty_writeback_interval) wb_wakeup_delayed(wb); } /* * Start writeback of `nr_pages' pages on this bdi. If `nr_pages' is zero, * write back the whole world. 
*/ static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, enum wb_reason reason) { struct bdi_writeback *wb; if (!bdi_has_dirty_io(bdi)) return; list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) wb_start_writeback(wb, reason); } void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, enum wb_reason reason) { rcu_read_lock(); __wakeup_flusher_threads_bdi(bdi, reason); rcu_read_unlock(); } /* * Wakeup the flusher threads to start writeback of all currently dirty pages */ void wakeup_flusher_threads(enum wb_reason reason) { struct backing_dev_info *bdi; /* * If we are expecting writeback progress we must submit plugged IO. */ blk_flush_plug(current->plug, true); rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) __wakeup_flusher_threads_bdi(bdi, reason); rcu_read_unlock(); } /* * Wake up bdi's periodically to make sure dirtytime inodes gets * written back periodically. We deliberately do *not* check the * b_dirtytime list in wb_has_dirty_io(), since this would cause the * kernel to be constantly waking up once there are any dirtytime * inodes on the system. So instead we define a separate delayed work * function which gets called much more rarely. (By default, only * once every 12 hours.) * * If there is any other write activity going on in the file system, * this function won't be necessary. But if the only thing that has * happened on the file system is a dirtytime inode caused by an atime * update, we need this infrastructure below to make sure that inode * eventually gets pushed out to disk. 
*/ static void wakeup_dirtytime_writeback(struct work_struct *w); static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback); static void wakeup_dirtytime_writeback(struct work_struct *w) { struct backing_dev_info *bdi; rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { struct bdi_writeback *wb; list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) if (!list_empty(&wb->b_dirty_time)) wb_wakeup(wb); } rcu_read_unlock(); schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); } static int __init start_dirtytime_writeback(void) { schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); return 0; } __initcall(start_dirtytime_writeback); int dirtytime_interval_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) mod_delayed_work(system_wq, &dirtytime_work, 0); return ret; } /** * __mark_inode_dirty - internal function to mark an inode dirty * * @inode: inode to mark * @flags: what kind of dirty, e.g. I_DIRTY_SYNC. This can be a combination of * multiple I_DIRTY_* flags, except that I_DIRTY_TIME can't be combined * with I_DIRTY_PAGES. * * Mark an inode as dirty. We notify the filesystem, then update the inode's * dirty flags. Then, if needed we add the inode to the appropriate dirty list. * * Most callers should use mark_inode_dirty() or mark_inode_dirty_sync() * instead of calling this directly. * * CAREFUL! We only add the inode to the dirty list if it is hashed or if it * refers to a blockdev. Unhashed inodes will never be added to the dirty list * even if they are later hashed, as they will have been marked dirty already. * * In short, ensure you hash any inodes _before_ you start marking them dirty. * * Note that for blockdevs, inode->dirtied_when represents the dirtying time of * the block-special inode (/dev/hda1) itself. 
And the ->dirtied_when field of
 * the kernel-internal blockdev inode represents the dirtying time of the
 * blockdev's pages.  This is why for I_DIRTY_PAGES we always use
 * page->mapping->host, so the page-dirtying time is recorded in the internal
 * blockdev inode.
 */
void __mark_inode_dirty(struct inode *inode, int flags)
{
	struct super_block *sb = inode->i_sb;
	int dirtytime = 0;
	/* Non-NULL only while this function holds that wb's list_lock. */
	struct bdi_writeback *wb = NULL;

	trace_writeback_mark_inode_dirty(inode, flags);

	if (flags & I_DIRTY_INODE) {
		/*
		 * Inode timestamp update will piggyback on this dirtying.
		 * We tell ->dirty_inode callback that timestamps need to
		 * be updated by setting I_DIRTY_TIME in flags.
		 */
		if (inode->i_state & I_DIRTY_TIME) {
			spin_lock(&inode->i_lock);
			/* Re-test under i_lock; the first test was lockless. */
			if (inode->i_state & I_DIRTY_TIME) {
				inode->i_state &= ~I_DIRTY_TIME;
				flags |= I_DIRTY_TIME;
			}
			spin_unlock(&inode->i_lock);
		}

		/*
		 * Notify the filesystem about the inode being dirtied, so that
		 * (if needed) it can update on-disk fields and journal the
		 * inode.  This is only needed when the inode itself is being
		 * dirtied now.  I.e. it's only needed for I_DIRTY_INODE, not
		 * for just I_DIRTY_PAGES or I_DIRTY_TIME.
		 */
		trace_writeback_dirty_inode_start(inode, flags);
		if (sb->s_op->dirty_inode)
			sb->s_op->dirty_inode(inode,
				flags & (I_DIRTY_INODE | I_DIRTY_TIME));
		trace_writeback_dirty_inode(inode, flags);

		/* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
		flags &= ~I_DIRTY_TIME;
	} else {
		/*
		 * Else it's either I_DIRTY_PAGES, I_DIRTY_TIME, or nothing.
		 * (We don't support setting both I_DIRTY_PAGES and I_DIRTY_TIME
		 * in one call to __mark_inode_dirty().)
		 */
		dirtytime = flags & I_DIRTY_TIME;
		WARN_ON_ONCE(dirtytime && flags != I_DIRTY_TIME);
	}

	/*
	 * Paired with smp_mb() in __writeback_single_inode() for the
	 * following lockless i_state test.  See there for details.
	 */
	smp_mb();

	/* Fast path: every requested flag is already set. */
	if ((inode->i_state & flags) == flags)
		return;

	spin_lock(&inode->i_lock);
	/* Same test again, this time under i_lock. */
	if ((inode->i_state & flags) != flags) {
		const int was_dirty = inode->i_state & I_DIRTY;

		inode_attach_wb(inode, NULL);

		inode->i_state |= flags;

		/*
		 * Grab inode's wb early because it requires dropping i_lock and we
		 * need to make sure following checks happen atomically with dirty
		 * list handling so that we don't move inodes under flush worker's
		 * hands.
		 */
		if (!was_dirty) {
			wb = locked_inode_to_wb_and_lock_list(inode);
			spin_lock(&inode->i_lock);
		}

		/*
		 * If the inode is queued for writeback by flush worker, just
		 * update its dirty state. Once the flush worker is done with
		 * the inode it will place it on the appropriate superblock
		 * list, based upon its state.
		 */
		if (inode->i_state & I_SYNC_QUEUED)
			goto out_unlock;

		/*
		 * Only add valid (hashed) inodes to the superblock's
		 * dirty list.  Add blockdev inodes as well.
		 */
		if (!S_ISBLK(inode->i_mode)) {
			if (inode_unhashed(inode))
				goto out_unlock;
		}
		if (inode->i_state & I_FREEING)
			goto out_unlock;

		/*
		 * If the inode was already on b_dirty/b_io/b_more_io, don't
		 * reposition it (that would break b_dirty time-ordering).
		 */
		if (!was_dirty) {
			struct list_head *dirty_list;
			bool wakeup_bdi = false;

			inode->dirtied_when = jiffies;
			if (dirtytime)
				inode->dirtied_time_when = jiffies;

			/* Timestamp-only dirtiness goes to b_dirty_time. */
			if (inode->i_state & I_DIRTY)
				dirty_list = &wb->b_dirty;
			else
				dirty_list = &wb->b_dirty_time;

			wakeup_bdi = inode_io_list_move_locked(inode, wb,
							       dirty_list);

			/* Both locks are dropped here, hence the direct return. */
			spin_unlock(&wb->list_lock);
			spin_unlock(&inode->i_lock);
			trace_writeback_dirty_inode_enqueue(inode);

			/*
			 * If this is the first dirty inode for this bdi,
			 * we have to wake-up the corresponding bdi thread
			 * to make sure background write-back happens
			 * later.
			 */
			if (wakeup_bdi &&
			    (wb->bdi->capabilities & BDI_CAP_WRITEBACK))
				wb_wakeup_delayed(wb);
			return;
		}
	}
out_unlock:
	/* wb is set only on the !was_dirty path, where list_lock was taken. */
	if (wb)
		spin_unlock(&wb->list_lock);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(__mark_inode_dirty);

/*
 * The @s_sync_lock is used to serialise concurrent sync operations
 * to avoid lock contention problems with concurrent wait_sb_inodes() calls.
 * Concurrent callers will block on the s_sync_lock rather than doing contending
 * walks. The queueing maintains sync(2) required behaviour as all the IO that
 * has been issued up to the time this function is entered is guaranteed to be
 * completed by the time we have gained the lock and waited for all IO that is
 * in progress regardless of the order callers are granted the lock.
 */
static void wait_sb_inodes(struct super_block *sb)
{
	LIST_HEAD(sync_list);

	/*
	 * We need to be protected against the filesystem going from
	 * r/o to r/w or vice versa.
	 */
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	mutex_lock(&sb->s_sync_lock);

	/*
	 * Splice the writeback list onto a temporary list to avoid waiting on
	 * inodes that have started writeback after this point.
	 *
	 * Use rcu_read_lock() to keep the inodes around until we have a
	 * reference. s_inode_wblist_lock protects sb->s_inodes_wb as well as
	 * the local list because inodes can be dropped from either by writeback
	 * completion.
	 */
	rcu_read_lock();
	spin_lock_irq(&sb->s_inode_wblist_lock);
	list_splice_init(&sb->s_inodes_wb, &sync_list);

	/*
	 * Data integrity sync. Must wait for all pages under writeback, because
	 * there may have been pages dirtied before our sync call, but which had
	 * writeout started before we write it out.  In which case, the inode
	 * may not be on the dirty list, but we still have to wait for that
	 * writeout.
	 */
	while (!list_empty(&sync_list)) {
		struct inode *inode = list_first_entry(&sync_list, struct inode,
						       i_wb_list);
		struct address_space *mapping = inode->i_mapping;

		/*
		 * Move each inode back to the wb list before we drop the lock
		 * to preserve consistency between i_wb_list and the mapping
		 * writeback tag. Writeback completion is responsible to remove
		 * the inode from either list once the writeback tag is cleared.
		 */
		list_move_tail(&inode->i_wb_list, &sb->s_inodes_wb);

		/*
		 * The mapping can appear untagged while still on-list since we
		 * do not have the mapping lock. Skip it here, wb completion
		 * will remove it.
		 */
		if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
			continue;

		spin_unlock_irq(&sb->s_inode_wblist_lock);

		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
			spin_unlock(&inode->i_lock);

			/* Retake the list lock before looking at the next entry. */
			spin_lock_irq(&sb->s_inode_wblist_lock);
			continue;
		}
		/* Take a reference before leaving the RCU read-side section. */
		__iget(inode);
		spin_unlock(&inode->i_lock);
		rcu_read_unlock();

		/*
		 * We keep the error status of individual mapping so that
		 * applications can catch the writeback error using fsync(2).
		 * See filemap_fdatawait_keep_errors() for details.
		 */
		filemap_fdatawait_keep_errors(mapping);

		cond_resched();

		iput(inode);

		/* Back to the state the loop condition expects. */
		rcu_read_lock();
		spin_lock_irq(&sb->s_inode_wblist_lock);
	}
	spin_unlock_irq(&sb->s_inode_wblist_lock);
	rcu_read_unlock();
	mutex_unlock(&sb->s_sync_lock);
}

/*
 * Queue a WB_SYNC_NONE, tagged_writepages work item of @nr pages for @sb on
 * the wbs of its bdi and wait for the work completion.  Does nothing when the
 * bdi has no dirty IO or is noop_backing_dev_info.  @skip_if_busy is passed
 * through to bdi_split_work_to_wbs().
 */
static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
				     enum wb_reason reason, bool skip_if_busy)
{
	struct backing_dev_info *bdi = sb->s_bdi;
	DEFINE_WB_COMPLETION(done, bdi);
	struct wb_writeback_work work = {
		.sb			= sb,
		.sync_mode		= WB_SYNC_NONE,
		.tagged_writepages	= 1,
		.done			= &done,
		.nr_pages		= nr,
		.reason			= reason,
	};

	if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info)
		return;
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy);
	wb_wait_for_completion(&done);
}

/**
 * writeback_inodes_sb_nr -	writeback dirty inodes from given super_block
 * @sb: the superblock
 * @nr: the number of pages to write
 * @reason: reason why some writeback work initiated
 *
 * Start writeback on some inodes on this super_block. No guarantees are made
 * on how many (if any) will be written, and this function does not wait
 * for IO completion of submitted IO.
 */
void writeback_inodes_sb_nr(struct super_block *sb,
			    unsigned long nr,
			    enum wb_reason reason)
{
	__writeback_inodes_sb_nr(sb, nr, reason, false);
}
EXPORT_SYMBOL(writeback_inodes_sb_nr);

/**
 * writeback_inodes_sb	-	writeback dirty inodes from given super_block
 * @sb: the superblock
 * @reason: reason why some writeback work was initiated
 *
 * Start writeback on some inodes on this super_block. No guarantees are made
 * on how many (if any) will be written, and this function does not wait
 * for IO completion of submitted IO.
*/ void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) { return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason); } EXPORT_SYMBOL(writeback_inodes_sb); /** * try_to_writeback_inodes_sb - try to start writeback if none underway * @sb: the superblock * @reason: reason why some writeback work was initiated * * Invoke __writeback_inodes_sb_nr if no writeback is currently underway. */ void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) { if (!down_read_trylock(&sb->s_umount)) return; __writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason, true); up_read(&sb->s_umount); } EXPORT_SYMBOL(try_to_writeback_inodes_sb); /** * sync_inodes_sb - sync sb inode pages * @sb: the superblock * * This function writes and waits on any dirty inode belonging to this * super_block. */ void sync_inodes_sb(struct super_block *sb) { struct backing_dev_info *bdi = sb->s_bdi; DEFINE_WB_COMPLETION(done, bdi); struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, .range_cyclic = 0, .done = &done, .reason = WB_REASON_SYNC, .for_sync = 1, }; /* * Can't skip on !bdi_has_dirty() because we should wait for !dirty * inodes under writeback and I_DIRTY_TIME inodes ignored by * bdi_has_dirty() need to be written out too. */ if (bdi == &noop_backing_dev_info) return; WARN_ON(!rwsem_is_locked(&sb->s_umount)); /* protect against inode wb switch, see inode_switch_wbs_work_fn() */ bdi_down_write_wb_switch_rwsem(bdi); bdi_split_work_to_wbs(bdi, &work, false); wb_wait_for_completion(&done); bdi_up_write_wb_switch_rwsem(bdi); wait_sb_inodes(sb); } EXPORT_SYMBOL(sync_inodes_sb); /** * write_inode_now - write an inode to disk * @inode: inode to write to disk * @sync: whether the write should be synchronous or not * * This function commits an inode to disk immediately if it is dirty. This is * primarily needed by knfsd. * * The caller must either have a ref on the inode or must have set I_WILL_FREE. 
*/ int write_inode_now(struct inode *inode, int sync) { struct writeback_control wbc = { .nr_to_write = LONG_MAX, .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE, .range_start = 0, .range_end = LLONG_MAX, }; if (!mapping_can_writeback(inode->i_mapping)) wbc.nr_to_write = 0; might_sleep(); return writeback_single_inode(inode, &wbc); } EXPORT_SYMBOL(write_inode_now); /** * sync_inode_metadata - write an inode to disk * @inode: the inode to sync * @wait: wait for I/O to complete. * * Write an inode to disk and adjust its dirty state after completion. * * Note: only writes the actual inode, no associated data or other metadata. */ int sync_inode_metadata(struct inode *inode, int wait) { struct writeback_control wbc = { .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, .nr_to_write = 0, /* metadata-only */ }; return writeback_single_inode(inode, &wbc); } EXPORT_SYMBOL(sync_inode_metadata);
428 428 428 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 // SPDX-License-Identifier: GPL-2.0 /* * procfs-based user access to knfsd statistics * * /proc/net/rpc/nfsd * * Format: * rc <hits> <misses> <nocache> * Statistsics for the reply cache * fh <stale> <deprecated filehandle cache stats> * statistics for filehandle lookup * io <bytes-read> <bytes-written> * statistics for IO throughput * th <threads> <deprecated thread usage histogram stats> * number of threads * ra <deprecated ra-cache stats> * * plus generic RPC stats (see net/sunrpc/stats.c) * * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */ #include <linux/seq_file.h> #include <linux/module.h> #include <linux/sunrpc/stats.h> #include <net/net_namespace.h> #include "nfsd.h" struct nfsd_stats nfsdstats; struct svc_stat nfsd_svcstats = { .program = &nfsd_program, }; static int nfsd_show(struct seq_file *seq, void *v) { int i; seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n", percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]), percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]), percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]), percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_FH_STALE]), percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_READ]), percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE])); /* thread usage: */ seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt)); /* deprecated thread usage histogram stats */ for (i = 0; i < 10; i++) seq_puts(seq, " 0.000"); /* deprecated 
ra-cache stats */ seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n"); /* show my rpc info */ svc_seq_show(seq, &nfsd_svcstats); #ifdef CONFIG_NFSD_V4 /* Show count for individual nfsv4 operations */ /* Writing operation numbers 0 1 2 also for maintaining uniformity */ seq_printf(seq, "proc4ops %u", LAST_NFS4_OP + 1); for (i = 0; i <= LAST_NFS4_OP; i++) { seq_printf(seq, " %lld", percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)])); } seq_printf(seq, "\nwdeleg_getattr %lld", percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR])); seq_putc(seq, '\n'); #endif return 0; } DEFINE_PROC_SHOW_ATTRIBUTE(nfsd); int nfsd_percpu_counters_init(struct percpu_counter *counters, int num) { int i, err = 0; for (i = 0; !err && i < num; i++) err = percpu_counter_init(&counters[i], 0, GFP_KERNEL); if (!err) return 0; for (; i > 0; i--) percpu_counter_destroy(&counters[i-1]); return err; } void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num) { int i; for (i = 0; i < num; i++) percpu_counter_set(&counters[i], 0); } void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num) { int i; for (i = 0; i < num; i++) percpu_counter_destroy(&counters[i]); } static int nfsd_stat_counters_init(void) { return nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); } static void nfsd_stat_counters_destroy(void) { nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); } int nfsd_stat_init(void) { int err; err = nfsd_stat_counters_init(); if (err) return err; svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops); return 0; } void nfsd_stat_shutdown(void) { nfsd_stat_counters_destroy(); svc_proc_unregister(&init_net, "nfsd"); }
/*
 * linux/fs/nls/nls_iso8859-7.c
 *
 * Charset iso8859-7 translation tables.
 * Generated automatically from the Unicode and charset
 * tables from the Unicode Organization (www.unicode.org).
 * The Unicode to charset table has only exact mappings.
*/ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x90*/ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0xa0*/ 0x00a0, 0x02bd, 0x02bc, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, /* 0xb0*/ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, /* 0xc0*/ 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, /* 0xd0*/ 0x03a0, 0x03a1, 0x0000, 
0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, /* 0xe0*/ 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, /* 0xf0*/ 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0x00, 0x00, 0xa3, 0x00, 0x00, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0x00, 0xab, 0xac, 0xad, 0x00, 0x00, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0x00, 0x00, 0x00, 0xb7, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0xbb, 0x00, 0xbd, 0x00, 0x00, /* 0xb8-0xbf */ }; static const unsigned char 
page02[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0xa2, 0xa1, 0x00, 0x00, /* 0xb8-0xbf */ }; static const unsigned char page03[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0xb4, 0xb5, 0xb6, 0x00, /* 0x80-0x87 */ 0xb8, 0xb9, 0xba, 0x00, 0xbc, 0x00, 0xbe, 0xbf, /* 0x88-0x8f */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0x90-0x97 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0x98-0x9f */ 0xd0, 0xd1, 0x00, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xa0-0xa7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xa8-0xaf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xb0-0xb7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xb8-0xbf */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xc0-0xc7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0x00, /* 0xc8-0xcf */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x00, /* 0x10-0x17 */ }; static const unsigned char *const page_uni2charset[256] = { page00, NULL, page02, page03, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0x00, 0x00, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0x00, 0xab, 0xac, 0xad, 0x00, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xdc, 0xb7, /* 0xb0-0xb7 */ 0xdd, 0xde, 0xdf, 0xbb, 0xfc, 0xbd, 0xfd, 0xfe, /* 0xb8-0xbf */ 0xc0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xc0-0xc7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xc8-0xcf */ 0xf0, 0xf1, 0x00, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xd0-0xd7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0x00, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 
0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0x00, 0x00, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0x00, 0xab, 0xac, 0xad, 0x00, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0x00, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xb6, 0xb8, 0xb9, 0xba, /* 0xd8-0xdf */ 0xe0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xe0-0xe7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xe8-0xef */ 0xd0, 0xd1, 0xd3, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xf0-0xf7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xbc, 0xbe, 0xbf, 
0x00, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "iso8859-7", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_iso8859_7(void) { return register_nls(&table); } static void __exit exit_nls_iso8859_7(void) { unregister_nls(&table); } module_init(init_nls_iso8859_7) module_exit(exit_nls_iso8859_7) MODULE_LICENSE("Dual BSD/GPL");
26 3 24 26 15 2 5 12 12 1 5 5 5 5 5 5 1 5 5 1 1 2 2 1 5 3 3 3 1 1 1 1 1 24 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 // SPDX-License-Identifier: GPL-2.0+ /* * NILFS direct block pointer. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * * Written by Koji Sato. 
*/ #include <linux/errno.h> #include "nilfs.h" #include "page.h" #include "direct.h" #include "alloc.h" #include "dat.h" static inline __le64 *nilfs_direct_dptrs(const struct nilfs_bmap *direct) { return (__le64 *) ((struct nilfs_direct_node *)direct->b_u.u_data + 1); } static inline __u64 nilfs_direct_get_ptr(const struct nilfs_bmap *direct, __u64 key) { return le64_to_cpu(*(nilfs_direct_dptrs(direct) + key)); } static inline void nilfs_direct_set_ptr(struct nilfs_bmap *direct, __u64 key, __u64 ptr) { *(nilfs_direct_dptrs(direct) + key) = cpu_to_le64(ptr); } static int nilfs_direct_lookup(const struct nilfs_bmap *direct, __u64 key, int level, __u64 *ptrp) { __u64 ptr; if (key > NILFS_DIRECT_KEY_MAX || level != 1) return -ENOENT; ptr = nilfs_direct_get_ptr(direct, key); if (ptr == NILFS_BMAP_INVALID_PTR) return -ENOENT; *ptrp = ptr; return 0; } static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, __u64 key, __u64 *ptrp, unsigned int maxblocks) { struct inode *dat = NULL; __u64 ptr, ptr2; sector_t blocknr; int ret, cnt; if (key > NILFS_DIRECT_KEY_MAX) return -ENOENT; ptr = nilfs_direct_get_ptr(direct, key); if (ptr == NILFS_BMAP_INVALID_PTR) return -ENOENT; if (NILFS_BMAP_USE_VBN(direct)) { dat = nilfs_bmap_get_dat(direct); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) return ret; ptr = blocknr; } maxblocks = min_t(unsigned int, maxblocks, NILFS_DIRECT_KEY_MAX - key + 1); for (cnt = 1; cnt < maxblocks && (ptr2 = nilfs_direct_get_ptr(direct, key + cnt)) != NILFS_BMAP_INVALID_PTR; cnt++) { if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) return ret; ptr2 = blocknr; } if (ptr2 != ptr + cnt) break; } *ptrp = ptr; return cnt; } static __u64 nilfs_direct_find_target_v(const struct nilfs_bmap *direct, __u64 key) { __u64 ptr; ptr = nilfs_bmap_find_target_seq(direct, key); if (ptr != NILFS_BMAP_INVALID_PTR) /* sequential access */ return ptr; /* block group */ return nilfs_bmap_find_target_in_group(direct); } static int 
nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) { union nilfs_bmap_ptr_req req; struct inode *dat = NULL; struct buffer_head *bh; int ret; if (key > NILFS_DIRECT_KEY_MAX) return -ENOENT; if (nilfs_direct_get_ptr(bmap, key) != NILFS_BMAP_INVALID_PTR) return -EEXIST; if (NILFS_BMAP_USE_VBN(bmap)) { req.bpr_ptr = nilfs_direct_find_target_v(bmap, key); dat = nilfs_bmap_get_dat(bmap); } ret = nilfs_bmap_prepare_alloc_ptr(bmap, &req, dat); if (!ret) { /* ptr must be a pointer to a buffer head. */ bh = (struct buffer_head *)((unsigned long)ptr); set_buffer_nilfs_volatile(bh); nilfs_bmap_commit_alloc_ptr(bmap, &req, dat); nilfs_direct_set_ptr(bmap, key, req.bpr_ptr); if (!nilfs_bmap_dirty(bmap)) nilfs_bmap_set_dirty(bmap); if (NILFS_BMAP_USE_VBN(bmap)) nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr); nilfs_inode_add_blocks(bmap->b_inode, 1); } return ret; } static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) { union nilfs_bmap_ptr_req req; struct inode *dat; int ret; if (key > NILFS_DIRECT_KEY_MAX || nilfs_direct_get_ptr(bmap, key) == NILFS_BMAP_INVALID_PTR) return -ENOENT; dat = NILFS_BMAP_USE_VBN(bmap) ? 
nilfs_bmap_get_dat(bmap) : NULL; req.bpr_ptr = nilfs_direct_get_ptr(bmap, key); ret = nilfs_bmap_prepare_end_ptr(bmap, &req, dat); if (!ret) { nilfs_bmap_commit_end_ptr(bmap, &req, dat); nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR); nilfs_inode_sub_blocks(bmap->b_inode, 1); } return ret; } static int nilfs_direct_seek_key(const struct nilfs_bmap *direct, __u64 start, __u64 *keyp) { __u64 key; for (key = start; key <= NILFS_DIRECT_KEY_MAX; key++) { if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR) { *keyp = key; return 0; } } return -ENOENT; } static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp) { __u64 key, lastkey; lastkey = NILFS_DIRECT_KEY_MAX + 1; for (key = NILFS_DIRECT_KEY_MIN; key <= NILFS_DIRECT_KEY_MAX; key++) if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR) lastkey = key; if (lastkey == NILFS_DIRECT_KEY_MAX + 1) return -ENOENT; *keyp = lastkey; return 0; } static int nilfs_direct_check_insert(const struct nilfs_bmap *bmap, __u64 key) { return key > NILFS_DIRECT_KEY_MAX; } static int nilfs_direct_gather_data(struct nilfs_bmap *direct, __u64 *keys, __u64 *ptrs, int nitems) { __u64 key; __u64 ptr; int n; if (nitems > NILFS_DIRECT_NBLOCKS) nitems = NILFS_DIRECT_NBLOCKS; n = 0; for (key = 0; key < nitems; key++) { ptr = nilfs_direct_get_ptr(direct, key); if (ptr != NILFS_BMAP_INVALID_PTR) { keys[n] = key; ptrs[n] = ptr; n++; } } return n; } int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, __u64 key, __u64 *keys, __u64 *ptrs, int n) { __le64 *dptrs; int ret, i, j; /* no need to allocate any resource for conversion */ /* delete */ ret = bmap->b_ops->bop_delete(bmap, key); if (ret < 0) return ret; /* free resources */ if (bmap->b_ops->bop_clear != NULL) bmap->b_ops->bop_clear(bmap); /* convert */ dptrs = nilfs_direct_dptrs(bmap); for (i = 0, j = 0; i < NILFS_DIRECT_NBLOCKS; i++) { if ((j < n) && (i == keys[j])) { dptrs[i] = (i != key) ? 
cpu_to_le64(ptrs[j]) : NILFS_BMAP_INVALID_PTR; j++; } else dptrs[i] = NILFS_BMAP_INVALID_PTR; } nilfs_direct_init(bmap); return 0; } static int nilfs_direct_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) { struct nilfs_palloc_req oldreq, newreq; struct inode *dat; __u64 key; __u64 ptr; int ret; if (!NILFS_BMAP_USE_VBN(bmap)) return 0; dat = nilfs_bmap_get_dat(bmap); key = nilfs_bmap_data_get_key(bmap, bh); ptr = nilfs_direct_get_ptr(bmap, key); if (!buffer_nilfs_volatile(bh)) { oldreq.pr_entry_nr = ptr; newreq.pr_entry_nr = ptr; ret = nilfs_dat_prepare_update(dat, &oldreq, &newreq); if (ret < 0) return ret; nilfs_dat_commit_update(dat, &oldreq, &newreq, bmap->b_ptr_type == NILFS_BMAP_PTR_VS); set_buffer_nilfs_volatile(bh); nilfs_direct_set_ptr(bmap, key, newreq.pr_entry_nr); } else ret = nilfs_dat_mark_dirty(dat, ptr); return ret; } static int nilfs_direct_assign_v(struct nilfs_bmap *direct, __u64 key, __u64 ptr, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { struct inode *dat = nilfs_bmap_get_dat(direct); union nilfs_bmap_ptr_req req; int ret; req.bpr_ptr = ptr; ret = nilfs_dat_prepare_start(dat, &req.bpr_req); if (!ret) { nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); binfo->bi_v.bi_blkoff = cpu_to_le64(key); } return ret; } static int nilfs_direct_assign_p(struct nilfs_bmap *direct, __u64 key, __u64 ptr, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { nilfs_direct_set_ptr(direct, key, blocknr); binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = 0; memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); return 0; } static int nilfs_direct_assign(struct nilfs_bmap *bmap, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { __u64 key; __u64 ptr; key = nilfs_bmap_data_get_key(bmap, *bh); if (unlikely(key > NILFS_DIRECT_KEY_MAX)) { nilfs_crit(bmap->b_inode->i_sb, "%s (ino=%lu): invalid key: %llu", __func__, 
bmap->b_inode->i_ino, (unsigned long long)key); return -EINVAL; } ptr = nilfs_direct_get_ptr(bmap, key); if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) { nilfs_crit(bmap->b_inode->i_sb, "%s (ino=%lu): invalid pointer: %llu", __func__, bmap->b_inode->i_ino, (unsigned long long)ptr); return -EINVAL; } return NILFS_BMAP_USE_VBN(bmap) ? nilfs_direct_assign_v(bmap, key, ptr, bh, blocknr, binfo) : nilfs_direct_assign_p(bmap, key, ptr, bh, blocknr, binfo); } static const struct nilfs_bmap_operations nilfs_direct_ops = { .bop_lookup = nilfs_direct_lookup, .bop_lookup_contig = nilfs_direct_lookup_contig, .bop_insert = nilfs_direct_insert, .bop_delete = nilfs_direct_delete, .bop_clear = NULL, .bop_propagate = nilfs_direct_propagate, .bop_lookup_dirty_buffers = NULL, .bop_assign = nilfs_direct_assign, .bop_mark = NULL, .bop_seek_key = nilfs_direct_seek_key, .bop_last_key = nilfs_direct_last_key, .bop_check_insert = nilfs_direct_check_insert, .bop_check_delete = NULL, .bop_gather_data = nilfs_direct_gather_data, }; int nilfs_direct_init(struct nilfs_bmap *bmap) { bmap->b_ops = &nilfs_direct_ops; return 0; }
64 64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 // SPDX-License-Identifier: GPL-2.0-or-later /* mpihelp-add_2.c - MPI helper functions * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. * * This file is part of GnuPG. * * Note: This code is heavily based on the GNU MP Library. * Actually it's the same code with only minor changes in the * way the data is stored; this is to support the abstraction * of an optional secure memory allocation which may be used * to avoid revealing of sensitive data due to paging etc. * The GNU MP Library itself is published under the LGPL; * however I decided to publish this code under the plain GPL. */ #include "mpi-internal.h" #include "longlong.h" mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_ptr_t s2_ptr, mpi_size_t size) { mpi_limb_t x, y, cy; mpi_size_t j; /* The loop counter and index J goes from -SIZE to -1. This way the loop becomes faster. */ j = -size; /* Offset the base pointers to compensate for the negative indices. */ s1_ptr -= j; s2_ptr -= j; res_ptr -= j; cy = 0; do { y = s2_ptr[j]; x = s1_ptr[j]; y += cy; /* add previous carry to subtrahend */ cy = y < cy; /* get out carry from that addition */ y = x - y; /* main subtract */ cy += y > x; /* get out carry from the subtract, combine */ res_ptr[j] = y; } while (++j); return cy; }
1071 7 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 /* SPDX-License-Identifier: GPL-2.0 */ /* linux/net/inet/arp.h */ #ifndef _ARP_H #define _ARP_H #include <linux/if_arp.h> #include <linux/hash.h> #include <net/neighbour.h> extern struct neigh_table arp_tbl; static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32 *hash_rnd) { u32 key = *(const u32 *)pkey; u32 val = key ^ hash32_ptr(dev); return val * hash_rnd[0]; } #ifdef CONFIG_INET static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) key = INADDR_ANY; return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev); } #else static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { return NULL; } #endif static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key) { struct neighbour *n; rcu_read_lock(); n = __ipv4_neigh_lookup_noref(dev, key); if (n && !refcount_inc_not_zero(&n->refcnt)) n = NULL; rcu_read_unlock(); return n; } static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key) { struct neighbour *n; rcu_read_lock(); n = __ipv4_neigh_lookup_noref(dev, key); neigh_confirm(n); rcu_read_unlock(); } void arp_init(void); int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg); void arp_send(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, const unsigned char *dest_hw, const unsigned char *src_hw, const unsigned char *th); int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir); void arp_ifdown(struct net_device *dev); int arp_invalidate(struct net_device *dev, __be32 ip, bool force); struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, 
const unsigned char *dest_hw, const unsigned char *src_hw, const unsigned char *target_hw); void arp_xmit(struct sk_buff *skb); #endif /* _ARP_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* 
Network filesystem support services. * * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * See: * * Documentation/filesystems/netfs_library.rst * * for a description of the network filesystem interface declared here. */ #ifndef _LINUX_NETFS_H #define _LINUX_NETFS_H #include <linux/workqueue.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/uio.h> enum netfs_sreq_ref_trace; /* * Overload PG_private_2 to give us PG_fscache - this is used to indicate that * a page is currently backed by a local disk cache */ #define folio_test_fscache(folio) folio_test_private_2(folio) #define PageFsCache(page) PagePrivate2((page)) #define SetPageFsCache(page) SetPagePrivate2((page)) #define ClearPageFsCache(page) ClearPagePrivate2((page)) #define TestSetPageFsCache(page) TestSetPagePrivate2((page)) #define TestClearPageFsCache(page) TestClearPagePrivate2((page)) /** * folio_start_fscache - Start an fscache write on a folio. * @folio: The folio. * * Call this function before writing a folio to a local cache. Starting a * second write before the first one finishes is not allowed. */ static inline void folio_start_fscache(struct folio *folio) { VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio); folio_get(folio); folio_set_private_2(folio); } /** * folio_end_fscache - End an fscache write on a folio. * @folio: The folio. * * Call this function after the folio has been written to the local cache. * This will wake any sleepers waiting on this folio. */ static inline void folio_end_fscache(struct folio *folio) { folio_end_private_2(folio); } /** * folio_wait_fscache - Wait for an fscache write on this folio to end. * @folio: The folio. * * If this folio is currently being written to a local cache, wait for * the write to finish. Another write may start after this one finishes, * unless the caller holds the folio lock. 
*/ static inline void folio_wait_fscache(struct folio *folio) { folio_wait_private_2(folio); } /** * folio_wait_fscache_killable - Wait for an fscache write on this folio to end. * @folio: The folio. * * If this folio is currently being written to a local cache, wait * for the write to finish or for a fatal signal to be received. * Another write may start after this one finishes, unless the caller * holds the folio lock. * * Return: * - 0 if successful. * - -EINTR if a fatal signal was encountered. */ static inline int folio_wait_fscache_killable(struct folio *folio) { return folio_wait_private_2_killable(folio); } static inline void set_page_fscache(struct page *page) { folio_start_fscache(page_folio(page)); } static inline void end_page_fscache(struct page *page) { folio_end_private_2(page_folio(page)); } static inline void wait_on_page_fscache(struct page *page) { folio_wait_private_2(page_folio(page)); } static inline int wait_on_page_fscache_killable(struct page *page) { return folio_wait_private_2_killable(page_folio(page)); } /* Marks used on xarray-based buffers */ #define NETFS_BUF_PUT_MARK XA_MARK_0 /* - Page needs putting */ #define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */ enum netfs_io_source { NETFS_FILL_WITH_ZEROES, NETFS_DOWNLOAD_FROM_SERVER, NETFS_READ_FROM_CACHE, NETFS_INVALID_READ, NETFS_UPLOAD_TO_SERVER, NETFS_WRITE_TO_CACHE, NETFS_INVALID_WRITE, } __mode(byte); typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, bool was_async); /* * Per-inode context. This wraps the VFS inode. 
*/ struct netfs_inode { struct inode inode; /* The VFS inode */ const struct netfs_request_ops *ops; #if IS_ENABLED(CONFIG_FSCACHE) struct fscache_cookie *cache; #endif loff_t remote_i_size; /* Size of the remote file */ loff_t zero_point; /* Size after which we assume there's no data * on the server */ unsigned long flags; #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ #define NETFS_ICTX_NO_WRITE_STREAMING 3 /* Don't engage in write-streaming */ }; /* * A netfs group - for instance a ceph snap. This is marked on dirty pages and * pages marked with a group must be flushed before they can be written under * the domain of another group. */ struct netfs_group { refcount_t ref; void (*free)(struct netfs_group *netfs_group); }; /* * Information about a dirty page (attached only if necessary). * folio->private */ struct netfs_folio { struct netfs_group *netfs_group; /* Filesystem's grouping marker (or NULL). */ unsigned int dirty_offset; /* Write-streaming dirty data offset */ unsigned int dirty_len; /* Write-streaming dirty data length */ }; #define NETFS_FOLIO_INFO 0x1UL /* OR'd with folio->private. */ static inline struct netfs_folio *netfs_folio_info(struct folio *folio) { void *priv = folio_get_private(folio); if ((unsigned long)priv & NETFS_FOLIO_INFO) return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO); return NULL; } static inline struct netfs_group *netfs_folio_group(struct folio *folio) { struct netfs_folio *finfo; void *priv = folio_get_private(folio); finfo = netfs_folio_info(folio); if (finfo) return finfo->netfs_group; return priv; } /* * Resources required to do operations on a cache. 
*/
struct netfs_cache_resources {
	const struct netfs_cache_ops	*ops;		/* Cache operation table */
	void				*cache_priv;
	void				*cache_priv2;
	unsigned int			debug_id;	/* Cookie debug ID */
	unsigned int			inval_counter;	/* object->inval_counter at begin_op */
};

/*
 * Descriptor for a single component subrequest.  Each operation represents an
 * individual read/write from/to a server, a cache, a journal, etc..
 *
 * The buffer iterator is persistent for the life of the subrequest struct and
 * the pages it points to can be relied on to exist for the duration.
 */
struct netfs_io_subrequest {
	struct netfs_io_request *rreq;		/* Supervising I/O request */
	struct work_struct	work;
	struct list_head	rreq_link;	/* Link in rreq->subrequests */
	struct iov_iter		io_iter;	/* Iterator for this subrequest */
	loff_t			start;		/* Where to start the I/O */
	size_t			len;		/* Size of the I/O */
	size_t			transferred;	/* Amount of data transferred */
	refcount_t		ref;
	short			error;		/* 0 or error that occurred */
	unsigned short		debug_index;	/* Index in list (for debugging output) */
	unsigned int		max_nr_segs;	/* 0 or max number of segments in an iterator */
	enum netfs_io_source	source;		/* Where to read from/write to */
	unsigned long		flags;
	/*
	 * NOTE(review): the NETFS_SREQ_* values below look like bit numbers
	 * within ->flags rather than masks - confirm against the users.
	 */
#define NETFS_SREQ_COPY_TO_CACHE	0	/* Set if should copy the data to the cache */
#define NETFS_SREQ_CLEAR_TAIL		1	/* Set if the rest of the read should be cleared */
#define NETFS_SREQ_SHORT_IO		2	/* Set if the I/O was short */
#define NETFS_SREQ_SEEK_DATA_READ	3	/* Set if ->read() should SEEK_DATA first */
#define NETFS_SREQ_NO_PROGRESS		4	/* Set if we didn't manage to read any data */
#define NETFS_SREQ_ONDEMAND		5	/* Set if it's from on-demand read mode */
};

/*
 * What triggered a request.  The final enumerator, nr__netfs_io_origin, is
 * not an origin itself; being last, its value equals the number of origins
 * listed before it.
 */
enum netfs_io_origin {
	NETFS_READAHEAD,		/* This read was triggered by readahead */
	NETFS_READPAGE,			/* This read is a synchronous read */
	NETFS_READ_FOR_WRITE,		/* This read is to prepare a write */
	NETFS_WRITEBACK,		/* This write was triggered by writepages */
	NETFS_WRITETHROUGH,		/* This write was made by netfs_perform_write() */
	NETFS_LAUNDER_WRITE,		/* This is triggered by ->launder_folio() */
	NETFS_UNBUFFERED_WRITE,		/* This is an unbuffered write */
	NETFS_DIO_READ,			/* This is a direct I/O read */
	NETFS_DIO_WRITE,		/* This is a direct I/O write */
	nr__netfs_io_origin
} __mode(byte);

/*
 * Descriptor for an I/O helper request.  This is used to make multiple I/O
 * operations to a variety of data stores and then stitch the result together.
 */
struct netfs_io_request {
	/* The work item and the RCU head share storage. */
	union {
		struct work_struct work;
		struct rcu_head rcu;
	};
	struct inode		*inode;		/* The file being accessed */
	struct address_space	*mapping;	/* The mapping being accessed */
	struct kiocb		*iocb;		/* AIO completion vector */
	struct netfs_cache_resources cache_resources;
	struct list_head	proc_link;	/* Link in netfs_iorequests */
	struct list_head	subrequests;	/* Contributory I/O operations */
	struct iov_iter		iter;		/* Unencrypted-side iterator */
	struct iov_iter		io_iter;	/* I/O (Encrypted-side) iterator */
	void			*netfs_priv;	/* Private data for the netfs */
	struct bio_vec		*direct_bv;	/* DIO buffer list (when handling iovec-iter) */
	unsigned int		direct_bv_count; /* Number of elements in direct_bv[] */
	unsigned int		debug_id;
	unsigned int		rsize;		/* Maximum read size (0 for none) */
	unsigned int		wsize;		/* Maximum write size (0 for none) */
	unsigned int		subreq_counter;	/* Next subreq->debug_index */
	atomic_t		nr_outstanding;	/* Number of ops in progress */
	atomic_t		nr_copy_ops;	/* Number of copy-to-cache ops in progress */
	size_t			submitted;	/* Amount submitted for I/O so far */
	size_t			len;		/* Length of the request */
	size_t			upper_len;	/* Length can be extended to here */
	size_t			transferred;	/* Amount to be indicated as transferred */
	short			error;		/* 0 or error that occurred */
	enum netfs_io_origin	origin;		/* Origin of the request */
	bool			direct_bv_unpin; /* T if direct_bv[] must be unpinned */
	loff_t			i_size;		/* Size of the file */
	loff_t			start;		/* Start position */
	pgoff_t			no_unlock_folio; /* Don't unlock this folio after read */
	refcount_t		ref;
	unsigned long		flags;
	/* Note that value 6 is not assigned in the list below. */
#define NETFS_RREQ_INCOMPLETE_IO	0	/* Some ioreqs terminated short or with error */
#define NETFS_RREQ_COPY_TO_CACHE	1	/* Need to write to the cache */
#define NETFS_RREQ_NO_UNLOCK_FOLIO	2	/* Don't unlock no_unlock_folio on completion */
#define NETFS_RREQ_DONT_UNLOCK_FOLIOS	3	/* Don't unlock the folios on completion */
#define NETFS_RREQ_FAILED		4	/* The request failed */
#define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
#define NETFS_RREQ_WRITE_TO_CACHE	7	/* Need to write to the cache */
#define NETFS_RREQ_UPLOAD_TO_SERVER	8	/* Need to write to the server */
#define NETFS_RREQ_NONBLOCK		9	/* Don't block if possible (O_NONBLOCK) */
#define NETFS_RREQ_BLOCKED		10	/* We blocked */
	const struct netfs_request_ops *netfs_ops;
	void (*cleanup)(struct netfs_io_request *req);
};

/*
 * Operations the network filesystem can/must provide to the helpers.
 */
struct netfs_request_ops {
	unsigned int	io_request_size;	/* Alloc size for netfs_io_request struct */
	unsigned int	io_subrequest_size;	/* Alloc size for netfs_io_subrequest struct */
	int (*init_request)(struct netfs_io_request *rreq, struct file *file);
	void (*free_request)(struct netfs_io_request *rreq);
	/* Takes a subrequest, despite the parameter being named rreq. */
	void (*free_subrequest)(struct netfs_io_subrequest *rreq);

	/* Read request handling */
	void (*expand_readahead)(struct netfs_io_request *rreq);
	bool (*clamp_length)(struct netfs_io_subrequest *subreq);
	void (*issue_read)(struct netfs_io_subrequest *subreq);
	bool (*is_still_valid)(struct netfs_io_request *rreq);
	int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
				 struct folio **foliop, void **_fsdata);
	void (*done)(struct netfs_io_request *rreq);

	/* Modification handling */
	void (*update_i_size)(struct inode *inode, loff_t i_size);

	/* Write request handling */
	void (*create_write_requests)(struct netfs_io_request *wreq,
				      loff_t start, size_t len);
	void (*invalidate_cache)(struct netfs_io_request *wreq);
};

/*
 * How to handle reading from a hole.
*/
enum netfs_read_from_hole {
	NETFS_READ_HOLE_IGNORE,
	NETFS_READ_HOLE_CLEAR,
	NETFS_READ_HOLE_FAIL,
};

/*
 * Table of operations for access to a cache.
 *
 * Most operations take the netfs_cache_resources handle for the operation
 * in progress; prepare_read takes the subrequest instead.
 */
struct netfs_cache_ops {
	/* End an operation */
	void (*end_operation)(struct netfs_cache_resources *cres);

	/* Read data from the cache */
	int (*read)(struct netfs_cache_resources *cres,
		    loff_t start_pos,
		    struct iov_iter *iter,
		    enum netfs_read_from_hole read_hole,
		    netfs_io_terminated_t term_func,
		    void *term_func_priv);

	/* Write data to the cache */
	int (*write)(struct netfs_cache_resources *cres,
		     loff_t start_pos,
		     struct iov_iter *iter,
		     netfs_io_terminated_t term_func,
		     void *term_func_priv);

	/* Expand readahead request */
	void (*expand_readahead)(struct netfs_cache_resources *cres,
				 loff_t *_start, size_t *_len, loff_t i_size);

	/* Prepare a read operation, shortening it to a cached/uncached
	 * boundary as appropriate.
	 */
	enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq,
					     loff_t i_size);

	/* Prepare a write operation, working out what part of the write we can
	 * actually do.
	 */
	int (*prepare_write)(struct netfs_cache_resources *cres,
			     loff_t *_start, size_t *_len, size_t upper_len,
			     loff_t i_size, bool no_space_allocated_yet);

	/* Prepare an on-demand read operation, shortening it to a cached/uncached
	 * boundary as appropriate.
	 */
	enum netfs_io_source (*prepare_ondemand_read)(struct netfs_cache_resources *cres,
						      loff_t start, size_t *_len,
						      loff_t i_size,
						      unsigned long *_flags, ino_t ino);

	/* Query the occupancy of the cache in a region, returning where the
	 * next chunk of data starts and how long it is.
	 */
	int (*query_occupancy)(struct netfs_cache_resources *cres,
			       loff_t start, size_t len, size_t granularity,
			       loff_t *_data_start, size_t *_data_len);
};

/* High-level read API.
*/
ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);

/* High-level write API */
ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
			    struct netfs_group *netfs_group);
ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from,
					 struct netfs_group *netfs_group);
ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from);
ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from);

/* Address operations API */
/* Forward declaration: only a pointer to it is needed by the prototype below. */
struct readahead_control;
void netfs_readahead(struct readahead_control *);
int netfs_read_folio(struct file *, struct folio *);
int netfs_write_begin(struct netfs_inode *, struct file *,
		      struct address_space *, loff_t pos, unsigned int len,
		      struct folio **, void **fsdata);
int netfs_writepages(struct address_space *mapping,
		     struct writeback_control *wbc);
bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc);
void netfs_clear_inode_writeback(struct inode *inode, const void *aux);
void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
bool netfs_release_folio(struct folio *folio, gfp_t gfp);
int netfs_launder_folio(struct folio *folio);

/* VMA operations API. */
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);

/* (Sub)request management API.
*/
void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
			  enum netfs_sreq_ref_trace what);
void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
			  bool was_async, enum netfs_sreq_ref_trace what);
ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
				struct iov_iter *new,
				iov_iter_extraction_t extraction_flags);
size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
			size_t max_size, size_t max_segs);
struct netfs_io_subrequest *netfs_create_write_request(
	struct netfs_io_request *wreq, enum netfs_io_source dest,
	loff_t start, size_t len, work_func_t worker);
/*
 * Has the netfs_io_terminated_t signature, so it can be passed wherever a
 * term_func is expected.
 */
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
				       bool was_async);
void netfs_queue_write_request(struct netfs_io_subrequest *subreq);

/* Paired start/end helpers for read, write and direct I/O on an inode. */
int netfs_start_io_read(struct inode *inode);
void netfs_end_io_read(struct inode *inode);
int netfs_start_io_write(struct inode *inode);
void netfs_end_io_write(struct inode *inode);
int netfs_start_io_direct(struct inode *inode);
void netfs_end_io_direct(struct inode *inode);

/**
 * netfs_inode - Get the netfs inode context from the inode
 * @inode: The inode to query
 *
 * Get the netfs lib inode context from the network filesystem's inode.  The
 * context struct is expected to directly follow on from the VFS inode struct.
 */
static inline struct netfs_inode *netfs_inode(struct inode *inode)
{
	/* Recover the netfs_inode that embeds @inode as its "inode" member. */
	return container_of(inode, struct netfs_inode, inode);
}

/**
 * netfs_inode_init - Initialise a netfslib inode context
 * @ctx: The netfs inode to initialise
 * @ops: The netfs's operations list
 * @use_zero_point: True to use the zero_point read optimisation
 *
 * Initialise the netfs library context struct.  This is expected to follow on
 * directly from the VFS inode struct.
*/ static inline void netfs_inode_init(struct netfs_inode *ctx, const struct netfs_request_ops *ops, bool use_zero_point) { ctx->ops = ops; ctx->remote_i_size = i_size_read(&ctx->inode); ctx->zero_point = LLONG_MAX; ctx->flags = 0; #if IS_ENABLED(CONFIG_FSCACHE) ctx->cache = NULL; #endif /* ->releasepage() drives zero_point */ if (use_zero_point) { ctx->zero_point = ctx->remote_i_size; mapping_set_release_always(ctx->inode.i_mapping); } } /** * netfs_resize_file - Note that a file got resized * @ctx: The netfs inode being resized * @new_i_size: The new file size * @changed_on_server: The change was applied to the server * * Inform the netfs lib that a file got resized so that it can adjust its state. */ static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size, bool changed_on_server) { if (changed_on_server) ctx->remote_i_size = new_i_size; if (new_i_size < ctx->zero_point) ctx->zero_point = new_i_size; } /** * netfs_i_cookie - Get the cache cookie from the inode * @ctx: The netfs inode to query * * Get the caching cookie (if enabled) from the network filesystem's inode. */ static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx) { #if IS_ENABLED(CONFIG_FSCACHE) return ctx->cache; #else return NULL; #endif } #endif /* _LINUX_NETFS_H */
11 11 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 // SPDX-License-Identifier: GPL-2.0-only /* * netfilter module to enforce network quotas * * Sam Johnston <samj@samj.net> */ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_quota.h> #include <linux/module.h> struct xt_quota_priv { spinlock_t lock; uint64_t quota; }; MODULE_LICENSE("GPL"); MODULE_AUTHOR("Sam Johnston <samj@samj.net>"); MODULE_DESCRIPTION("Xtables: countdown quota match"); MODULE_ALIAS("ipt_quota"); MODULE_ALIAS("ip6t_quota"); static bool quota_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct xt_quota_info *q = (void *)par->matchinfo; struct xt_quota_priv *priv = q->master; bool ret = q->flags & XT_QUOTA_INVERT; spin_lock_bh(&priv->lock); if (priv->quota >= skb->len) { priv->quota -= skb->len; ret = !ret; } else { /* we do not allow even small packets from now on */ priv->quota = 0; } spin_unlock_bh(&priv->lock); return ret; } static int quota_mt_check(const struct xt_mtchk_param *par) { struct xt_quota_info *q = par->matchinfo; if (q->flags & ~XT_QUOTA_MASK) return -EINVAL; q->master = kmalloc(sizeof(*q->master), GFP_KERNEL); if (q->master == NULL) return -ENOMEM; spin_lock_init(&q->master->lock); q->master->quota = q->quota; return 0; } static void quota_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_quota_info *q = par->matchinfo; kfree(q->master); } static struct xt_match quota_mt_reg __read_mostly = { .name = "quota", .revision = 0, .family = NFPROTO_UNSPEC, .match = quota_mt, .checkentry = quota_mt_check, .destroy = quota_mt_destroy, .matchsize = sizeof(struct xt_quota_info), .usersize = offsetof(struct xt_quota_info, master), .me = 
THIS_MODULE, }; static int __init quota_mt_init(void) { return xt_register_match(&quota_mt_reg); } static void __exit quota_mt_exit(void) { xt_unregister_match(&quota_mt_reg); } module_init(quota_mt_init); module_exit(quota_mt_exit);
21 84 207 208 12 195 68 65 48 20 2 2 196 194 196 289 4 287 4 287 43 5 39 14 32 128 167 2 5 1 4 3 8 81 20 61 19 18 1 43 115 115 55 55 5 43 55 29 29 5 14 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 // SPDX-License-Identifier: GPL-2.0-only /* * Pluggable TCP congestion control support and newReno * congestion control. * Based on ideas from I/O scheduler support and Web100. * * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org> */ #define pr_fmt(fmt) "TCP: " fmt #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> #include <linux/list.h> #include <linux/gfp.h> #include <linux/jhash.h> #include <net/tcp.h> #include <trace/events/tcp.h> static DEFINE_SPINLOCK(tcp_cong_list_lock); static LIST_HEAD(tcp_cong_list); /* Simple linear search, don't expect many entries! */ struct tcp_congestion_ops *tcp_ca_find(const char *name) { struct tcp_congestion_ops *e; list_for_each_entry_rcu(e, &tcp_cong_list, list) { if (strcmp(e->name, name) == 0) return e; } return NULL; } void tcp_set_ca_state(struct sock *sk, const u8 ca_state) { struct inet_connection_sock *icsk = inet_csk(sk); trace_tcp_cong_state_set(sk, ca_state); if (icsk->icsk_ca_ops->set_state) icsk->icsk_ca_ops->set_state(sk, ca_state); icsk->icsk_ca_state = ca_state; } /* Must be called with rcu lock held */ static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net, const char *name) { struct tcp_congestion_ops *ca = tcp_ca_find(name); #ifdef CONFIG_MODULES if (!ca && capable(CAP_NET_ADMIN)) { rcu_read_unlock(); request_module("tcp_%s", name); rcu_read_lock(); ca = tcp_ca_find(name); } #endif return ca; } /* Simple linear search, not much in here. 
*/ struct tcp_congestion_ops *tcp_ca_find_key(u32 key) { struct tcp_congestion_ops *e; list_for_each_entry_rcu(e, &tcp_cong_list, list) { if (e->key == key) return e; } return NULL; } int tcp_validate_congestion_control(struct tcp_congestion_ops *ca) { /* all algorithms must implement these */ if (!ca->ssthresh || !ca->undo_cwnd || !(ca->cong_avoid || ca->cong_control)) { pr_err("%s does not implement required ops\n", ca->name); return -EINVAL; } return 0; } /* Attach new congestion control algorithm to the list * of available options. */ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) { int ret; ret = tcp_validate_congestion_control(ca); if (ret) return ret; ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); spin_lock(&tcp_cong_list_lock); if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) { pr_notice("%s already registered or non-unique key\n", ca->name); ret = -EEXIST; } else { list_add_tail_rcu(&ca->list, &tcp_cong_list); pr_debug("%s registered\n", ca->name); } spin_unlock(&tcp_cong_list_lock); return ret; } EXPORT_SYMBOL_GPL(tcp_register_congestion_control); /* * Remove congestion control algorithm, called from * the module's remove function. Module ref counts are used * to ensure that this can't be done till all sockets using * that method are closed. */ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) { spin_lock(&tcp_cong_list_lock); list_del_rcu(&ca->list); spin_unlock(&tcp_cong_list_lock); /* Wait for outstanding readers to complete before the * module gets removed entirely. * * A try_module_get() should fail by now as our module is * in "going" state since no refs are held anymore and * module_exit() handler being called. */ synchronize_rcu(); } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); /* Replace a registered old ca with a new one. * * The new ca must have the same name as the old one, that has been * registered. 
*/
int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca)
{
	struct tcp_congestion_ops *existing;
	int ret;

	ret = tcp_validate_congestion_control(ca);
	if (ret)
		return ret;

	/* Same key derivation as used at registration time. */
	ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));

	spin_lock(&tcp_cong_list_lock);
	/* Look up whatever is currently registered under the old key. */
	existing = tcp_ca_find_key(old_ca->key);
	if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, ca->name)) {
		pr_notice("%s not registered or non-unique key\n",
			  ca->name);
		ret = -EINVAL;
	} else if (existing != old_ca) {
		/* The key matched, but the registered entry is a different object. */
		pr_notice("invalid old congestion control algorithm to replace\n");
		ret = -EINVAL;
	} else {
		/* Add the new one before removing the old one to keep
		 * one implementation available all the time.
		 */
		list_add_tail_rcu(&ca->list, &tcp_cong_list);
		list_del_rcu(&existing->list);
		pr_debug("%s updated\n", ca->name);
	}
	spin_unlock(&tcp_cong_list_lock);

	/* Wait for outstanding readers to complete before the
	 * module or struct_ops gets removed entirely.
	 */
	if (!ret)
		synchronize_rcu();

	return ret;
}

/* Resolve an algorithm name to its key.
 *
 * The lookup goes through tcp_ca_find_autoload() under the RCU read lock.
 * Returns the algorithm's key, or TCP_CA_UNSPEC if no algorithm with that
 * name is found.  *ecn_ca is written only on success, with whether the
 * algorithm has TCP_CONG_NEEDS_ECN set.  The function asserts via
 * might_sleep() that the caller is allowed to sleep.
 */
u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
{
	const struct tcp_congestion_ops *ca;
	u32 key = TCP_CA_UNSPEC;

	might_sleep();

	rcu_read_lock();
	ca = tcp_ca_find_autoload(net, name);
	if (ca) {
		key = ca->key;
		*ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
	}
	rcu_read_unlock();

	return key;
}

/* Resolve a key back to an algorithm name.
 *
 * When an algorithm with @key is registered, up to TCP_CA_NAME_MAX bytes of
 * its name are copied into @buffer with strncpy() and @buffer is returned.
 * Otherwise @buffer is left untouched and NULL is returned.
 */
char *tcp_ca_get_name_by_key(u32 key, char *buffer)
{
	const struct tcp_congestion_ops *ca;
	char *ret = NULL;

	rcu_read_lock();
	ca = tcp_ca_find_key(key);
	if (ca)
		ret = strncpy(buffer, ca->name,
			      TCP_CA_NAME_MAX);
	rcu_read_unlock();

	return ret;
}

/* Assign choice of congestion control.
*/
void tcp_assign_congestion_control(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcp_congestion_ops *ca;

	rcu_read_lock();
	/* Start from the default configured for the socket's netns. */
	ca = rcu_dereference(net->ipv4.tcp_congestion_control);
	/* If no reference can be taken on it, fall back to tcp_reno. */
	if (unlikely(!bpf_try_module_get(ca, ca->owner)))
		ca = &tcp_reno;
	icsk->icsk_ca_ops = ca;
	rcu_read_unlock();

	/* Clear the per-socket private area before the algorithm uses it. */
	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
	if (ca->flags & TCP_CONG_NEEDS_ECN)
		INET_ECN_xmit(sk);
	else
		INET_ECN_dontxmit(sk);
}

/* Run the assigned algorithm's optional init hook, set the socket's ECN
 * transmit state according to tcp_ca_needs_ecn(), and mark the congestion
 * control as initialized.
 */
void tcp_init_congestion_control(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_sk(sk)->prior_ssthresh = 0;
	if (icsk->icsk_ca_ops->init)
		icsk->icsk_ca_ops->init(sk);
	if (tcp_ca_needs_ecn(sk))
		INET_ECN_xmit(sk);
	else
		INET_ECN_dontxmit(sk);
	icsk->icsk_ca_initialized = 1;
}

/* Switch the socket over to @ca.
 *
 * The current algorithm is cleaned up first, then @ca is installed, the
 * setsockopt marker is set and the private area is cleared.  The init hook
 * is only run when the socket is in neither the CLOSE nor the LISTEN state.
 */
static void tcp_reinit_congestion_control(struct sock *sk,
					  const struct tcp_congestion_ops *ca)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_cleanup_congestion_control(sk);
	icsk->icsk_ca_ops = ca;
	icsk->icsk_ca_setsockopt = 1;
	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));

	if (ca->flags & TCP_CONG_NEEDS_ECN)
		INET_ECN_xmit(sk);
	else
		INET_ECN_dontxmit(sk);

	if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
		tcp_init_congestion_control(sk);
}

/* Manage refcounts on socket close.
*/ void tcp_cleanup_congestion_control(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ca_ops->release) icsk->icsk_ca_ops->release(sk); bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner); } /* Used by sysctl to change default congestion control */ int tcp_set_default_congestion_control(struct net *net, const char *name) { struct tcp_congestion_ops *ca; const struct tcp_congestion_ops *prev; int ret; rcu_read_lock(); ca = tcp_ca_find_autoload(net, name); if (!ca) { ret = -ENOENT; } else if (!bpf_try_module_get(ca, ca->owner)) { ret = -EBUSY; } else if (!net_eq(net, &init_net) && !(ca->flags & TCP_CONG_NON_RESTRICTED)) { /* Only init netns can set default to a restricted algorithm */ ret = -EPERM; } else { prev = xchg(&net->ipv4.tcp_congestion_control, ca); if (prev) bpf_module_put(prev, prev->owner); ca->flags |= TCP_CONG_NON_RESTRICTED; ret = 0; } rcu_read_unlock(); return ret; } /* Set default value from kernel configuration at bootup */ static int __init tcp_congestion_default(void) { return tcp_set_default_congestion_control(&init_net, CONFIG_DEFAULT_TCP_CONG); } late_initcall(tcp_congestion_default); /* Build string with list of available congestion control values */ void tcp_get_available_congestion_control(char *buf, size_t maxlen) { struct tcp_congestion_ops *ca; size_t offs = 0; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { offs += snprintf(buf + offs, maxlen - offs, "%s%s", offs == 0 ? 
"" : " ", ca->name); if (WARN_ON_ONCE(offs >= maxlen)) break; } rcu_read_unlock(); } /* Get current default congestion control */ void tcp_get_default_congestion_control(struct net *net, char *name) { const struct tcp_congestion_ops *ca; rcu_read_lock(); ca = rcu_dereference(net->ipv4.tcp_congestion_control); strncpy(name, ca->name, TCP_CA_NAME_MAX); rcu_read_unlock(); } /* Built list of non-restricted congestion control values */ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) { struct tcp_congestion_ops *ca; size_t offs = 0; *buf = '\0'; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { if (!(ca->flags & TCP_CONG_NON_RESTRICTED)) continue; offs += snprintf(buf + offs, maxlen - offs, "%s%s", offs == 0 ? "" : " ", ca->name); if (WARN_ON_ONCE(offs >= maxlen)) break; } rcu_read_unlock(); } /* Change list of non-restricted congestion control */ int tcp_set_allowed_congestion_control(char *val) { struct tcp_congestion_ops *ca; char *saved_clone, *clone, *name; int ret = 0; saved_clone = clone = kstrdup(val, GFP_USER); if (!clone) return -ENOMEM; spin_lock(&tcp_cong_list_lock); /* pass 1 check for bad entries */ while ((name = strsep(&clone, " ")) && *name) { ca = tcp_ca_find(name); if (!ca) { ret = -ENOENT; goto out; } } /* pass 2 clear old values */ list_for_each_entry_rcu(ca, &tcp_cong_list, list) ca->flags &= ~TCP_CONG_NON_RESTRICTED; /* pass 3 mark as allowed */ while ((name = strsep(&val, " ")) && *name) { ca = tcp_ca_find(name); WARN_ON(!ca); if (ca) ca->flags |= TCP_CONG_NON_RESTRICTED; } out: spin_unlock(&tcp_cong_list_lock); kfree(saved_clone); return ret; } /* Change congestion control for socket. If load is false, then it is the * responsibility of the caller to call tcp_init_congestion_control or * tcp_reinit_congestion_control (if the current congestion control was * already initialized. 
*/ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool cap_net_admin) { struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_congestion_ops *ca; int err = 0; if (icsk->icsk_ca_dst_locked) return -EPERM; rcu_read_lock(); if (!load) ca = tcp_ca_find(name); else ca = tcp_ca_find_autoload(sock_net(sk), name); /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) { icsk->icsk_ca_setsockopt = 1; goto out; } if (!ca) err = -ENOENT; else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) err = -EPERM; else if (!bpf_try_module_get(ca, ca->owner)) err = -EBUSY; else tcp_reinit_congestion_control(sk, ca); out: rcu_read_unlock(); return err; } /* Slow start is used when congestion window is no greater than the slow start * threshold. We base on RFC2581 and also handle stretch ACKs properly. * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but * something better;) a packet is only considered (s)acked in its entirety to * defend the ACK attacks described in the RFC. Slow start processes a stretch * ACK of degree N as if N acks of degree 1 are received back to back except * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and * returns the leftover acks to adjust cwnd in congestion avoidance mode. */ __bpf_kfunc u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) { u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh); acked -= cwnd - tcp_snd_cwnd(tp); tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); return acked; } EXPORT_SYMBOL_GPL(tcp_slow_start); /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w), * for every packet that was ACKed. */ __bpf_kfunc void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) { /* If credits accumulated at a higher w, apply them gently now. 
*/ if (tp->snd_cwnd_cnt >= w) { tp->snd_cwnd_cnt = 0; tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); } tp->snd_cwnd_cnt += acked; if (tp->snd_cwnd_cnt >= w) { u32 delta = tp->snd_cwnd_cnt / w; tp->snd_cwnd_cnt -= delta * w; tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + delta); } tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp)); } EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); /* * TCP Reno congestion control * This is special case used for fallback as well. */ /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ __bpf_kfunc void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); if (!tcp_is_cwnd_limited(sk)) return; /* In "safe" area, increase. */ if (tcp_in_slow_start(tp)) { acked = tcp_slow_start(tp, acked); if (!acked) return; } /* In dangerous area, increase slowly. */ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked); } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); /* Slow start threshold is half the congestion window (min 2) */ __bpf_kfunc u32 tcp_reno_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); return max(tcp_snd_cwnd(tp) >> 1U, 2U); } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); __bpf_kfunc u32 tcp_reno_undo_cwnd(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); return max(tcp_snd_cwnd(tp), tp->prior_cwnd); } EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd); struct tcp_congestion_ops tcp_reno = { .flags = TCP_CONG_NON_RESTRICTED, .name = "reno", .owner = THIS_MODULE, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, .undo_cwnd = tcp_reno_undo_cwnd, };
536 102 77 1239 1239 1055 5639 3254 11700 904 12133 13 13 12808 12820 7115 11575 45 5451 5030 5038 1783 1786 3585 2111 3523 3109 1479 4853 69 2755 3301 1036 744 367 366 316 82 174 251 367 251 1633 942 715 880 1232 31 2 6 31 31 31 4887 2179 1585 2755 2214 3950 2215 2212 1 15 536 2215 418 2213 2088 747 2212 5229 4887 4892 4431 3526 5235 2548 2270 277 5 3299 4220 167 166 25 167 1456 4904 4506 1814 5796 5232 2275 232 5589 5 6 3238 4879 77 5229 2050 5586 25 5589 4531 72 72 1813 13 1864 4923 173 1327 2107 996 1263 1316 1047 609 1404 2895 1983 856 640 2146 513 2149 2141 2146 10 10 10 10 10 10 10 10 10 10 17 2 13 13 4 16 16 7 2 16 5 1 5 554 180 528 298 187 20 249 242 24 251 5798 628 263 1325 5578 2142 1239 2461 1230 578 1446 4318 223 547 217 2982 2506 1497 722 544 2982 2983 1890 555 2539 1215 2464 963 1717 2530 6 18 2530 1968 917 1897 1898 624 1856 20 20 9778 9765 1360 1083 790 790 790 1552 219 1341 1549 1545 106 40 66 106 446 100 340 11 447 7 7 5 7 7 6 2212 2205 6 21 1281 1280 1280 1282 1240 1240 1240 1236 1277 1279 61 61 173 173 174 173 173 3881 602 3322 46 3379 902 683 17 666 659 109 109 658 108 160 137 23 157 6 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 
746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 
1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 
1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 
1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 // SPDX-License-Identifier: GPL-2.0+ /* * XArray implementation * Copyright (c) 2017-2018 Microsoft Corporation * Copyright (c) 2018-2020 Oracle * Author: Matthew Wilcox <willy@infradead.org> */ #include <linux/bitmap.h> #include <linux/export.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/xarray.h> #include "radix-tree.h" /* * Coding conventions in this file: * * @xa is used to refer to the entire xarray. 
* @xas is the 'xarray operation state'. It may be either a pointer to * an xa_state, or an xa_state stored on the stack. This is an unfortunate * ambiguity. * @index is the index of the entry being operated on * @mark is an xa_mark_t; a small number indicating one of the mark bits. * @node refers to an xa_node; usually the primary one being operated on by * this function. * @offset is the index into the slots array inside an xa_node. * @parent refers to the @xa_node closer to the head than @node. * @entry refers to something stored in a slot in the xarray */ static inline unsigned int xa_lock_type(const struct xarray *xa) { return (__force unsigned int)xa->xa_flags & 3; } static inline void xas_lock_type(struct xa_state *xas, unsigned int lock_type) { if (lock_type == XA_LOCK_IRQ) xas_lock_irq(xas); else if (lock_type == XA_LOCK_BH) xas_lock_bh(xas); else xas_lock(xas); } static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type) { if (lock_type == XA_LOCK_IRQ) xas_unlock_irq(xas); else if (lock_type == XA_LOCK_BH) xas_unlock_bh(xas); else xas_unlock(xas); } static inline bool xa_track_free(const struct xarray *xa) { return xa->xa_flags & XA_FLAGS_TRACK_FREE; } static inline bool xa_zero_busy(const struct xarray *xa) { return xa->xa_flags & XA_FLAGS_ZERO_BUSY; } static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) { if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) xa->xa_flags |= XA_FLAGS_MARK(mark); } static inline void xa_mark_clear(struct xarray *xa, xa_mark_t mark) { if (xa->xa_flags & XA_FLAGS_MARK(mark)) xa->xa_flags &= ~(XA_FLAGS_MARK(mark)); } static inline unsigned long *node_marks(struct xa_node *node, xa_mark_t mark) { return node->marks[(__force unsigned)mark]; } static inline bool node_get_mark(struct xa_node *node, unsigned int offset, xa_mark_t mark) { return test_bit(offset, node_marks(node, mark)); } /* returns true if the bit was set */ static inline bool node_set_mark(struct xa_node *node, unsigned int offset, 
xa_mark_t mark) { return __test_and_set_bit(offset, node_marks(node, mark)); } /* returns true if the bit was set */ static inline bool node_clear_mark(struct xa_node *node, unsigned int offset, xa_mark_t mark) { return __test_and_clear_bit(offset, node_marks(node, mark)); } static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark) { return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE); } static inline void node_mark_all(struct xa_node *node, xa_mark_t mark) { bitmap_fill(node_marks(node, mark), XA_CHUNK_SIZE); } #define mark_inc(mark) do { \ mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \ } while (0) /* * xas_squash_marks() - Merge all marks to the first entry * @xas: Array operation state. * * Set a mark on the first entry if any entry has it set. Clear marks on * all sibling entries. */ static void xas_squash_marks(const struct xa_state *xas) { unsigned int mark = 0; unsigned int limit = xas->xa_offset + xas->xa_sibs + 1; if (!xas->xa_sibs) return; do { unsigned long *marks = xas->xa_node->marks[mark]; if (find_next_bit(marks, limit, xas->xa_offset + 1) == limit) continue; __set_bit(xas->xa_offset, marks); bitmap_clear(marks, xas->xa_offset + 1, xas->xa_sibs); } while (mark++ != (__force unsigned)XA_MARK_MAX); } /* extracts the offset within this node from the index */ static unsigned int get_offset(unsigned long index, struct xa_node *node) { return (index >> node->shift) & XA_CHUNK_MASK; } static void xas_set_offset(struct xa_state *xas) { xas->xa_offset = get_offset(xas->xa_index, xas->xa_node); } /* move the index either forwards (find) or backwards (sibling slot) */ static void xas_move_index(struct xa_state *xas, unsigned long offset) { unsigned int shift = xas->xa_node->shift; xas->xa_index &= ~XA_CHUNK_MASK << shift; xas->xa_index += offset << shift; } static void xas_next_offset(struct xa_state *xas) { xas->xa_offset++; xas_move_index(xas, xas->xa_offset); } static void *set_bounds(struct xa_state *xas) { xas->xa_node = 
XAS_BOUNDS; return NULL; } /* * Starts a walk. If the @xas is already valid, we assume that it's on * the right path and just return where we've got to. If we're in an * error state, return NULL. If the index is outside the current scope * of the xarray, return NULL without changing @xas->xa_node. Otherwise * set @xas->xa_node to NULL and return the current head of the array. */ static void *xas_start(struct xa_state *xas) { void *entry; if (xas_valid(xas)) return xas_reload(xas); if (xas_error(xas)) return NULL; entry = xa_head(xas->xa); if (!xa_is_node(entry)) { if (xas->xa_index) return set_bounds(xas); } else { if ((xas->xa_index >> xa_to_node(entry)->shift) > XA_CHUNK_MASK) return set_bounds(xas); } xas->xa_node = NULL; return entry; } static void *xas_descend(struct xa_state *xas, struct xa_node *node) { unsigned int offset = get_offset(xas->xa_index, node); void *entry = xa_entry(xas->xa, node, offset); xas->xa_node = node; while (xa_is_sibling(entry)) { offset = xa_to_sibling(entry); entry = xa_entry(xas->xa, node, offset); if (node->shift && xa_is_node(entry)) entry = XA_RETRY_ENTRY; } xas->xa_offset = offset; return entry; } /** * xas_load() - Load an entry from the XArray (advanced). * @xas: XArray operation state. * * Usually walks the @xas to the appropriate state to load the entry * stored at xa_index. However, it will do nothing and return %NULL if * @xas is in an error state. xas_load() will never expand the tree. * * If the xa_state is set up to operate on a multi-index entry, xas_load() * may return %NULL or an internal entry, even if there are entries * present within the range specified by @xas. * * Context: Any context. The caller should hold the xa_lock or the RCU lock. * Return: Usually an entry in the XArray, but see description for exceptions. 
*/ void *xas_load(struct xa_state *xas) { void *entry = xas_start(xas); while (xa_is_node(entry)) { struct xa_node *node = xa_to_node(entry); if (xas->xa_shift > node->shift) break; entry = xas_descend(xas, node); if (node->shift == 0) break; } return entry; } EXPORT_SYMBOL_GPL(xas_load); #define XA_RCU_FREE ((struct xarray *)1) static void xa_node_free(struct xa_node *node) { XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); node->array = XA_RCU_FREE; call_rcu(&node->rcu_head, radix_tree_node_rcu_free); } /* * xas_destroy() - Free any resources allocated during the XArray operation. * @xas: XArray operation state. * * Most users will not need to call this function; it is called for you * by xas_nomem(). */ void xas_destroy(struct xa_state *xas) { struct xa_node *next, *node = xas->xa_alloc; while (node) { XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); next = rcu_dereference_raw(node->parent); radix_tree_node_rcu_free(&node->rcu_head); xas->xa_alloc = node = next; } } /** * xas_nomem() - Allocate memory if needed. * @xas: XArray operation state. * @gfp: Memory allocation flags. * * If we need to add new nodes to the XArray, we try to allocate memory * with GFP_NOWAIT while holding the lock, which will usually succeed. * If it fails, @xas is flagged as needing memory to continue. The caller * should drop the lock and call xas_nomem(). If xas_nomem() succeeds, * the caller should retry the operation. * * Forward progress is guaranteed as one node is allocated here and * stored in the xa_state where it will be found by xas_alloc(). More * nodes will likely be found in the slab allocator, but we do not tie * them up here. * * Return: true if memory was needed, and was successfully allocated. 
*/
bool xas_nomem(struct xa_state *xas, gfp_t gfp)
{
	/* Anything other than -ENOMEM: release spare nodes, nothing to allocate. */
	if (xas->xa_node != XA_ERROR(-ENOMEM)) {
		xas_destroy(xas);
		return false;
	}
	if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
		gfp |= __GFP_ACCOUNT;
	xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
	if (!xas->xa_alloc)
		return false;
	/* ->parent links the spare-node list; this node is the only element. */
	xas->xa_alloc->parent = NULL;
	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
	/* Replace the error state with XAS_RESTART so the caller can retry. */
	xas->xa_node = XAS_RESTART;
	return true;
}
EXPORT_SYMBOL_GPL(xas_nomem);

/*
 * __xas_nomem() - Drop locks and allocate memory if needed.
 * @xas: XArray operation state.
 * @gfp: Memory allocation flags.
 *
 * Internal variant of xas_nomem().  The lock is only dropped around the
 * allocation when @gfp allows blocking.
 *
 * Return: true if memory was needed, and was successfully allocated.
 */
static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
	__must_hold(xas->xa->xa_lock)
{
	unsigned int lock_type = xa_lock_type(xas->xa);

	if (xas->xa_node != XA_ERROR(-ENOMEM)) {
		xas_destroy(xas);
		return false;
	}
	if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
		gfp |= __GFP_ACCOUNT;
	if (gfpflags_allow_blocking(gfp)) {
		/* Drop the lock while allocating, using the array's own lock type. */
		xas_unlock_type(xas, lock_type);
		xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
		xas_lock_type(xas, lock_type);
	} else {
		xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
	}
	if (!xas->xa_alloc)
		return false;
	xas->xa_alloc->parent = NULL;
	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
	xas->xa_node = XAS_RESTART;
	return true;
}

/*
 * Report a change to @node: call the xa_update callback if @xas has one,
 * otherwise just assert that the node is not on a private list.
 */
static void xas_update(struct xa_state *xas, struct xa_node *node)
{
	if (xas->xa_update)
		xas->xa_update(node);
	else
		XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
}

/*
 * Obtain a node for use at @shift below the node currently in @xas.
 * A node stashed in xas->xa_alloc is used first; otherwise one is
 * allocated with GFP_NOWAIT.  On allocation failure @xas is set to
 * -ENOMEM and NULL is returned.  NULL is also returned if @xas is invalid.
 *
 * The new node is initialised (shift, counts, parent, array) and the
 * parent's count is incremented, but the node is not stored into any
 * slot here.
 */
static void *xas_alloc(struct xa_state *xas, unsigned int shift)
{
	struct xa_node *parent = xas->xa_node;
	struct xa_node *node = xas->xa_alloc;

	if (xas_invalid(xas))
		return NULL;

	if (node) {
		xas->xa_alloc = NULL;
	} else {
		gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;

		if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
			gfp |= __GFP_ACCOUNT;

		node = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
		if (!node) {
			xas_set_err(xas, -ENOMEM);
			return NULL;
		}
	}

	if (parent) {
		/* Record which parent slot this node will occupy. */
		node->offset = xas->xa_offset;
		parent->count++;
		XA_NODE_BUG_ON(node, parent->count > XA_CHUNK_SIZE);
		xas_update(xas, parent);
	}
	XA_NODE_BUG_ON(node, shift > BITS_PER_LONG);
	XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
	node->shift = shift;
	node->count = 0;
	node->nr_values = 0;
	RCU_INIT_POINTER(node->parent, xas->xa_node);
	node->array = xas->xa;

	return node;
}

#ifdef CONFIG_XARRAY_MULTI
/* Returns the number of indices covered by a given xa_state */
static unsigned long xas_size(const struct xa_state *xas)
{
	return (xas->xa_sibs + 1UL) << xas->xa_shift;
}
#endif

/*
 * Use this to calculate the maximum index that will need to be created
 * in order to add the entry described by @xas.  Because we cannot store a
 * multi-index entry at index 0, the calculation is a little more complex
 * than you might expect.
 */
static unsigned long xas_max(struct xa_state *xas)
{
	unsigned long max = xas->xa_index;

#ifdef CONFIG_XARRAY_MULTI
	if (xas->xa_shift || xas->xa_sibs) {
		unsigned long mask = xas_size(xas) - 1;
		max |= mask;
		/* The range starts at index 0: ask for one index beyond it. */
		if (mask == max)
			max++;
	}
#endif

	return max;
}

/* The maximum index that can be contained in the array without expanding it */
static unsigned long max_index(void *entry)
{
	if (!xa_is_node(entry))
		return 0;
	return (XA_CHUNK_SIZE << xa_to_node(entry)->shift) - 1;
}

/*
 * Starting at @xas->xa_node, repeatedly replace the head of the array
 * with the sole entry of that node while the node has exactly one entry,
 * it is in slot 0, and (for a node with non-zero shift) it is itself a
 * node.  Each node removed this way is freed.
 */
static void xas_shrink(struct xa_state *xas)
{
	struct xarray *xa = xas->xa;
	struct xa_node *node = xas->xa_node;

	for (;;) {
		void *entry;

		XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
		if (node->count != 1)
			break;
		entry = xa_entry_locked(xa, node, 0);
		if (!entry)
			break;
		if (!xa_is_node(entry) && node->shift)
			break;
		/* With XA_FLAGS_ZERO_BUSY a zero entry becomes a NULL head. */
		if (xa_is_zero(entry) && xa_zero_busy(xa))
			entry = NULL;
		xas->xa_node = XAS_BOUNDS;

		RCU_INIT_POINTER(xa->xa_head, entry);
		if (xa_track_free(xa) && !node_get_mark(node, 0, XA_FREE_MARK))
			xa_mark_clear(xa, XA_FREE_MARK);

		node->count = 0;
		node->nr_values = 0;
		/* Leave a retry marker in the old slot unless it holds a node. */
		if (!xa_is_node(entry))
			RCU_INIT_POINTER(node->slots[0], XA_RETRY_ENTRY);
		xas_update(xas, node);
		xa_node_free(node);
		if (!xa_is_node(entry))
			break;
		node = xa_to_node(entry);
		node->parent = NULL;
	}
}

/*
 * xas_delete_node() - Attempt to delete an xa_node
 * @xas: Array operation state.
 *
 * Attempts to delete the @xas->xa_node.  This will fail if xa->node has
 * a non-zero reference count.
 *
 * Empty nodes are freed going up towards the head; @xas is moved to the
 * parent each time.  If the walk stops at a node with no parent,
 * xas_shrink() is called.
 */
static void xas_delete_node(struct xa_state *xas)
{
	struct xa_node *node = xas->xa_node;

	for (;;) {
		struct xa_node *parent;

		XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
		if (node->count)
			break;

		parent = xa_parent_locked(xas->xa, node);
		xas->xa_node = parent;
		xas->xa_offset = node->offset;
		xa_node_free(node);

		if (!parent) {
			/* That was the last node: the array is now empty. */
			xas->xa->xa_head = NULL;
			xas->xa_node = XAS_BOUNDS;
			return;
		}

		parent->slots[xas->xa_offset] = NULL;
		parent->count--;
		XA_NODE_BUG_ON(parent, parent->count > XA_CHUNK_SIZE);
		node = parent;
		xas_update(xas, node);
	}

	if (!node->parent)
		xas_shrink(xas);
}

/**
 * xas_free_nodes() - Free this node and all nodes that it references
 * @xas: Array operation state.
 * @top: Node to free
 *
 * This node has been removed from the tree.  We must now free it and all
 * of its subnodes.  There may be RCU walkers with references into the tree,
 * so we must replace all entries with retry markers.
*/
static void xas_free_nodes(struct xa_state *xas, struct xa_node *top)
{
	unsigned int offset = 0;
	struct xa_node *node = top;

	for (;;) {
		void *entry = xa_entry_locked(xas->xa, node, offset);

		/* Descend into child nodes first. */
		if (node->shift && xa_is_node(entry)) {
			node = xa_to_node(entry);
			offset = 0;
			continue;
		}
		if (entry)
			RCU_INIT_POINTER(node->slots[offset], XA_RETRY_ENTRY);
		offset++;
		/* Finished this node: free it and resume in the parent's next slot. */
		while (offset == XA_CHUNK_SIZE) {
			struct xa_node *parent;

			parent = xa_parent_locked(xas->xa, node);
			offset = node->offset + 1;
			node->count = 0;
			node->nr_values = 0;
			xas_update(xas, node);
			xa_node_free(node);
			if (node == top)
				return;
			node = parent;
		}
	}
}

/*
 * xas_expand adds nodes to the head of the tree until it has reached
 * sufficient height to be able to contain @xas->xa_index
 *
 * Returns the shift of the level above the (possibly new) head node, or
 * -ENOMEM if xas_alloc() fails.  For an empty array (@head is NULL)
 * nothing is allocated; only the shift that would be required is
 * returned.
 */
static int xas_expand(struct xa_state *xas, void *head)
{
	struct xarray *xa = xas->xa;
	struct xa_node *node = NULL;
	unsigned int shift = 0;
	unsigned long max = xas_max(xas);

	if (!head) {
		if (max == 0)
			return 0;
		while ((max >> shift) >= XA_CHUNK_SIZE)
			shift += XA_CHUNK_SHIFT;
		return shift + XA_CHUNK_SHIFT;
	} else if (xa_is_node(head)) {
		node = xa_to_node(head);
		shift = node->shift + XA_CHUNK_SHIFT;
	}
	xas->xa_node = NULL;

	while (max > max_index(head)) {
		xa_mark_t mark = 0;

		XA_NODE_BUG_ON(node, shift > BITS_PER_LONG);
		node = xas_alloc(xas, shift);
		if (!node)
			return -ENOMEM;

		/* The old head becomes the only entry, in slot 0. */
		node->count = 1;
		if (xa_is_value(head))
			node->nr_values = 1;
		RCU_INIT_POINTER(node->slots[0], head);

		/* Propagate the aggregated mark info to the new child */
		for (;;) {
			if (xa_track_free(xa) && mark == XA_FREE_MARK) {
				node_mark_all(node, XA_FREE_MARK);
				if (!xa_marked(xa, XA_FREE_MARK)) {
					node_clear_mark(node, 0, XA_FREE_MARK);
					xa_mark_set(xa, XA_FREE_MARK);
				}
			} else if (xa_marked(xa, mark)) {
				node_set_mark(node, 0, mark);
			}
			if (mark == XA_MARK_MAX)
				break;
			mark_inc(mark);
		}

		/*
		 * Now that the new node is fully initialised, we can add
		 * it to the tree
		 */
		if (xa_is_node(head)) {
			xa_to_node(head)->offset = 0;
			rcu_assign_pointer(xa_to_node(head)->parent, node);
		}
		head = xa_mk_node(node);
		rcu_assign_pointer(xa->xa_head, head);
		xas_update(xas, node);

		shift += XA_CHUNK_SHIFT;
	}

	xas->xa_node = node;
	return shift;
}

/*
 * xas_create() - Create a slot to store an entry in.
 * @xas: XArray operation state.
 * @allow_root: %true if we can store the entry in the root directly
 *
 * Most users will not need to call this function directly, as it is called
 * by xas_store().  It is useful for doing conditional store operations
 * (see the xa_cmpxchg() implementation for an example).
 *
 * Return: If the slot already existed, returns the contents of this slot.
 * If the slot was newly created, returns %NULL.  If it failed to create the
 * slot, returns %NULL and indicates the error in @xas.
 */
static void *xas_create(struct xa_state *xas, bool allow_root)
{
	struct xarray *xa = xas->xa;
	void *entry;
	void __rcu **slot;
	struct xa_node *node = xas->xa_node;
	int shift;
	unsigned int order = xas->xa_shift;

	if (xas_top(node)) {
		/* Starting from the head: grow the tree if necessary. */
		entry = xa_head_locked(xa);
		xas->xa_node = NULL;
		if (!entry && xa_zero_busy(xa))
			entry = XA_ZERO_ENTRY;
		shift = xas_expand(xas, entry);
		if (shift < 0)
			return NULL;
		/* Force a node to be created when the head may not hold the entry. */
		if (!shift && !allow_root)
			shift = XA_CHUNK_SHIFT;
		entry = xa_head_locked(xa);
		slot = &xa->xa_head;
	} else if (xas_error(xas)) {
		return NULL;
	} else if (node) {
		/* Continue from the node @xas already points at. */
		unsigned int offset = xas->xa_offset;

		shift = node->shift;
		entry = xa_entry_locked(xa, node, offset);
		slot = &node->slots[offset];
	} else {
		shift = 0;
		entry = xa_head_locked(xa);
		slot = &xa->xa_head;
	}

	/* Descend, allocating missing nodes, until the requested order. */
	while (shift > order) {
		shift -= XA_CHUNK_SHIFT;
		if (!entry) {
			node = xas_alloc(xas, shift);
			if (!node)
				break;
			if (xa_track_free(xa))
				node_mark_all(node, XA_FREE_MARK);
			rcu_assign_pointer(*slot, xa_mk_node(node));
		} else if (xa_is_node(entry)) {
			node = xa_to_node(entry);
		} else {
			break;
		}
		entry = xas_descend(xas, node);
		slot = &node->slots[xas->xa_offset];
	}

	return entry;
}

/**
 * xas_create_range() - Ensure that stores to this range will succeed
 * @xas: XArray operation state.
*
 * Creates all of the slots in the range covered by @xas.  Sets @xas to
 * create single-index entries and positions it at the beginning of the
 * range.  This is for the benefit of users which have not yet been
 * converted to use multi-index entries.
 */
void xas_create_range(struct xa_state *xas)
{
	/* Saved so the index (and, on error, shift/sibs) can be restored. */
	unsigned long index = xas->xa_index;
	unsigned char shift = xas->xa_shift;
	unsigned char sibs = xas->xa_sibs;

	/* Start at the last index of the range and work backwards. */
	xas->xa_index |= ((sibs + 1UL) << shift) - 1;
	if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift)
		xas->xa_offset |= sibs;
	xas->xa_shift = 0;
	xas->xa_sibs = 0;

	for (;;) {
		xas_create(xas, true);
		if (xas_error(xas))
			goto restore;
		/* Done once the chunk containing the first index exists. */
		if (xas->xa_index <= (index | XA_CHUNK_MASK))
			goto success;
		xas->xa_index -= XA_CHUNK_SIZE;

		/*
		 * Reposition @xas for the previous chunk: move to the
		 * parent and step one slot back, repeating while the node
		 * just left was in slot 0 and is below the original shift.
		 */
		for (;;) {
			struct xa_node *node = xas->xa_node;
			if (node->shift >= shift)
				break;
			xas->xa_node = xa_parent_locked(xas->xa, node);
			xas->xa_offset = node->offset - 1;
			if (node->offset != 0)
				break;
		}
	}

restore:
	xas->xa_shift = shift;
	xas->xa_sibs = sibs;
	xas->xa_index = index;
	return;
success:
	xas->xa_index = index;
	if (xas->xa_node)
		xas_set_offset(xas);
}
EXPORT_SYMBOL_GPL(xas_create_range);

/*
 * Apply the @count and @values deltas to @node, notify via xas_update(),
 * and try to delete the node if entries were removed.  Does nothing if
 * @node is NULL or both deltas are zero.
 */
static void update_node(struct xa_state *xas, struct xa_node *node,
		int count, int values)
{
	if (!node || (!count && !values))
		return;

	node->count += count;
	node->nr_values += values;
	XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
	XA_NODE_BUG_ON(node, node->nr_values > XA_CHUNK_SIZE);
	xas_update(xas, node);
	if (count < 0)
		xas_delete_node(xas);
}

/**
 * xas_store() - Store this entry in the XArray.
 * @xas: XArray operation state.
 * @entry: New entry.
 *
 * If @xas is operating on a multi-index entry, the entry returned by this
 * function is essentially meaningless (it may be an internal entry or it
 * may be %NULL, even if there are non-NULL entries at some of the indices
 * covered by the range).  This is not a problem for any current users,
 * and can be changed if needed.
 *
 * Return: The old entry at this index.
*/
void *xas_store(struct xa_state *xas, void *entry)
{
	struct xa_node *node;
	void __rcu **slot = &xas->xa->xa_head;
	unsigned int offset, max;
	int count = 0;
	int values = 0;
	void *first, *next;
	bool value = xa_is_value(entry);

	/* Storing NULL never needs to create slots; a plain load suffices. */
	if (entry) {
		bool allow_root = !xa_is_node(entry) && !xa_is_zero(entry);
		first = xas_create(xas, allow_root);
	} else {
		first = xas_load(xas);
	}

	if (xas_invalid(xas))
		return first;
	node = xas->xa_node;
	if (node && (xas->xa_shift < node->shift))
		xas->xa_sibs = 0;
	/* Nothing to do if a single slot already holds @entry. */
	if ((first == entry) && !xas->xa_sibs)
		return first;

	next = first;
	offset = xas->xa_offset;
	max = xas->xa_offset + xas->xa_sibs;
	if (node) {
		slot = &node->slots[offset];
		if (xas->xa_sibs)
			xas_squash_marks(xas);
	}
	if (!entry)
		xas_init_marks(xas);

	for (;;) {
		/*
		 * Must clear the marks before setting the entry to NULL,
		 * otherwise xas_for_each_marked may find a NULL entry and
		 * stop early.  rcu_assign_pointer contains a release barrier
		 * so the mark clearing will appear to happen before the
		 * entry is set to NULL.
		 */
		rcu_assign_pointer(*slot, entry);
		/* A subtree that was just overwritten must be freed. */
		if (xa_is_node(next) && (!node || node->shift))
			xas_free_nodes(xas, xa_to_node(next));
		if (!node)
			break;
		/* Accumulate the node's entry and value count deltas. */
		count += !next - !entry;
		values += !xa_is_value(first) - !value;
		if (entry) {
			if (offset == max)
				break;
			/* Slots after the first hold a sibling entry pointing back. */
			if (!xa_is_sibling(entry))
				entry = xa_mk_sibling(xas->xa_offset);
		} else {
			if (offset == XA_CHUNK_MASK)
				break;
		}
		next = xa_entry_locked(xas->xa, node, ++offset);
		if (!xa_is_sibling(next)) {
			if (!entry && (offset > max))
				break;
			first = next;
		}
		slot++;
	}

	update_node(xas, node, count, values);
	return first;
}
EXPORT_SYMBOL_GPL(xas_store);

/**
 * xas_get_mark() - Returns the state of this mark.
 * @xas: XArray operation state.
 * @mark: Mark number.
 *
 * Return: true if the mark is set, false if the mark is clear or @xas
 * is in an error state.
*/ bool xas_get_mark(const struct xa_state *xas, xa_mark_t mark) { if (xas_invalid(xas)) return false; if (!xas->xa_node) return xa_marked(xas->xa, mark); return node_get_mark(xas->xa_node, xas->xa_offset, mark); } EXPORT_SYMBOL_GPL(xas_get_mark); /** * xas_set_mark() - Sets the mark on this entry and its parents. * @xas: XArray operation state. * @mark: Mark number. * * Sets the specified mark on this entry, and walks up the tree setting it * on all the ancestor entries. Does nothing if @xas has not been walked to * an entry, or is in an error state. */ void xas_set_mark(const struct xa_state *xas, xa_mark_t mark) { struct xa_node *node = xas->xa_node; unsigned int offset = xas->xa_offset; if (xas_invalid(xas)) return; while (node) { if (node_set_mark(node, offset, mark)) return; offset = node->offset; node = xa_parent_locked(xas->xa, node); } if (!xa_marked(xas->xa, mark)) xa_mark_set(xas->xa, mark); } EXPORT_SYMBOL_GPL(xas_set_mark); /** * xas_clear_mark() - Clears the mark on this entry and its parents. * @xas: XArray operation state. * @mark: Mark number. * * Clears the specified mark on this entry, and walks back to the head * attempting to clear it on all the ancestor entries. Does nothing if * @xas has not been walked to an entry, or is in an error state. */ void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark) { struct xa_node *node = xas->xa_node; unsigned int offset = xas->xa_offset; if (xas_invalid(xas)) return; while (node) { if (!node_clear_mark(node, offset, mark)) return; if (node_any_mark(node, mark)) return; offset = node->offset; node = xa_parent_locked(xas->xa, node); } if (xa_marked(xas->xa, mark)) xa_mark_clear(xas->xa, mark); } EXPORT_SYMBOL_GPL(xas_clear_mark); /** * xas_init_marks() - Initialise all marks for the entry * @xas: Array operations state. * * Initialise all marks for the entry specified by @xas. If we're tracking * free entries with a mark, we need to set it on all entries. All other * marks are cleared. 
* * This implementation is not as efficient as it could be; we may walk * up the tree multiple times. */ void xas_init_marks(const struct xa_state *xas) { xa_mark_t mark = 0; for (;;) { if (xa_track_free(xas->xa) && mark == XA_FREE_MARK) xas_set_mark(xas, mark); else xas_clear_mark(xas, mark); if (mark == XA_MARK_MAX) break; mark_inc(mark); } } EXPORT_SYMBOL_GPL(xas_init_marks); #ifdef CONFIG_XARRAY_MULTI static unsigned int node_get_marks(struct xa_node *node, unsigned int offset) { unsigned int marks = 0; xa_mark_t mark = XA_MARK_0; for (;;) { if (node_get_mark(node, offset, mark)) marks |= 1 << (__force unsigned int)mark; if (mark == XA_MARK_MAX) break; mark_inc(mark); } return marks; } static void node_set_marks(struct xa_node *node, unsigned int offset, struct xa_node *child, unsigned int marks) { xa_mark_t mark = XA_MARK_0; for (;;) { if (marks & (1 << (__force unsigned int)mark)) { node_set_mark(node, offset, mark); if (child) node_mark_all(child, mark); } if (mark == XA_MARK_MAX) break; mark_inc(mark); } } /** * xas_split_alloc() - Allocate memory for splitting an entry. * @xas: XArray operation state. * @entry: New entry which will be stored in the array. * @order: Current entry order. * @gfp: Memory allocation flags. * * This function should be called before calling xas_split(). * If necessary, it will allocate new nodes (and fill them with @entry) * to prepare for the upcoming split of an entry of @order size into * entries of the order stored in the @xas. * * Context: May sleep if @gfp flags permit. 
*/ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, gfp_t gfp) { unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; unsigned int mask = xas->xa_sibs; /* XXX: no support for splitting really large entries yet */ if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order)) goto nomem; if (xas->xa_shift + XA_CHUNK_SHIFT > order) return; do { unsigned int i; void *sibling = NULL; struct xa_node *node; node = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp); if (!node) goto nomem; node->array = xas->xa; for (i = 0; i < XA_CHUNK_SIZE; i++) { if ((i & mask) == 0) { RCU_INIT_POINTER(node->slots[i], entry); sibling = xa_mk_sibling(i); } else { RCU_INIT_POINTER(node->slots[i], sibling); } } RCU_INIT_POINTER(node->parent, xas->xa_alloc); xas->xa_alloc = node; } while (sibs-- > 0); return; nomem: xas_destroy(xas); xas_set_err(xas, -ENOMEM); } EXPORT_SYMBOL_GPL(xas_split_alloc); /** * xas_split() - Split a multi-index entry into smaller entries. * @xas: XArray operation state. * @entry: New entry to store in the array. * @order: Current entry order. * * The size of the new entries is set in @xas. The value in @entry is * copied to all the replacement entries. * * Context: Any context. The caller should hold the xa_lock. */ void xas_split(struct xa_state *xas, void *entry, unsigned int order) { unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; unsigned int offset, marks; struct xa_node *node; void *curr = xas_load(xas); int values = 0; node = xas->xa_node; if (xas_top(node)) return; marks = node_get_marks(node, xas->xa_offset); offset = xas->xa_offset + sibs; do { if (xas->xa_shift < node->shift) { struct xa_node *child = xas->xa_alloc; xas->xa_alloc = rcu_dereference_raw(child->parent); child->shift = node->shift - XA_CHUNK_SHIFT; child->offset = offset; child->count = XA_CHUNK_SIZE; child->nr_values = xa_is_value(entry) ? 
XA_CHUNK_SIZE : 0; RCU_INIT_POINTER(child->parent, node); node_set_marks(node, offset, child, marks); rcu_assign_pointer(node->slots[offset], xa_mk_node(child)); if (xa_is_value(curr)) values--; xas_update(xas, child); } else { unsigned int canon = offset - xas->xa_sibs; node_set_marks(node, canon, NULL, marks); rcu_assign_pointer(node->slots[canon], entry); while (offset > canon) rcu_assign_pointer(node->slots[offset--], xa_mk_sibling(canon)); values += (xa_is_value(entry) - xa_is_value(curr)) * (xas->xa_sibs + 1); } } while (offset-- > xas->xa_offset); node->nr_values += values; xas_update(xas, node); } EXPORT_SYMBOL_GPL(xas_split); #endif /** * xas_pause() - Pause a walk to drop a lock. * @xas: XArray operation state. * * Some users need to pause a walk and drop the lock they're holding in * order to yield to a higher priority thread or carry out an operation * on an entry. Those users should call this function before they drop * the lock. It resets the @xas to be suitable for the next iteration * of the loop after the user has reacquired the lock. If most entries * found during a walk require you to call xas_pause(), the xa_for_each() * iterator may be more appropriate. * * Note that xas_pause() only works for forward iteration. If a user needs * to pause a reverse iteration, we will need a xas_pause_rev(). */ void xas_pause(struct xa_state *xas) { struct xa_node *node = xas->xa_node; if (xas_invalid(xas)) return; xas->xa_node = XAS_RESTART; if (node) { unsigned long offset = xas->xa_offset; while (++offset < XA_CHUNK_SIZE) { if (!xa_is_sibling(xa_entry(xas->xa, node, offset))) break; } xas->xa_index += (offset - xas->xa_offset) << node->shift; if (xas->xa_index == 0) xas->xa_node = XAS_BOUNDS; } else { xas->xa_index++; } } EXPORT_SYMBOL_GPL(xas_pause); /* * __xas_prev() - Find the previous entry in the XArray. * @xas: XArray operation state. * * Helper function for xas_prev() which handles all the complex cases * out of line. 
*/
void *__xas_prev(struct xa_state *xas)
{
	void *entry;

	/* The index is only stepped back when xas_frozen() is false. */
	if (!xas_frozen(xas->xa_node))
		xas->xa_index--;
	if (!xas->xa_node)
		return set_bounds(xas);
	/* Not positioned on a real node: let xas_load() do a full walk. */
	if (xas_not_node(xas->xa_node))
		return xas_load(xas);

	if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node))
		xas->xa_offset--;

	/*
	 * NOTE(review): 255 is presumably what xa_offset holds after being
	 * decremented from 0 (i.e. an 8-bit field) - confirm against
	 * struct xa_state.  While that is the case, climb to the parent and
	 * continue from the slot before the one this node occupied.
	 */
	while (xas->xa_offset == 255) {
		xas->xa_offset = xas->xa_node->offset - 1;
		xas->xa_node = xa_parent(xas->xa, xas->xa_node);
		if (!xas->xa_node)
			return set_bounds(xas);
	}

	/* Descend through child nodes until a non-node entry is reached. */
	for (;;) {
		entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
		if (!xa_is_node(entry))
			return entry;

		xas->xa_node = xa_to_node(entry);
		xas_set_offset(xas);
	}
}
EXPORT_SYMBOL_GPL(__xas_prev);

/*
 * __xas_next() - Find the next entry in the XArray.
 * @xas: XArray operation state.
 *
 * Helper function for xas_next() which handles all the complex cases
 * out of line.
 */
void *__xas_next(struct xa_state *xas)
{
	void *entry;

	/* The index is only stepped forward when xas_frozen() is false. */
	if (!xas_frozen(xas->xa_node))
		xas->xa_index++;
	if (!xas->xa_node)
		return set_bounds(xas);
	/* Not positioned on a real node: let xas_load() do a full walk. */
	if (xas_not_node(xas->xa_node))
		return xas_load(xas);

	if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node))
		xas->xa_offset++;

	/* Ran off the end of this node: climb and move to the following slot. */
	while (xas->xa_offset == XA_CHUNK_SIZE) {
		xas->xa_offset = xas->xa_node->offset + 1;
		xas->xa_node = xa_parent(xas->xa, xas->xa_node);
		if (!xas->xa_node)
			return set_bounds(xas);
	}

	/* Descend through child nodes until a non-node entry is reached. */
	for (;;) {
		entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
		if (!xa_is_node(entry))
			return entry;

		xas->xa_node = xa_to_node(entry);
		xas_set_offset(xas);
	}
}
EXPORT_SYMBOL_GPL(__xas_next);

/**
 * xas_find() - Find the next present entry in the XArray.
 * @xas: XArray operation state.
 * @max: Highest index to return.
 *
 * If the @xas has not yet been walked to an entry, return the entry
 * which has an index >= xas.xa_index.  If it has been walked, the entry
 * currently being pointed at has been processed, and so we move to the
 * next entry.
 *
 * If no entry is found and the array is smaller than @max, the iterator
 * is set to the smallest index not yet in the array.
This allows @xas
 * to be immediately passed to xas_store().
 *
 * Return: The entry, if found, otherwise %NULL.
 */
void *xas_find(struct xa_state *xas, unsigned long max)
{
	void *entry;

	/* An errored state, or one already at the bounds, yields nothing. */
	if (xas_error(xas) || xas->xa_node == XAS_BOUNDS)
		return NULL;
	if (xas->xa_index > max)
		return set_bounds(xas);

	if (!xas->xa_node) {
		/* No node recorded: the index is set to 1 before set_bounds(). */
		xas->xa_index = 1;
		return set_bounds(xas);
	} else if (xas->xa_node == XAS_RESTART) {
		/* Fresh walk: the entry at the starting index is a candidate. */
		entry = xas_load(xas);
		if (entry || xas_not_node(xas->xa_node))
			return entry;
	} else if (!xas->xa_node->shift &&
		    xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK)) {
		/* Bottom-level node whose offset disagrees with the index: recompute it. */
		xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1;
	}

	xas_next_offset(xas);

	while (xas->xa_node && (xas->xa_index <= max)) {
		/* End of this node: continue in the parent, one slot further on. */
		if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) {
			xas->xa_offset = xas->xa_node->offset + 1;
			xas->xa_node = xa_parent(xas->xa, xas->xa_node);
			continue;
		}

		entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
		if (xa_is_node(entry)) {
			/* Child node: descend and start from its first slot. */
			xas->xa_node = xa_to_node(entry);
			xas->xa_offset = 0;
			continue;
		}
		/* Empty slots and sibling slots are skipped. */
		if (entry && !xa_is_sibling(entry))
			return entry;

		xas_next_offset(xas);
	}

	/* Walked off the top of the tree without finding anything. */
	if (!xas->xa_node)
		xas->xa_node = XAS_BOUNDS;
	return NULL;
}
EXPORT_SYMBOL_GPL(xas_find);

/**
 * xas_find_marked() - Find the next marked entry in the XArray.
 * @xas: XArray operation state.
 * @max: Highest index to return.
 * @mark: Mark number to search for.
 *
 * If the @xas has not yet been walked to an entry, return the marked entry
 * which has an index >= xas.xa_index.  If it has been walked, the entry
 * currently being pointed at has been processed, and so we return the
 * first marked entry with an index > xas.xa_index.
 *
 * If no marked entry is found and the array is smaller than @max, @xas is
 * set to the bounds state and xas->xa_index is set to the smallest index
 * not yet in the array.  This allows @xas to be immediately passed to
 * xas_store().
 *
 * If no entry is found before @max is reached, @xas is set to the restart
 * state.
 *
 * Return: The entry, if found, otherwise %NULL.
*/
void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark)
{
	/* When true, xas_find_chunk() is asked to look past the current slot. */
	bool advance = true;
	unsigned int offset;
	void *entry;

	if (xas_error(xas))
		return NULL;
	if (xas->xa_index > max)
		goto max;

	if (!xas->xa_node) {
		xas->xa_index = 1;
		goto out;
	} else if (xas_top(xas->xa_node)) {
		/* Starting a walk from the head: the current index is a candidate. */
		advance = false;
		entry = xa_head(xas->xa);
		xas->xa_node = NULL;
		if (xas->xa_index > max_index(entry))
			goto out;
		if (!xa_is_node(entry)) {
			/* The head is a plain entry: the array-wide mark decides. */
			if (xa_marked(xas->xa, mark))
				return entry;
			xas->xa_index = 1;
			goto out;
		}
		xas->xa_node = xa_to_node(entry);
		xas->xa_offset = xas->xa_index >> xas->xa_node->shift;
	}

	while (xas->xa_index <= max) {
		/* End of this node: continue in the parent, one slot further on. */
		if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) {
			xas->xa_offset = xas->xa_node->offset + 1;
			xas->xa_node = xa_parent(xas->xa, xas->xa_node);
			if (!xas->xa_node)
				break;
			advance = false;
			continue;
		}

		if (!advance) {
			/* Landed on a sibling slot: move to the slot it refers to. */
			entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
			if (xa_is_sibling(entry)) {
				xas->xa_offset = xa_to_sibling(entry);
				xas_move_index(xas, xas->xa_offset);
			}
		}

		offset = xas_find_chunk(xas, advance, mark);
		if (offset > xas->xa_offset) {
			advance = false;
			xas_move_index(xas, offset);
			/* Mind the wrap */
			if ((xas->xa_index - 1) >= max)
				goto max;
			xas->xa_offset = offset;
			if (offset == XA_CHUNK_SIZE)
				continue;
		}

		entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
		/* A NULL slot is only reported when searching for the free mark. */
		if (!entry && !(xa_track_free(xas->xa) && mark == XA_FREE_MARK))
			continue;
		if (!xa_is_node(entry))
			return entry;
		xas->xa_node = xa_to_node(entry);
		xas_set_offset(xas);
	}

out:
	if (xas->xa_index > max)
		goto max;
	return set_bounds(xas);
max:
	xas->xa_node = XAS_RESTART;
	return NULL;
}
EXPORT_SYMBOL_GPL(xas_find_marked);

/**
 * xas_find_conflict() - Find the next present entry in a range.
 * @xas: XArray operation state.
 *
 * The @xas describes both a range and a position within that range.
 *
 * Context: Any context.  Expects xa_lock to be held.
 * Return: The next entry in the range covered by @xas or %NULL.
*/
void *xas_find_conflict(struct xa_state *xas)
{
	void *curr;

	if (xas_error(xas))
		return NULL;

	if (!xas->xa_node)
		return NULL;

	if (xas_top(xas->xa_node)) {
		/* First call: walk down from the head to the start of the range. */
		curr = xas_start(xas);
		if (!curr)
			return NULL;
		while (xa_is_node(curr)) {
			struct xa_node *node = xa_to_node(curr);
			curr = xas_descend(xas, node);
		}
		if (curr)
			return curr;
	}

	if (xas->xa_node->shift > xas->xa_shift)
		return NULL;

	for (;;) {
		if (xas->xa_node->shift == xas->xa_shift) {
			/* At the range's own level: stop after its last slot. */
			if ((xas->xa_offset & xas->xa_sibs) == xas->xa_sibs)
				break;
		} else if (xas->xa_offset == XA_CHUNK_MASK) {
			/* Below the range's level and at the last slot: climb. */
			xas->xa_offset = xas->xa_node->offset;
			xas->xa_node = xa_parent_locked(xas->xa, xas->xa_node);
			if (!xas->xa_node)
				break;
			continue;
		}
		curr = xa_entry_locked(xas->xa, xas->xa_node, ++xas->xa_offset);
		if (xa_is_sibling(curr))
			continue;
		/* Follow first slots downwards until a non-node entry is reached. */
		while (xa_is_node(curr)) {
			xas->xa_node = xa_to_node(curr);
			xas->xa_offset = 0;
			curr = xa_entry_locked(xas->xa, xas->xa_node, 0);
		}
		if (curr)
			return curr;
	}
	/* Nothing left: rewind the offset by the sibling count. */
	xas->xa_offset -= xas->xa_sibs;
	return NULL;
}
EXPORT_SYMBOL_GPL(xas_find_conflict);

/**
 * xa_load() - Load an entry from an XArray.
 * @xa: XArray.
 * @index: index into array.
 *
 * Zero entries are reported as %NULL, and the load is repeated for as long
 * as xas_retry() asks for it.
 *
 * Context: Any context.  Takes and releases the RCU lock.
 * Return: The entry at @index in @xa.
 */
void *xa_load(struct xarray *xa, unsigned long index)
{
	XA_STATE(xas, xa, index);
	void *entry;

	rcu_read_lock();
	do {
		entry = xas_load(&xas);
		if (xa_is_zero(entry))
			entry = NULL;
	} while (xas_retry(&xas, entry));
	rcu_read_unlock();

	return entry;
}
EXPORT_SYMBOL(xa_load);

/*
 * Turn the outcome of an operation into the value handed back to the caller:
 * a zero entry becomes NULL; otherwise, if @xas is in an error state, the
 * contents of xas->xa_node are returned instead of @curr (presumably the
 * encoded error - confirm against xas_set_err()).
 */
static void *xas_result(struct xa_state *xas, void *curr)
{
	if (xa_is_zero(curr))
		return NULL;
	if (xas_error(xas))
		curr = xas->xa_node;
	return curr;
}

/**
 * __xa_erase() - Erase this entry from the XArray while locked.
 * @xa: XArray.
 * @index: Index into array.
 *
 * After this function returns, loading from @index will return %NULL.
 * If the index is part of a multi-index entry, all indices will be erased
 * and none of the entries will be part of a multi-index entry.
 *
 * Context: Any context.
Expects xa_lock to be held on entry. * Return: The entry which used to be at this index. */ void *__xa_erase(struct xarray *xa, unsigned long index) { XA_STATE(xas, xa, index); return xas_result(&xas, xas_store(&xas, NULL)); } EXPORT_SYMBOL(__xa_erase); /** * xa_erase() - Erase this entry from the XArray. * @xa: XArray. * @index: Index of entry. * * After this function returns, loading from @index will return %NULL. * If the index is part of a multi-index entry, all indices will be erased * and none of the entries will be part of a multi-index entry. * * Context: Any context. Takes and releases the xa_lock. * Return: The entry which used to be at this index. */ void *xa_erase(struct xarray *xa, unsigned long index) { void *entry; xa_lock(xa); entry = __xa_erase(xa, index); xa_unlock(xa); return entry; } EXPORT_SYMBOL(xa_erase); /** * __xa_store() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * You must already be holding the xa_lock when calling this function. * It will drop the lock if needed to allocate memory, and then reacquire * it afterwards. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: The old entry at this index or xa_err() if an error happened. */ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { XA_STATE(xas, xa, index); void *curr; if (WARN_ON_ONCE(xa_is_advanced(entry))) return XA_ERROR(-EINVAL); if (xa_track_free(xa) && !entry) entry = XA_ZERO_ENTRY; do { curr = xas_store(&xas, entry); if (xa_track_free(xa)) xas_clear_mark(&xas, XA_FREE_MARK); } while (__xas_nomem(&xas, gfp)); return xas_result(&xas, curr); } EXPORT_SYMBOL(__xa_store); /** * xa_store() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. 
* * After this function returns, loads from this index will return @entry. * Storing into an existing multi-index entry updates the entry of every index. * The marks associated with @index are unaffected unless @entry is %NULL. * * Context: Any context. Takes and releases the xa_lock. * May sleep if the @gfp flags permit. * Return: The old entry at this index on success, xa_err(-EINVAL) if @entry * cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation * failed. */ void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { void *curr; xa_lock(xa); curr = __xa_store(xa, index, entry, gfp); xa_unlock(xa); return curr; } EXPORT_SYMBOL(xa_store); /** * __xa_cmpxchg() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @old: Old value to test against. * @entry: New entry. * @gfp: Memory allocation flags. * * You must already be holding the xa_lock when calling this function. * It will drop the lock if needed to allocate memory, and then reacquire * it afterwards. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: The old entry at this index or xa_err() if an error happened. */ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, void *old, void *entry, gfp_t gfp) { XA_STATE(xas, xa, index); void *curr; if (WARN_ON_ONCE(xa_is_advanced(entry))) return XA_ERROR(-EINVAL); do { curr = xas_load(&xas); if (curr == old) { xas_store(&xas, entry); if (xa_track_free(xa) && entry && !curr) xas_clear_mark(&xas, XA_FREE_MARK); } } while (__xas_nomem(&xas, gfp)); return xas_result(&xas, curr); } EXPORT_SYMBOL(__xa_cmpxchg); /** * __xa_insert() - Store this entry in the XArray if no entry is present. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * Inserting a NULL entry will store a reserved entry (like xa_reserve()) * if no entry is present. 
Inserting will fail if a reserved entry is
 * present, even though loading from this index will return NULL.
 *
 * Context: Any context.  Expects xa_lock to be held on entry.  May
 * release and reacquire xa_lock if @gfp flags permit.
 * Return: 0 if the store succeeded.  -EBUSY if another entry was present.
 * -ENOMEM if memory could not be allocated.
 */
int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
{
	XA_STATE(xas, xa, index);
	void *curr;

	if (WARN_ON_ONCE(xa_is_advanced(entry)))
		return -EINVAL;
	/* A NULL @entry is stored as a zero entry. */
	if (!entry)
		entry = XA_ZERO_ENTRY;

	do {
		curr = xas_load(&xas);
		if (!curr) {
			xas_store(&xas, entry);
			if (xa_track_free(xa))
				xas_clear_mark(&xas, XA_FREE_MARK);
		} else {
			/* Anything already loaded from this index blocks the insert. */
			xas_set_err(&xas, -EBUSY);
		}
	} while (__xas_nomem(&xas, gfp));

	return xas_error(&xas);
}
EXPORT_SYMBOL(__xa_insert);

#ifdef CONFIG_XARRAY_MULTI
/*
 * Position @xas at @first and choose xa_shift / xa_sibs for one store
 * covering indices from @first towards @last.  The caller (see
 * xa_store_range()) repeats this with an advanced @first until the whole
 * range has been covered.
 */
static void xas_set_range(struct xa_state *xas, unsigned long first,
		unsigned long last)
{
	unsigned int shift = 0;
	unsigned long sibs = last - first;
	unsigned int offset = XA_CHUNK_MASK;

	xas_set(xas, first);

	/*
	 * Go up one chunk level at a time for as long as @first is aligned
	 * to a chunk at the current level and the remaining span is large
	 * enough (see the two break conditions).
	 */
	while ((first & XA_CHUNK_MASK) == 0) {
		if (sibs < XA_CHUNK_MASK)
			break;
		if ((sibs == XA_CHUNK_MASK) && (offset < XA_CHUNK_MASK))
			break;
		shift += XA_CHUNK_SHIFT;
		if (offset == XA_CHUNK_MASK)
			offset = sibs & XA_CHUNK_MASK;
		sibs >>= XA_CHUNK_SHIFT;
		first >>= XA_CHUNK_SHIFT;
	}

	/* Clamp the sibling count so the store stays within one node. */
	offset = first & XA_CHUNK_MASK;
	if (offset + sibs > XA_CHUNK_MASK)
		sibs = XA_CHUNK_MASK - offset;
	/* Back off by one slot if the store would reach past @last. */
	if ((((first + sibs + 1) << shift) - 1) > last)
		sibs -= 1;

	xas->xa_shift = shift;
	xas->xa_sibs = sibs;
}

/**
 * xa_store_range() - Store this entry at a range of indices in the XArray.
 * @xa: XArray.
 * @first: First index to affect.
 * @last: Last index to affect.
 * @entry: New entry.
 * @gfp: Memory allocation flags.
 *
 * After this function returns, loads from any index between @first and @last,
 * inclusive will return @entry.
 * Storing into an existing multi-index entry updates the entry of every index.
 * The marks associated with @index are unaffected unless @entry is %NULL.
*
 * Context: Process context.  Takes and releases the xa_lock.  May sleep
 * if the @gfp flags permit.
 * Return: %NULL on success, xa_err(-EINVAL) if @entry cannot be stored in
 * an XArray, or xa_err(-ENOMEM) if memory allocation failed.
 */
void *xa_store_range(struct xarray *xa, unsigned long first,
		unsigned long last, void *entry, gfp_t gfp)
{
	XA_STATE(xas, xa, 0);

	if (WARN_ON_ONCE(xa_is_internal(entry)))
		return XA_ERROR(-EINVAL);
	/* An inverted range is rejected without a warning. */
	if (last < first)
		return XA_ERROR(-EINVAL);

	do {
		xas_lock(&xas);
		if (entry) {
			/*
			 * Create the slot for @last up front.  The order is
			 * the number of trailing zero bits of last + 1, or
			 * BITS_PER_LONG when last + 1 wraps to 0.
			 */
			unsigned int order = BITS_PER_LONG;
			if (last + 1)
				order = __ffs(last + 1);
			xas_set_order(&xas, last, order);
			xas_create(&xas, true);
			if (xas_error(&xas))
				goto unlock;
		}
		/* Cover the range piece by piece, advancing @first each time. */
		do {
			xas_set_range(&xas, first, last);
			xas_store(&xas, entry);
			if (xas_error(&xas))
				goto unlock;
			first += xas_size(&xas);
		} while (first <= last);
unlock:
		xas_unlock(&xas);
	} while (xas_nomem(&xas, gfp));

	return xas_result(&xas, NULL);
}
EXPORT_SYMBOL(xa_store_range);

/**
 * xa_get_order() - Get the order of an entry.
 * @xa: XArray.
 * @index: Index of the entry.
 *
 * The order is 0 when nothing is loaded from @index or when the walk did not
 * end inside a node.
 *
 * Context: Takes and releases the RCU lock.
 * Return: A number between 0 and 63 indicating the order of the entry.
 */
int xa_get_order(struct xarray *xa, unsigned long index)
{
	XA_STATE(xas, xa, index);
	void *entry;
	int order = 0;

	rcu_read_lock();
	entry = xas_load(&xas);

	if (!entry)
		goto unlock;

	if (!xas.xa_node)
		goto unlock;

	/*
	 * Probe the slots at power-of-two distances after the entry; each
	 * one that holds a sibling entry adds one to the order.
	 */
	for (;;) {
		unsigned int slot = xas.xa_offset + (1 << order);

		if (slot >= XA_CHUNK_SIZE)
			break;
		if (!xa_is_sibling(xas.xa_node->slots[slot]))
			break;
		order++;
	}

	/* Add the shift of the node the entry lives in. */
	order += xas.xa_node->shift;
unlock:
	rcu_read_unlock();

	return order;
}
EXPORT_SYMBOL(xa_get_order);
#endif /* CONFIG_XARRAY_MULTI */

/**
 * __xa_alloc() - Find somewhere to store this entry in the XArray.
 * @xa: XArray.
 * @id: Pointer to ID.
 * @limit: Range for allocated ID.
 * @entry: New entry.
 * @gfp: Memory allocation flags.
 *
 * Finds an empty entry in @xa between @limit.min and @limit.max,
 * stores the index into the @id pointer, then stores the entry at
 * that index.
A concurrent lookup will not see an uninitialised @id. * * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or * -EBUSY if there are no free entries in @limit. */ int __xa_alloc(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, gfp_t gfp) { XA_STATE(xas, xa, 0); if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; if (WARN_ON_ONCE(!xa_track_free(xa))) return -EINVAL; if (!entry) entry = XA_ZERO_ENTRY; do { xas.xa_index = limit.min; xas_find_marked(&xas, limit.max, XA_FREE_MARK); if (xas.xa_node == XAS_RESTART) xas_set_err(&xas, -EBUSY); else *id = xas.xa_index; xas_store(&xas, entry); xas_clear_mark(&xas, XA_FREE_MARK); } while (__xas_nomem(&xas, gfp)); return xas_error(&xas); } EXPORT_SYMBOL(__xa_alloc); /** * __xa_alloc_cyclic() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of allocated ID. * @next: Pointer to next ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * The search for an empty entry will start at @next and will wrap * around if necessary. * * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the * allocation succeeded after wrapping, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. 
*/ int __xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, u32 *next, gfp_t gfp) { u32 min = limit.min; int ret; limit.min = max(min, *next); ret = __xa_alloc(xa, id, entry, limit, gfp); if ((xa->xa_flags & XA_FLAGS_ALLOC_WRAPPED) && ret == 0) { xa->xa_flags &= ~XA_FLAGS_ALLOC_WRAPPED; ret = 1; } if (ret < 0 && limit.min > min) { limit.min = min; ret = __xa_alloc(xa, id, entry, limit, gfp); if (ret == 0) ret = 1; } if (ret >= 0) { *next = *id + 1; if (*next == 0) xa->xa_flags |= XA_FLAGS_ALLOC_WRAPPED; } return ret; } EXPORT_SYMBOL(__xa_alloc_cyclic); /** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * Attempting to set a mark on a %NULL entry does not succeed. * * Context: Any context. Expects xa_lock to be held on entry. */ void __xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { XA_STATE(xas, xa, index); void *entry = xas_load(&xas); if (entry) xas_set_mark(&xas, mark); } EXPORT_SYMBOL(__xa_set_mark); /** * __xa_clear_mark() - Clear this mark on this entry while locked. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * Context: Any context. Expects xa_lock to be held on entry. */ void __xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { XA_STATE(xas, xa, index); void *entry = xas_load(&xas); if (entry) xas_clear_mark(&xas, mark); } EXPORT_SYMBOL(__xa_clear_mark); /** * xa_get_mark() - Inquire whether this mark is set on this entry. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * This function uses the RCU read lock, so the result may be out of date * by the time it returns. If you need the result to be stable, use a lock. * * Context: Any context. Takes and releases the RCU lock. * Return: True if the entry at @index has this mark set, false if it doesn't. 
*/ bool xa_get_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { XA_STATE(xas, xa, index); void *entry; rcu_read_lock(); entry = xas_start(&xas); while (xas_get_mark(&xas, mark)) { if (!xa_is_node(entry)) goto found; entry = xas_descend(&xas, xa_to_node(entry)); } rcu_read_unlock(); return false; found: rcu_read_unlock(); return true; } EXPORT_SYMBOL(xa_get_mark); /** * xa_set_mark() - Set this mark on this entry. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * Attempting to set a mark on a %NULL entry does not succeed. * * Context: Process context. Takes and releases the xa_lock. */ void xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { xa_lock(xa); __xa_set_mark(xa, index, mark); xa_unlock(xa); } EXPORT_SYMBOL(xa_set_mark); /** * xa_clear_mark() - Clear this mark on this entry. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * Clearing a mark always succeeds. * * Context: Process context. Takes and releases the xa_lock. */ void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { xa_lock(xa); __xa_clear_mark(xa, index, mark); xa_unlock(xa); } EXPORT_SYMBOL(xa_clear_mark); /** * xa_find() - Search the XArray for an entry. * @xa: XArray. * @indexp: Pointer to an index. * @max: Maximum index to search to. * @filter: Selection criterion. * * Finds the entry in @xa which matches the @filter, and has the lowest * index that is at least @indexp and no more than @max. * If an entry is found, @indexp is updated to be the index of the entry. * This function is protected by the RCU read lock, so it may not find * entries which are being simultaneously added. It will not return an * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find(). * * Context: Any context. Takes and releases the RCU lock. * Return: The entry, if found, otherwise %NULL. 
*/ void *xa_find(struct xarray *xa, unsigned long *indexp, unsigned long max, xa_mark_t filter) { XA_STATE(xas, xa, *indexp); void *entry; rcu_read_lock(); do { if ((__force unsigned int)filter < XA_MAX_MARKS) entry = xas_find_marked(&xas, max, filter); else entry = xas_find(&xas, max); } while (xas_retry(&xas, entry)); rcu_read_unlock(); if (entry) *indexp = xas.xa_index; return entry; } EXPORT_SYMBOL(xa_find); static bool xas_sibling(struct xa_state *xas) { struct xa_node *node = xas->xa_node; unsigned long mask; if (!IS_ENABLED(CONFIG_XARRAY_MULTI) || !node) return false; mask = (XA_CHUNK_SIZE << node->shift) - 1; return (xas->xa_index & mask) > ((unsigned long)xas->xa_offset << node->shift); } /** * xa_find_after() - Search the XArray for a present entry. * @xa: XArray. * @indexp: Pointer to an index. * @max: Maximum index to search to. * @filter: Selection criterion. * * Finds the entry in @xa which matches the @filter and has the lowest * index that is above @indexp and no more than @max. * If an entry is found, @indexp is updated to be the index of the entry. * This function is protected by the RCU read lock, so it may miss entries * which are being simultaneously added. It will not return an * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find(). * * Context: Any context. Takes and releases the RCU lock. * Return: The pointer, if found, otherwise %NULL. 
*/ void *xa_find_after(struct xarray *xa, unsigned long *indexp, unsigned long max, xa_mark_t filter) { XA_STATE(xas, xa, *indexp + 1); void *entry; if (xas.xa_index == 0) return NULL; rcu_read_lock(); for (;;) { if ((__force unsigned int)filter < XA_MAX_MARKS) entry = xas_find_marked(&xas, max, filter); else entry = xas_find(&xas, max); if (xas_invalid(&xas)) break; if (xas_sibling(&xas)) continue; if (!xas_retry(&xas, entry)) break; } rcu_read_unlock(); if (entry) *indexp = xas.xa_index; return entry; } EXPORT_SYMBOL(xa_find_after); static unsigned int xas_extract_present(struct xa_state *xas, void **dst, unsigned long max, unsigned int n) { void *entry; unsigned int i = 0; rcu_read_lock(); xas_for_each(xas, entry, max) { if (xas_retry(xas, entry)) continue; dst[i++] = entry; if (i == n) break; } rcu_read_unlock(); return i; } static unsigned int xas_extract_marked(struct xa_state *xas, void **dst, unsigned long max, unsigned int n, xa_mark_t mark) { void *entry; unsigned int i = 0; rcu_read_lock(); xas_for_each_marked(xas, entry, max, mark) { if (xas_retry(xas, entry)) continue; dst[i++] = entry; if (i == n) break; } rcu_read_unlock(); return i; } /** * xa_extract() - Copy selected entries from the XArray into a normal array. * @xa: The source XArray to copy from. * @dst: The buffer to copy entries into. * @start: The first index in the XArray eligible to be selected. * @max: The last index in the XArray eligible to be selected. * @n: The maximum number of entries to copy. * @filter: Selection criterion. * * Copies up to @n entries that match @filter from the XArray. The * copied entries will have indices between @start and @max, inclusive. * * The @filter may be an XArray mark value, in which case entries which are * marked with that mark will be copied. It may also be %XA_PRESENT, in * which case all entries which are not %NULL will be copied. * * The entries returned may not represent a snapshot of the XArray at a * moment in time. 
For example, if another thread stores to index 5, then * index 10, calling xa_extract() may return the old contents of index 5 * and the new contents of index 10. Indices not modified while this * function is running will not be skipped. * * If you need stronger guarantees, holding the xa_lock across calls to this * function will prevent concurrent modification. * * Context: Any context. Takes and releases the RCU lock. * Return: The number of entries copied. */ unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start, unsigned long max, unsigned int n, xa_mark_t filter) { XA_STATE(xas, xa, start); if (!n) return 0; if ((__force unsigned int)filter < XA_MAX_MARKS) return xas_extract_marked(&xas, dst, max, n, filter); return xas_extract_present(&xas, dst, max, n); } EXPORT_SYMBOL(xa_extract); /** * xa_delete_node() - Private interface for workingset code. * @node: Node to be removed from the tree. * @update: Function to call to update ancestor nodes. * * Context: xa_lock must be held on entry and will not be released. */ void xa_delete_node(struct xa_node *node, xa_update_node_t update) { struct xa_state xas = { .xa = node->array, .xa_index = (unsigned long)node->offset << (node->shift + XA_CHUNK_SHIFT), .xa_shift = node->shift + XA_CHUNK_SHIFT, .xa_offset = node->offset, .xa_node = xa_parent_locked(node->array, node), .xa_update = update, }; xas_store(&xas, NULL); } EXPORT_SYMBOL_GPL(xa_delete_node); /* For the benefit of the test suite */ /** * xa_destroy() - Free all internal data structures. * @xa: XArray. * * After calling this function, the XArray is empty and has freed all memory * allocated for its internal data structures. You are responsible for * freeing the objects referenced by the XArray. * * Context: Any context. Takes and releases the xa_lock, interrupt-safe. 
*/
void xa_destroy(struct xarray *xa)
{
	XA_STATE(xas, xa, 0);
	unsigned long flags;
	void *entry;

	xas.xa_node = NULL;
	xas_lock_irqsave(&xas, flags);
	/* Detach the whole tree first, then reset the array-level marks. */
	entry = xa_head_locked(xa);
	RCU_INIT_POINTER(xa->xa_head, NULL);
	xas_init_marks(&xas);
	if (xa_zero_busy(xa))
		xa_mark_clear(xa, XA_FREE_MARK);
	/* lockdep checks we're still holding the lock in xas_free_nodes() */
	if (xa_is_node(entry))
		xas_free_nodes(&xas, xa_to_node(entry));
	xas_unlock_irqrestore(&xas, flags);
}
EXPORT_SYMBOL(xa_destroy);

#ifdef XA_DEBUG
/*
 * Print one node's header fields and its mark bitmaps on the current
 * printk line.  A NULL @node prints nothing; a pointer with either of its
 * two low bits set is printed as a bare address without being dereferenced.
 */
void xa_dump_node(const struct xa_node *node)
{
	unsigned i, j;

	if (!node)
		return;
	if ((unsigned long)node & 3) {
		pr_cont("node %px\n", node);
		return;
	}

	pr_cont("node %px %s %d parent %px shift %d count %d values %d "
		"array %px list %px %px marks",
		node, node->parent ? "offset" : "max", node->offset,
		node->parent, node->shift, node->count, node->nr_values,
		node->array, node->private_list.prev, node->private_list.next);
	for (i = 0; i < XA_MAX_MARKS; i++)
		for (j = 0; j < XA_MARK_LONGS; j++)
			pr_cont(" %lx", node->marks[i][j]);
	pr_cont("\n");
}

/*
 * Print the index, or index range, covered by a slot: a single index when
 * @shift is 0, the whole unsigned long range when @shift is at least
 * BITS_PER_LONG, otherwise @index up to @index with the low @shift bits set.
 */
void xa_dump_index(unsigned long index, unsigned int shift)
{
	if (!shift)
		pr_info("%lu: ", index);
	else if (shift >= BITS_PER_LONG)
		pr_info("0-%lu: ", ~0UL);
	else
		pr_info("%lu-%lu: ", index, index | ((1UL << shift) - 1));
}

/*
 * Print @entry, prefixed by the index range it covers.  Node entries are
 * printed together with all of their slots, recursively; every other kind
 * of entry gets a one-line description.  NULL entries print nothing.
 */
void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift)
{
	if (!entry)
		return;

	xa_dump_index(index, shift);

	if (xa_is_node(entry)) {
		if (shift == 0) {
			/* A node entry at shift 0 is only printed as an address. */
			pr_cont("%px\n", entry);
		} else {
			unsigned long i;
			struct xa_node *node = xa_to_node(entry);
			xa_dump_node(node);
			for (i = 0; i < XA_CHUNK_SIZE; i++)
				xa_dump_entry(node->slots[i],
				      index + (i << node->shift), node->shift);
		}
	} else if (xa_is_value(entry))
		pr_cont("value %ld (0x%lx) [%px]\n", xa_to_value(entry),
						xa_to_value(entry), entry);
	else if (!xa_is_internal(entry))
		pr_cont("%px\n", entry);
	else if (xa_is_retry(entry))
		pr_cont("retry (%ld)\n", xa_to_internal(entry));
	else if (xa_is_sibling(entry))
		pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry));
	else if (xa_is_zero(entry))
		pr_cont("zero (%ld)\n", xa_to_internal(entry));
	else
		pr_cont("UNKNOWN ENTRY (%px)\n", entry);
}

/*
 * Print a summary line for @xa (head pointer, flags and the state of the
 * first three marks), then dump everything reachable from the head.
 */
void xa_dump(const struct xarray *xa)
{
	void *entry = xa->xa_head;
	unsigned int shift = 0;

	pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry,
			xa->xa_flags, xa_marked(xa, XA_MARK_0),
			xa_marked(xa, XA_MARK_1), xa_marked(xa, XA_MARK_2));
	/* A node head covers one chunk level more than the node's own shift. */
	if (xa_is_node(entry))
		shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT;
	xa_dump_entry(entry, 0, shift);
}
#endif
1 9 9 5 3 2 1 2 6 1 7 2 2 4 6 9 1 1 1 1 1 34656 34672 34656 2 2 2 1 9 3 1 1 1 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 
1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 
1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 
1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838
// SPDX-License-Identifier: GPL-2.0-only
/*
 * xsave/xrstor support.
 *
 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
 */
#include <linux/bitops.h>
#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/nospec.h>
#include <linux/pkeys.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>

#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/xcr.h>

#include <asm/tlbflush.h>
#include <asm/prctl.h>
#include <asm/elf.h>

#include "context.h"
#include "internal.h"
#include "legacy.h"
#include "xstate.h"

/*
 * Iterate @bit over the bits set in @mask, starting at
 * FIRST_EXTENDED_XFEATURE.
 *
 * Note that this expands to TWO statements (an assignment followed by the
 * loop header), so it must not be used as the unbraced body of an if/else
 * or another loop.  @mask must be an lvalue because its address is taken.
 */
#define for_each_extended_xfeature(bit, mask)				\
	(bit) = FIRST_EXTENDED_XFEATURE;				\
	for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))

/*
 * Although we spell it out in here, the Processor Trace
 * xfeature is completely unused. We use other mechanisms
 * to save/restore PT state in Linux.
*/ static const char *xfeature_names[] = { "x87 floating point registers", "SSE registers", "AVX registers", "MPX bounds registers", "MPX CSR", "AVX-512 opmask", "AVX-512 Hi256", "AVX-512 ZMM_Hi256", "Processor Trace (unused)", "Protection Keys User registers", "PASID state", "Control-flow User registers", "Control-flow Kernel registers (unused)", "unknown xstate feature", "unknown xstate feature", "unknown xstate feature", "unknown xstate feature", "AMX Tile config", "AMX Tile data", "unknown xstate feature", }; static unsigned short xsave_cpuid_features[] __initdata = { [XFEATURE_FP] = X86_FEATURE_FPU, [XFEATURE_SSE] = X86_FEATURE_XMM, [XFEATURE_YMM] = X86_FEATURE_AVX, [XFEATURE_BNDREGS] = X86_FEATURE_MPX, [XFEATURE_BNDCSR] = X86_FEATURE_MPX, [XFEATURE_OPMASK] = X86_FEATURE_AVX512F, [XFEATURE_ZMM_Hi256] = X86_FEATURE_AVX512F, [XFEATURE_Hi16_ZMM] = X86_FEATURE_AVX512F, [XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT, [XFEATURE_PKRU] = X86_FEATURE_OSPKE, [XFEATURE_PASID] = X86_FEATURE_ENQCMD, [XFEATURE_CET_USER] = X86_FEATURE_SHSTK, [XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE, [XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE, }; static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init; #define XSTATE_FLAG_SUPERVISOR BIT(0) #define XSTATE_FLAG_ALIGNED64 BIT(1) /* * Return whether the system supports a given xfeature. * * Also return the name of the (most advanced) feature that the caller requested: */ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) { u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features; if (unlikely(feature_name)) { long xfeature_idx, max_idx; u64 xfeatures_print; /* * So we use FLS here to be able to print the most advanced * feature that was requested but is missing. 
So if a driver * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the * missing AVX feature - this is the most informative message * to users: */ if (xfeatures_missing) xfeatures_print = xfeatures_missing; else xfeatures_print = xfeatures_needed; xfeature_idx = fls64(xfeatures_print)-1; max_idx = ARRAY_SIZE(xfeature_names)-1; xfeature_idx = min(xfeature_idx, max_idx); *feature_name = xfeature_names[xfeature_idx]; } if (xfeatures_missing) return 0; return 1; } EXPORT_SYMBOL_GPL(cpu_has_xfeatures); static bool xfeature_is_aligned64(int xfeature_nr) { return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64; } static bool xfeature_is_supervisor(int xfeature_nr) { return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR; } static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature) { unsigned int offs, i; /* * Non-compacted format and legacy features use the cached fixed * offsets. */ if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) || xfeature <= XFEATURE_SSE) return xstate_offsets[xfeature]; /* * Compacted format offsets depend on the actual content of the * compacted xsave area which is determined by the xcomp_bv header * field. */ offs = FXSAVE_SIZE + XSAVE_HDR_SIZE; for_each_extended_xfeature(i, xcomp_bv) { if (xfeature_is_aligned64(i)) offs = ALIGN(offs, 64); if (i == xfeature) break; offs += xstate_sizes[i]; } return offs; } /* * Enable the extended processor state save/restore feature. * Called once per CPU onlining. */ void fpu__init_cpu_xstate(void) { if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features) return; cr4_set_bits(X86_CR4_OSXSAVE); /* * Must happen after CR4 setup and before xsetbv() to allow KVM * lazy passthrough. Write independent of the dynamic state static * key as that does not work on the boot CPU. This also ensures * that any stale state is wiped out from XFD. */ if (cpu_feature_enabled(X86_FEATURE_XFD)) wrmsrl(MSR_IA32_XFD, init_fpstate.xfd); /* * XCR_XFEATURE_ENABLED_MASK (aka. 
XCR0) sets user features * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user * states can be set here. */ xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features); /* * MSR_IA32_XSS sets supervisor states managed by XSAVES. */ if (boot_cpu_has(X86_FEATURE_XSAVES)) { wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | xfeatures_mask_independent()); } } static bool xfeature_enabled(enum xfeature xfeature) { return fpu_kernel_cfg.max_features & BIT_ULL(xfeature); } /* * Record the offsets and sizes of various xstates contained * in the XSAVE state memory layout. */ static void __init setup_xstate_cache(void) { u32 eax, ebx, ecx, edx, i; /* start at the beginning of the "extended state" */ unsigned int last_good_offset = offsetof(struct xregs_state, extended_state_area); /* * The FP xstates and SSE xstates are legacy states. They are always * in the fixed offsets in the xsave area in either compacted form * or standard form. */ xstate_offsets[XFEATURE_FP] = 0; xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state, xmm_space); xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP]; xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state, xmm_space); for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); xstate_sizes[i] = eax; xstate_flags[i] = ecx; /* * If an xfeature is supervisor state, the offset in EBX is * invalid, leave it to -1. */ if (xfeature_is_supervisor(i)) continue; xstate_offsets[i] = ebx; /* * In our xstate size checks, we assume that the highest-numbered * xstate feature has the highest offset in the buffer. Ensure * it does. 
*/ WARN_ONCE(last_good_offset > xstate_offsets[i], "x86/fpu: misordered xstate at %d\n", last_good_offset); last_good_offset = xstate_offsets[i]; } } static void __init print_xstate_feature(u64 xstate_mask) { const char *feature_name; if (cpu_has_xfeatures(xstate_mask, &feature_name)) pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name); } /* * Print out all the supported xstate features: */ static void __init print_xstate_features(void) { print_xstate_feature(XFEATURE_MASK_FP); print_xstate_feature(XFEATURE_MASK_SSE); print_xstate_feature(XFEATURE_MASK_YMM); print_xstate_feature(XFEATURE_MASK_BNDREGS); print_xstate_feature(XFEATURE_MASK_BNDCSR); print_xstate_feature(XFEATURE_MASK_OPMASK); print_xstate_feature(XFEATURE_MASK_ZMM_Hi256); print_xstate_feature(XFEATURE_MASK_Hi16_ZMM); print_xstate_feature(XFEATURE_MASK_PKRU); print_xstate_feature(XFEATURE_MASK_PASID); print_xstate_feature(XFEATURE_MASK_CET_USER); print_xstate_feature(XFEATURE_MASK_XTILE_CFG); print_xstate_feature(XFEATURE_MASK_XTILE_DATA); } /* * This check is important because it is easy to get XSTATE_* * confused with XSTATE_BIT_*. */ #define CHECK_XFEATURE(nr) do { \ WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ WARN_ON(nr >= XFEATURE_MAX); \ } while (0) /* * Print out xstate component offsets and sizes */ static void __init print_xstate_offset_size(void) { int i; for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, xfeature_get_offset(fpu_kernel_cfg.max_features, i), i, xstate_sizes[i]); } } /* * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. 
*/ static __init void os_xrstor_booting(struct xregs_state *xstate) { u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE; u32 lmask = mask; u32 hmask = mask >> 32; int err; if (cpu_feature_enabled(X86_FEATURE_XSAVES)) XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); else XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); /* * We should never fault when copying from a kernel buffer, and the FPU * state we set at boot time should be valid. */ WARN_ON_FPU(err); } /* * All supported features have either init state all zeros or are * handled in setup_init_fpu() individually. This is an explicit * feature list and does not use XFEATURE_MASK*SUPPORTED to catch * newly added supported features at build time and make people * actually look at the init state for the new feature. */ #define XFEATURES_INIT_FPSTATE_HANDLED \ (XFEATURE_MASK_FP | \ XFEATURE_MASK_SSE | \ XFEATURE_MASK_YMM | \ XFEATURE_MASK_OPMASK | \ XFEATURE_MASK_ZMM_Hi256 | \ XFEATURE_MASK_Hi16_ZMM | \ XFEATURE_MASK_PKRU | \ XFEATURE_MASK_BNDREGS | \ XFEATURE_MASK_BNDCSR | \ XFEATURE_MASK_PASID | \ XFEATURE_MASK_CET_USER | \ XFEATURE_MASK_XTILE) /* * setup the xstate image representing the init state */ static void __init setup_init_fpu_buf(void) { BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | XFEATURE_MASK_SUPERVISOR_SUPPORTED) != XFEATURES_INIT_FPSTATE_HANDLED); if (!boot_cpu_has(X86_FEATURE_XSAVE)) return; print_xstate_features(); xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures); /* * Init all the features state with header.xfeatures being 0x0 */ os_xrstor_booting(&init_fpstate.regs.xsave); /* * All components are now in init state. Read the state back so * that init_fpstate contains all non-zero init state. This only * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because * those use the init optimization which skips writing data for * components in init state. 
* * XSAVE could be used, but that would require to reshuffle the * data when XSAVEC/S is available because XSAVEC/S uses xstate * compaction. But doing so is a pointless exercise because most * components have an all zeros init state except for the legacy * ones (FP and SSE). Those can be saved with FXSAVE into the * legacy area. Adding new features requires to ensure that init * state is all zeroes or if not to add the necessary handling * here. */ fxsave(&init_fpstate.regs.fxsave); } int xfeature_size(int xfeature_nr) { u32 eax, ebx, ecx, edx; CHECK_XFEATURE(xfeature_nr); cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); return eax; } /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ static int validate_user_xstate_header(const struct xstate_header *hdr, struct fpstate *fpstate) { /* No unknown or supervisor features may be set */ if (hdr->xfeatures & ~fpstate->user_xfeatures) return -EINVAL; /* Userspace must use the uncompacted format */ if (hdr->xcomp_bv) return -EINVAL; /* * If 'reserved' is shrunken to add a new field, make sure to validate * that new field here! */ BUILD_BUG_ON(sizeof(hdr->reserved) != 48); /* No reserved bits may be set */ if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved))) return -EINVAL; return 0; } static void __init __xstate_dump_leaves(void) { int i; u32 eax, ebx, ecx, edx; static int should_dump = 1; if (!should_dump) return; should_dump = 0; /* * Dump out a few leaves past the ones that we support * just in case there are some goodies up there */ for (i = 0; i < XFEATURE_MAX + 10; i++) { cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n", XSTATE_CPUID, i, eax, ebx, ecx, edx); } } #define XSTATE_WARN_ON(x, fmt, ...) 
do { \ if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) { \ __xstate_dump_leaves(); \ } \ } while (0) #define XCHECK_SZ(sz, nr, __struct) ({ \ if (WARN_ONCE(sz != sizeof(__struct), \ "[%s]: struct is %zu bytes, cpu state %d bytes\n", \ xfeature_names[nr], sizeof(__struct), sz)) { \ __xstate_dump_leaves(); \ } \ true; \ }) /** * check_xtile_data_against_struct - Check tile data state size. * * Calculate the state size by multiplying the single tile size which is * recorded in a C struct, and the number of tiles that the CPU informs. * Compare the provided size with the calculation. * * @size: The tile data state size * * Returns: 0 on success, -EINVAL on mismatch. */ static int __init check_xtile_data_against_struct(int size) { u32 max_palid, palid, state_size; u32 eax, ebx, ecx, edx; u16 max_tile; /* * Check the maximum palette id: * eax: the highest numbered palette subleaf. */ cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx); /* * Cross-check each tile size and find the maximum number of * supported tiles. */ for (palid = 1, max_tile = 0; palid <= max_palid; palid++) { u16 tile_size, max; /* * Check the tile size info: * eax[31:16]: bytes per title * ebx[31:16]: the max names (or max number of tiles) */ cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx); tile_size = eax >> 16; max = ebx >> 16; if (tile_size != sizeof(struct xtile_data)) { pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n", __stringify(XFEATURE_XTILE_DATA), sizeof(struct xtile_data), tile_size); __xstate_dump_leaves(); return -EINVAL; } if (max > max_tile) max_tile = max; } state_size = sizeof(struct xtile_data) * max_tile; if (size != state_size) { pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n", __stringify(XFEATURE_XTILE_DATA), state_size, size); __xstate_dump_leaves(); return -EINVAL; } return 0; } /* * We have a C struct for each 'xstate'. 
We need to ensure * that our software representation matches what the CPU * tells us about the state's size. */ static bool __init check_xstate_against_struct(int nr) { /* * Ask the CPU for the size of the state. */ int sz = xfeature_size(nr); /* * Match each CPU state with the corresponding software * structure. */ switch (nr) { case XFEATURE_YMM: return XCHECK_SZ(sz, nr, struct ymmh_struct); case XFEATURE_BNDREGS: return XCHECK_SZ(sz, nr, struct mpx_bndreg_state); case XFEATURE_BNDCSR: return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state); case XFEATURE_OPMASK: return XCHECK_SZ(sz, nr, struct avx_512_opmask_state); case XFEATURE_ZMM_Hi256: return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state); case XFEATURE_Hi16_ZMM: return XCHECK_SZ(sz, nr, struct avx_512_hi16_state); case XFEATURE_PKRU: return XCHECK_SZ(sz, nr, struct pkru_state); case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state); case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg); case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state); case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true; default: XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr); return false; } return true; } static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted) { unsigned int topmost = fls64(xfeatures) - 1; unsigned int offset = xstate_offsets[topmost]; if (topmost <= XFEATURE_SSE) return sizeof(struct xregs_state); if (compacted) offset = xfeature_get_offset(xfeatures, topmost); return offset + xstate_sizes[topmost]; } /* * This essentially double-checks what the cpu told us about * how large the XSAVE buffer needs to be. We are recalculating * it to be safe. * * Independent XSAVE features allocate their own buffers and are not * covered by these checks. Only the size of the buffer for task->fpu * is checked here. 
*/
static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
{
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
	/* This initial value is overwritten below before it is ever read. */
	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	int i;

	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
		if (!check_xstate_against_struct(i))
			return false;
		/*
		 * Supervisor state components can be managed only by
		 * XSAVES.
		 */
		if (!xsaves && xfeature_is_supervisor(i)) {
			XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
			return false;
		}
	}
	/* Recompute the size from the cached tables and compare with CPUID. */
	size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
	XSTATE_WARN_ON(size != kernel_size,
		       "size %u != kernel_size %u\n", size, kernel_size);
	return size == kernel_size;
}

/*
 * Get total size of enabled xstates in XCR0 | IA32_XSS.
 *
 * Note the SDM's wording here. "sub-function 0" only enumerates
 * the size of the *user* states. If we use it to size a buffer
 * that we use 'XSAVES' on, we could potentially overflow the
 * buffer because 'XSAVES' saves system states too.
 *
 * This also takes compaction into account. So this works for
 * XSAVEC as well.
 */
static unsigned int __init get_compacted_size(void)
{
	unsigned int eax, ebx, ecx, edx;
	/*
	 * - CPUID function 0DH, sub-function 1:
	 *    EBX enumerates the size (in bytes) required by
	 *    the XSAVES instruction for an XSAVE area
	 *    containing all the state components
	 *    corresponding to bits currently set in
	 *    XCR0 | IA32_XSS.
	 *
	 * When XSAVES is not available but XSAVEC is (virt), then there
	 * are no supervisor states, but XSAVEC still uses compacted
	 * format.
	 */
	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
	return ebx;
}

/*
 * Get the total size of the enabled xstates without the independent supervisor
 * features.
 */
static unsigned int __init get_xsave_compacted_size(void)
{
	u64 mask = xfeatures_mask_independent();
	unsigned int size;

	/* No independent features: the plain CPUID answer is already right. */
	if (!mask)
		return get_compacted_size();

	/* Disable independent features. */
	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());

	/*
	 * Ask the hardware what size is required of the buffer.
	 * This is the size required for the task->fpu buffer.
	 */
	size = get_compacted_size();

	/* Re-enable independent features so XSAVES will work on them again. */
	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);

	return size;
}

/* Return the standard-format size of the user xstates reported by CPUID. */
static unsigned int __init get_xsave_size_user(void)
{
	unsigned int eax, ebx, ecx, edx;
	/*
	 * - CPUID function 0DH, sub-function 0:
	 *    EBX enumerates the size (in bytes) required by
	 *    the XSAVE instruction for an XSAVE area
	 *    containing all the *user* state components
	 *    corresponding to bits currently set in XCR0.
	 */
	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	return ebx;
}

/*
 * Compute the maximum and default buffer sizes for kernel and user space
 * and store them in fpu_kernel_cfg / fpu_user_cfg.
 *
 * Returns 0 on success, or -EINVAL (leaving both configs untouched) when
 * paranoid_xstate_size_valid() rejects the kernel size.
 */
static int __init init_xstate_size(void)
{
	/* Recompute the context size for enabled features: */
	unsigned int user_size, kernel_size, kernel_default_size;
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);

	/* Uncompacted user space size */
	user_size = get_xsave_size_user();

	/*
	 * XSAVES kernel size includes supervisor states and uses compacted
	 * format. XSAVEC uses compacted format, but does not save
	 * supervisor states.
	 *
	 * XSAVE[OPT] do not support supervisor states so kernel and user
	 * size is identical.
	 */
	if (compacted)
		kernel_size = get_xsave_compacted_size();
	else
		kernel_size = user_size;

	kernel_default_size =
		xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);

	if (!paranoid_xstate_size_valid(kernel_size))
		return -EINVAL;

	fpu_kernel_cfg.max_size = kernel_size;
	fpu_user_cfg.max_size = user_size;

	fpu_kernel_cfg.default_size = kernel_default_size;
	/* The user default is always computed for the standard format. */
	fpu_user_cfg.default_size =
		xstate_calculate_size(fpu_user_cfg.default_features, false);

	return 0;
}

/*
 * We enabled the XSAVE hardware, but something went wrong and
 * we can not use it. Disable it.
*/
static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
{
	fpu_kernel_cfg.max_features = 0;
	cr4_clear_bits(X86_CR4_OSXSAVE);
	setup_clear_cpu_cap(X86_FEATURE_XSAVE);

	/* Restore the legacy size.*/
	fpu_kernel_cfg.max_size = legacy_size;
	fpu_kernel_cfg.default_size = legacy_size;
	fpu_user_cfg.max_size = legacy_size;
	fpu_user_cfg.default_size = legacy_size;

	/*
	 * Prevent enabling the static branch which enables writes to the
	 * XFD MSR.
	 */
	init_fpstate.xfd = 0;

	fpstate_reset(&current->thread.fpu);
}

/*
 * Enable and initialize the xsave feature.
 * Called once per system bootup.
 *
 * @legacy_size is the buffer size to fall back to when XSAVE has to be
 * disabled again because one of the consistency checks below failed.
 */
void __init fpu__init_system_xstate(unsigned int legacy_size)
{
	unsigned int eax, ebx, ecx, edx;
	u64 xfeatures;
	int err;
	int i;

	if (!boot_cpu_has(X86_FEATURE_FPU)) {
		pr_info("x86/fpu: No FPU detected\n");
		return;
	}

	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
		pr_info("x86/fpu: x87 FPU will use %s\n",
			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
		return;
	}

	if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
		WARN_ON_FPU(1);
		return;
	}

	/*
	 * Find user xstates supported by the processor.
	 */
	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);

	/*
	 * Find supervisor xstates supported by the processor.
	 */
	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);

	if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		/*
		 * This indicates that something really unexpected happened
		 * with the enumeration. Disable XSAVE and try to continue
		 * booting without it. This is too early to BUG().
		 */
		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
		       fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	/*
	 * Clear XSAVE features that are disabled in the normal CPUID.
	 */
	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
		unsigned short cid = xsave_cpuid_features[i];

		/* Careful: X86_FEATURE_FPU is 0! */
		if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
			fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
	}

	if (!cpu_feature_enabled(X86_FEATURE_XFD))
		fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	/* Without XSAVES only user features can be kept. */
	if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
	else
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
					XFEATURE_MASK_SUPERVISOR_SUPPORTED;

	fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
	fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;

	/* Clean out dynamic features from default */
	fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
	fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	fpu_user_cfg.default_features = fpu_user_cfg.max_features;
	fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	/* Store it for paranoia check at the end */
	xfeatures = fpu_kernel_cfg.max_features;

	/*
	 * Initialize the default XFD state in init_fpstate and enable the
	 * dynamic sizing mechanism if dynamic states are available. The
	 * static key cannot be enabled here because this runs before
	 * jump_label_init(). This is delayed to an initcall.
	 */
	init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;

	/* Set up compaction feature bit */
	if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
	    cpu_feature_enabled(X86_FEATURE_XSAVES))
		setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);

	/* Enable xstate instructions to be able to continue with initialization: */
	fpu__init_cpu_xstate();

	/* Cache size, offset and flags for initialization */
	setup_xstate_cache();

	err = init_xstate_size();
	if (err)
		goto out_disable;

	/* Reset the state for the current task */
	fpstate_reset(&current->thread.fpu);

	/*
	 * Update info used for ptrace frames; use standard-format size and no
	 * supervisor xstates:
	 */
	update_regset_xstate_info(fpu_user_cfg.max_size,
				  fpu_user_cfg.max_features);

	/*
	 * init_fpstate excludes dynamic states as they are large but init
	 * state is zero.
	 */
	init_fpstate.size		= fpu_kernel_cfg.default_size;
	init_fpstate.xfeatures		= fpu_kernel_cfg.default_features;

	if (init_fpstate.size > sizeof(init_fpstate.regs)) {
		pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
			sizeof(init_fpstate.regs), init_fpstate.size);
		goto out_disable;
	}

	setup_init_fpu_buf();

	/*
	 * Paranoia check whether something in the setup modified the
	 * xfeatures mask.
	 */
	if (xfeatures != fpu_kernel_cfg.max_features) {
		pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
		       xfeatures, fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	/*
	 * CPU capabilities initialization runs before FPU init. So
	 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
	 * functional, set the feature bit so depending code works.
	 */
	setup_force_cpu_cap(X86_FEATURE_OSXSAVE);

	print_xstate_offset_size();
	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
		fpu_kernel_cfg.max_features,
		fpu_kernel_cfg.max_size,
		boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
	return;

out_disable:
	/* something went wrong, try to boot without any XSAVE support */
	fpu__init_disable_system_xstate(legacy_size);
}

/*
 * Restore minimal FPU state after suspend:
 */
void fpu__resume_cpu(void)
{
	/*
	 * Restore XCR0 on xsave capable CPUs:
	 */
	if (cpu_feature_enabled(X86_FEATURE_XSAVE))
		xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);

	/*
	 * Restore IA32_XSS. The same CPUID bit enumerates support
	 * of XSAVES and MSR_IA32_XSS.
	 */
	if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()  |
				     xfeatures_mask_independent());
	}

	/* Rewrite the XFD MSR from the current task's fpstate. */
	if (fpu_state_size_dynamic())
		wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
}

/*
 * Given an xstate feature nr, calculate where in the xsave
 * buffer the state is. Callers should ensure that the buffer
 * is valid.
*/ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr) { u64 xcomp_bv = xsave->header.xcomp_bv; if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) return NULL; if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) { if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr)))) return NULL; } return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr); } /* * Given the xsave area and a state inside, this function returns the * address of the state. * * This is the API that is called to get xstate address in either * standard format or compacted format of xsave area. * * Note that if there is no data for the field in the xsave buffer * this will return NULL. * * Inputs: * xstate: the thread's storage area for all FPU data * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, * XFEATURE_SSE, etc...) * Output: * address of the state in the xsave area, or NULL if the * field is not present in the xsave buffer. */ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) { /* * Do we even *have* xsave state? */ if (!boot_cpu_has(X86_FEATURE_XSAVE)) return NULL; /* * We should not ever be requesting features that we * have not enabled. */ if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) return NULL; /* * This assumes the last 'xsave*' instruction to * have requested that 'xfeature_nr' be saved. * If it did not, we might be seeing and old value * of the field in the buffer. * * This can happen because the last 'xsave' did not * request that this feature be saved (unlikely) * or because the "init optimization" caused it * to not be saved. */ if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr))) return NULL; return __raw_xsave_addr(xsave, xfeature_nr); } #ifdef CONFIG_ARCH_HAS_PKEYS /* * This will go out and modify PKRU register to set the access * rights for @pkey to @init_val. 
*/ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val) { u32 old_pkru, new_pkru_bits = 0; int pkey_shift; /* * This check implies XSAVE support. OSPKE only gets * set if we enable XSAVE and we enable PKU in XCR0. */ if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return -EINVAL; /* * This code should only be called with valid 'pkey' * values originating from in-kernel users. Complain * if a bad value is observed. */ if (WARN_ON_ONCE(pkey >= arch_max_pkey())) return -EINVAL; /* Set the bits we need in PKRU: */ if (init_val & PKEY_DISABLE_ACCESS) new_pkru_bits |= PKRU_AD_BIT; if (init_val & PKEY_DISABLE_WRITE) new_pkru_bits |= PKRU_WD_BIT; /* Shift the bits in to the correct place in PKRU for pkey: */ pkey_shift = pkey * PKRU_BITS_PER_PKEY; new_pkru_bits <<= pkey_shift; /* Get old PKRU and mask off any old bits in place: */ old_pkru = read_pkru(); old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); /* Write old part along with new part: */ write_pkru(old_pkru | new_pkru_bits); return 0; } #endif /* ! CONFIG_ARCH_HAS_PKEYS */ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, void *init_xstate, unsigned int size) { membuf_write(to, from_xstate ? xstate : init_xstate, size); } /** * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer * @to: membuf descriptor * @fpstate: The fpstate buffer from which to copy * @xfeatures: The mask of xfeatures to save (XSAVE mode only) * @pkru_val: The PKRU value to store in the PKRU component * @copy_mode: The requested copy mode * * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming * format, i.e. from the kernel internal hardware dependent storage format * to the requested @mode. UABI XSTATE is always uncompacted! * * It supports partial copy but @to.pos always starts from zero. 
*/
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
			       u64 xfeatures, u32 pkru_val,
			       enum xstate_copy_mode copy_mode)
{
	const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
	struct xregs_state *xinit = &init_fpstate.regs.xsave;
	struct xregs_state *xsave = &fpstate->regs.xsave;
	struct xstate_header header;
	unsigned int zerofrom;
	u64 mask;
	int i;

	/* Build the header that is exported; only xfeatures is filled in. */
	memset(&header, 0, sizeof(header));
	header.xfeatures = xsave->header.xfeatures;

	/* Mask out the feature bits depending on copy mode */
	switch (copy_mode) {
	case XSTATE_COPY_FP:
		header.xfeatures &= XFEATURE_MASK_FP;
		break;

	case XSTATE_COPY_FX:
		header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
		break;

	case XSTATE_COPY_XSAVE:
		header.xfeatures &= fpstate->user_xfeatures & xfeatures;
		break;
	}

	/*
	 * Each legacy piece below comes from @fpstate when its feature bit
	 * survived the masking above and from init_fpstate otherwise.
	 */

	/* Copy FP state up to MXCSR */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
		     &xinit->i387, off_mxcsr);

	/* Copy MXCSR when SSE or YMM are set in the feature mask */
	copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
		     &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
		     MXCSR_AND_FLAGS_SIZE);

	/* Copy the remaining FP state */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP,
		     &to, &xsave->i387.st_space, &xinit->i387.st_space,
		     sizeof(xsave->i387.st_space));

	/* Copy the SSE state - shared with YMM, but independently managed */
	copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
		     &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
		     sizeof(xsave->i387.xmm_space));

	/* The FP and FX modes carry no header or extended area. */
	if (copy_mode != XSTATE_COPY_XSAVE)
		goto out;

	/* Zero the padding area */
	membuf_zero(&to, sizeof(xsave->i387.padding));

	/* Copy xsave->i387.sw_reserved */
	membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));

	/* Copy the user space relevant state of @xsave->header */
	membuf_write(&to, &header, sizeof(header));

	zerofrom = offsetof(struct xregs_state, extended_state_area);

	/*
	 * This 'mask' indicates which states to copy from fpstate.
	 * Those extended states that are not present in fpstate are
	 * either disabled or initialized:
	 *
	 * In non-compacted format, disabled features still occupy
	 * state space but there is no state to copy from in the
	 * compacted init_fpstate. The gap tracking will zero these
	 * states.
	 *
	 * The extended features have an all zeroes init state. Thus,
	 * remove them from 'mask' to zero those features in the user
	 * buffer instead of retrieving them from init_fpstate.
	 */
	mask = header.xfeatures;

	for_each_extended_xfeature(i, mask) {
		/*
		 * If there was a feature or alignment gap, zero the space
		 * in the destination buffer.
		 */
		if (zerofrom < xstate_offsets[i])
			membuf_zero(&to, xstate_offsets[i] - zerofrom);

		if (i == XFEATURE_PKRU) {
			struct pkru_state pkru = {0};
			/*
			 * PKRU is not necessarily up to date in the
			 * XSAVE buffer. Use the provided value.
			 */
			pkru.pkru = pkru_val;
			membuf_write(&to, &pkru, sizeof(pkru));
		} else {
			membuf_write(&to,
				     __raw_xsave_addr(xsave, i),
				     xstate_sizes[i]);
		}
		/*
		 * Keep track of the last copied state in the non-compacted
		 * target buffer for gap zeroing.
		 */
		zerofrom = xstate_offsets[i] + xstate_sizes[i];
	}

out:
	/* Whatever is left of the destination is zero filled. */
	if (to.left)
		membuf_zero(&to, to.left);
}

/**
 * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
 * @to:		membuf descriptor
 * @tsk:	The task from which to copy the saved xstate
 * @copy_mode:	The requested copy mode
 *
 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
 * format, i.e. from the kernel internal hardware dependent storage format
 * to the requested @copy_mode. UABI XSTATE is always uncompacted!
 *
 * It supports partial copy but @to.pos always starts from zero.
*/
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
			     enum xstate_copy_mode copy_mode)
{
	struct fpstate *fps = tsk->thread.fpu.fpstate;

	/* Export with the task's own user feature mask and tsk->thread.pkru. */
	__copy_xstate_to_uabi_buf(to, fps, fps->user_xfeatures,
				  tsk->thread.pkru, copy_mode);
}

/*
 * Fetch @size bytes located @offset bytes into the source buffer and store
 * them at @dst. The source is @kbuf (kernel memory) when that is non-NULL
 * and @ubuf (user memory) otherwise.
 *
 * Returns 0 on success, -EFAULT if the copy from user space fails.
 */
static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
			    const void *kbuf, const void __user *ubuf)
{
	if (!kbuf)
		return copy_from_user(dst, ubuf + offset, size) ? -EFAULT : 0;

	memcpy(dst, kbuf + offset, size);
	return 0;
}

/**
 * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
 * @fpstate:	The fpstate buffer to copy to
 * @kbuf:	The UABI format buffer, if it comes from the kernel
 * @ubuf:	The UABI format buffer, if it comes from userspace
 * @pkru:	The location to write the PKRU value to
 *
 * Converts from the UABI format into the kernel internal hardware
 * dependent format.
 *
 * This function ultimately has three different callers with distinct PKRU
 * behavior.
 * 1.	When called from sigreturn the PKRU register will be restored from
 *	@fpstate via an XRSTOR. Correctly copying the UABI format buffer to
 *	@fpstate is sufficient to cover this case, but the caller will also
 *	pass a pointer to the thread_struct's pkru field in @pkru and updating
 *	it is harmless.
 * 2.	When called from ptrace the PKRU register will be restored from the
 *	thread_struct's pkru field. A pointer to that is passed in @pkru.
 *	The kernel will restore it manually, so the XRSTOR behavior that resets
 *	the PKRU register to the hardware init value (0) if the corresponding
 *	xfeatures bit is not set is emulated here.
 * 3.	When called from KVM the PKRU register will be restored from the vcpu's
 *	pkru field. A pointer to that is passed in @pkru. KVM hasn't used
 *	XRSTOR and hasn't had the PKRU resetting behavior described above. To
 *	preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
 *	bit is not set.
*/
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
			       const void __user *ubuf, u32 *pkru)
{
	const u64 mxcsr_users = XFEATURE_MASK_FP | XFEATURE_MASK_SSE |
				XFEATURE_MASK_YMM;
	struct xregs_state *xsave = &fpstate->regs.xsave;
	struct xstate_header hdr;
	int nr;

	/* Pull in the header first; it decides what else gets copied. */
	if (copy_from_buffer(&hdr, offsetof(struct xregs_state, header),
			     sizeof(hdr), kbuf, ubuf))
		return -EFAULT;

	if (validate_user_xstate_header(&hdr, fpstate))
		return -EINVAL;

	/* MXCSR is checked as soon as FP, SSE or YMM is present. */
	if (hdr.xfeatures & mxcsr_users) {
		u32 mxcsr[2];

		if (copy_from_buffer(mxcsr, offsetof(struct fxregs_state, mxcsr),
				     sizeof(mxcsr), kbuf, ubuf))
			return -EFAULT;

		/* No reserved MXCSR bit may be set. */
		if (mxcsr[0] & ~mxcsr_feature_mask)
			return -EINVAL;

		/* Without FP in the mask, SSE and YMM still need MXCSR stored. */
		if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
			xsave->i387.mxcsr = mxcsr[0];
			xsave->i387.mxcsr_mask = mxcsr[1];
		}
	}

	/* Copy every component the incoming header announces. */
	for (nr = 0; nr < XFEATURE_MAX; nr++) {
		if (!(hdr.xfeatures & BIT_ULL(nr)))
			continue;

		if (copy_from_buffer(__raw_xsave_addr(xsave, nr),
				     xstate_offsets[nr], xstate_sizes[nr],
				     kbuf, ubuf))
			return -EFAULT;
	}

	if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
		struct pkru_state *xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);

		*pkru = xpkru->pkru;
	} else if (pkru) {
		/*
		 * A NULL @pkru is how KVM signals that it does not want
		 * PKRU touched; everybody else gets it reset to 0.
		 */
		*pkru = 0;
	}

	/*
	 * Only user states arrived from the buffer: keep the supervisor
	 * bits already recorded in the header and replace the user bits
	 * with the ones that were just copied in.
	 */
	xsave->header.xfeatures = (xsave->header.xfeatures &
				   XFEATURE_MASK_SUPERVISOR_ALL) | hdr.xfeatures;

	return 0;
}

/*
 * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
 * format and copy to the target thread. Used by ptrace and KVM.
*/ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru) { return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru); } /* * Convert from a sigreturn standard-format user-space buffer to kernel * XSAVE[S] format and copy to the target thread. This is called from the * sigreturn() and rt_sigreturn() system calls. */ int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf) { return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru); } static bool validate_independent_components(u64 mask) { u64 xchk; if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES))) return false; xchk = ~xfeatures_mask_independent(); if (WARN_ON_ONCE(!mask || mask & xchk)) return false; return true; } /** * xsaves - Save selected components to a kernel xstate buffer * @xstate: Pointer to the buffer * @mask: Feature mask to select the components to save * * The @xstate buffer must be 64 byte aligned and correctly initialized as * XSAVES does not write the full xstate header. Before first use the * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer * can #GP. * * The feature mask must be a subset of the independent features. */ void xsaves(struct xregs_state *xstate, u64 mask) { int err; if (!validate_independent_components(mask)) return; XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err); WARN_ON_ONCE(err); } /** * xrstors - Restore selected components from a kernel xstate buffer * @xstate: Pointer to the buffer * @mask: Feature mask to select the components to restore * * The @xstate buffer must be 64 byte aligned and correctly initialized * otherwise XRSTORS from that buffer can #GP. * * Proper usage is to restore the state which was saved with * xsaves() into @xstate. * * The feature mask must be a subset of the independent features. 
*/
void xrstors(struct xregs_state *xstate, u64 mask)
{
	int err;

	if (!validate_independent_components(mask))
		return;

	XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
	WARN_ON_ONCE(err);
}

#if IS_ENABLED(CONFIG_KVM)
/*
 * Zero the storage of @xfeature inside @fps. Nothing is written when
 * get_xsave_addr() returns NULL for that component.
 */
void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
{
	void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);

	if (addr)
		memset(addr, 0, xstate_sizes[xfeature]);
}
EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
#endif

#ifdef CONFIG_X86_64

#ifdef CONFIG_X86_DEBUG_FPU
/*
 * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
 * can safely operate on the @fpstate buffer.
 */
static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
{
	u64 xfd = __this_cpu_read(xfd_state);

	if (fpstate->xfd == xfd)
		return true;

	/*
	 * The XFD MSR does not match fpstate->xfd. That's invalid when
	 * the passed in fpstate is current's fpstate.
	 */
	if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
		return false;

	/*
	 * XRSTOR(S) from init_fpstate are always correct as it will just
	 * bring all components into init state and not read from the
	 * buffer. XSAVE(S) raises #PF after init.
	 */
	if (fpstate == &init_fpstate)
		return rstor;

	/*
	 * XSAVE(S): clone(), fpu_swap_kvm_fpu()
	 * XRSTOR(S): fpu_swap_kvm_fpu()
	 */

	/*
	 * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
	 * the buffer area for XFD-disabled state components.
	 */
	mask &= ~xfd;

	/*
	 * Remove features which are valid in fpstate. They
	 * have space allocated in fpstate.
	 */
	mask &= ~fpstate->xfeatures;

	/*
	 * Any remaining state components in 'mask' might be written
	 * by XSAVE/XRSTOR. Fail validation if any are found.
	 */
	return !mask;
}

/* Warn once if xstate_op_valid() rejects the operation described by the arguments. */
void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
{
	WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
}
#endif /* CONFIG_X86_DEBUG_FPU */

static int __init xfd_update_static_branch(void)
{
	/*
	 * If init_fpstate.xfd has bits set then dynamic features are
	 * available and the dynamic sizing must be enabled.
	 */
	if (init_fpstate.xfd)
		static_branch_enable(&__fpu_state_size_dynamic);
	return 0;
}
arch_initcall(xfd_update_static_branch)

/* Release @fpu's fpstate unless it is the embedded @fpu->__fpstate. */
void fpstate_free(struct fpu *fpu)
{
	if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
		vfree(fpu->fpstate);
}

/**
 * fpstate_realloc - Reallocate struct fpstate for the requested new features
 *
 * @xfeatures:	A bitmap of xstate features which extend the enabled features
 *		of that task
 * @ksize:	The required size for the kernel buffer
 * @usize:	The required size for user space buffers
 * @guest_fpu:	Pointer to a guest FPU container. NULL for host allocations
 *
 * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
 * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
 * with large states are likely to live longer.
 *
 * Returns: 0 on success, -ENOMEM on allocation error.
 */
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
			   unsigned int usize, struct fpu_guest *guest_fpu)
{
	struct fpu *fpu = &current->thread.fpu;
	struct fpstate *curfps, *newfps = NULL;
	unsigned int fpsize;
	bool in_use;

	/* Room for the register area plus the struct header rounded up to 64 bytes. */
	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);

	newfps = vzalloc(fpsize);
	if (!newfps)
		return -ENOMEM;
	newfps->size = ksize;
	newfps->user_size = usize;
	newfps->is_valloc = true;

	/*
	 * When a guest FPU is supplied, use @guest_fpu->fpstate
	 * as reference independent of whether it is in use or not.
	 */
	curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;

	/* Determine whether @curfps is the active fpstate */
	in_use = fpu->fpstate == curfps;

	if (guest_fpu) {
		newfps->is_guest = true;
		newfps->is_confidential = curfps->is_confidential;
		newfps->in_use = curfps->in_use;
		guest_fpu->xfeatures |= xfeatures;
		guest_fpu->uabi_size = usize;
	}

	fpregs_lock();
	/*
	 * If @curfps is in use, ensure that the current state is in the
	 * registers before swapping fpstate as that might invalidate it
	 * due to layout changes.
	 */
	if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
		fpregs_restore_userregs();

	/* The new features become enabled and are removed from the XFD mask. */
	newfps->xfeatures = curfps->xfeatures | xfeatures;
	newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
	newfps->xfd = curfps->xfd & ~xfeatures;

	/* Do the final updates within the locked region */
	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);

	if (guest_fpu) {
		guest_fpu->fpstate = newfps;
		/* If curfps is active, update the FPU fpstate pointer */
		if (in_use)
			fpu->fpstate = newfps;
	} else {
		fpu->fpstate = newfps;
	}

	if (in_use)
		xfd_update_state(fpu->fpstate);
	fpregs_unlock();

	/* Only free valloc'ed state */
	if (curfps && curfps->is_valloc)
		vfree(curfps);

	return 0;
}

/*
 * Check that a signal frame grown to hold @usize bytes of xstate still fits
 * into every alternate signal stack registered in the thread group.
 * Must be called with current's siglock held.
 *
 * Returns 0 if all stacks are large enough, -ENOSPC otherwise.
 */
static int validate_sigaltstack(unsigned int usize)
{
	struct task_struct *thread, *leader = current->group_leader;
	unsigned long framesize = get_sigframe_size();

	lockdep_assert_held(&current->sighand->siglock);

	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
	framesize -= fpu_user_cfg.max_size;
	framesize += usize;
	for_each_thread(leader, thread) {
		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
			return -ENOSPC;
	}
	return 0;
}

/*
 * Extend the group leader's host or guest permissions by @requested and
 * record the resulting kernel and user state sizes.
 *
 * Returns 0 on success or when @requested is already permitted, otherwise
 * the error from validate_sigaltstack() (host requests only).
 */
static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
	/*
	 * This deliberately does not exclude !XSAVES as we still might
	 * decide to optionally context switch XCR0 or talk the silicon
	 * vendors into extending XFD for the pre AMX states, especially
	 * AVX512.
	 */
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	struct fpu *fpu = &current->group_leader->thread.fpu;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	u64 mask;
	int ret = 0;

	/* Check whether fully enabled */
	if ((permitted & requested) == requested)
		return 0;

	/* Calculate the resulting kernel state size */
	mask = permitted | requested;
	/* Take supervisor states into account on the host */
	if (!guest)
		mask |= xfeatures_mask_supervisor();
	ksize = xstate_calculate_size(mask, compacted);

	/* Calculate the resulting user state size */
	mask &= XFEATURE_MASK_USER_SUPPORTED;
	usize = xstate_calculate_size(mask, false);

	if (!guest) {
		ret = validate_sigaltstack(usize);
		if (ret)
			return ret;
	}

	perm = guest ? &fpu->guest_perm : &fpu->perm;
	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
	WRITE_ONCE(perm->__state_perm, mask);
	/* Protected by sighand lock */
	perm->__state_size = ksize;
	perm->__user_state_size = usize;
	return ret;
}

/*
 * Permissions array to map facilities with more than one component
 */
static const u64 xstate_prctl_req[XFEATURE_MAX] = {
	[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};

/*
 * Handle a permission request for facility number @idx on behalf of the
 * host (@guest false) or of guests (@guest true).
 *
 * Returns 0 on success or if already permitted, -EINVAL for an out of
 * range @idx, -EOPNOTSUPP if the facility is unknown or not supported,
 * -EBUSY if the guest permissions are locked, or the error returned by
 * __xstate_request_perm().
 */
static int xstate_request_perm(unsigned long idx, bool guest)
{
	u64 permitted, requested;
	int ret;

	if (idx >= XFEATURE_MAX)
		return -EINVAL;

	/*
	 * Look up the facility mask which can require more than
	 * one xstate component.
	 */
	idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
	requested = xstate_prctl_req[idx];
	if (!requested)
		return -EOPNOTSUPP;

	if ((fpu_user_cfg.max_features & requested) != requested)
		return -EOPNOTSUPP;

	/* Lockless quick check */
	permitted = xstate_get_group_perm(guest);
	if ((permitted & requested) == requested)
		return 0;

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);
	permitted = xstate_get_group_perm(guest);

	/* First vCPU allocation locks the permissions. */
	if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
		ret = -EBUSY;
	else
		ret = __xstate_request_perm(permitted, requested, guest);
	spin_unlock_irq(&current->sighand->siglock);
	return ret;
}

/*
 * Enable the dynamic features flagged in @xfd_err by reallocating the
 * fpstate, for the host when @guest_fpu is NULL and for that guest FPU
 * container otherwise.
 *
 * Returns 0 on success or when @xfd_err names no dynamic feature, -EPERM
 * if the features are not permitted, -EFAULT if the reallocation fails.
 */
int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
	u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	struct fpu *fpu;

	if (!xfd_event) {
		if (!guest_fpu)
			pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
		return 0;
	}

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);

	/* If not permitted let it die */
	if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
		spin_unlock_irq(&current->sighand->siglock);
		return -EPERM;
	}

	fpu = &current->group_leader->thread.fpu;
	perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
	ksize = perm->__state_size;
	usize = perm->__user_state_size;

	/*
	 * The feature is permitted. State size is sufficient. Dropping
	 * the lock is safe here even if more features are added from
	 * another task, the retrieved buffer sizes are valid for the
	 * currently requested feature(s).
	 */
	spin_unlock_irq(&current->sighand->siglock);

	/*
	 * Try to allocate a new fpstate. If that fails there is no way
	 * out.
	 */
	if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
		return -EFAULT;
	return 0;
}

/* Host variant of __xfd_enable_feature(): no guest FPU container is passed. */
int xfd_enable_feature(u64 xfd_err)
{
	return __xfd_enable_feature(xfd_err, NULL);
}

#else /* CONFIG_X86_64 */
/* Without CONFIG_X86_64 every permission request is refused with -EPERM. */
static inline int xstate_request_perm(unsigned long idx, bool guest)
{
	return -EPERM;
}
#endif  /* !CONFIG_X86_64 */

/* Exported wrapper returning xstate_get_group_perm() for the guest case. */
u64 xstate_get_guest_group_perm(void)
{
	return xstate_get_group_perm(true);
}
EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);

/**
 * fpu_xstate_prctl - xstate permission operations
 * @option:	A subfunction of arch_prctl()
 * @arg2:	option argument
 * Return:	0 if successful; otherwise, an error code
 *
 * Option arguments:
 *
 * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
 * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
 * ARCH_GET_XCOMP_GUEST_PERM: Pointer to user space u64 to store the info
 * ARCH_REQ_XCOMP_PERM: Facility number requested
 * ARCH_REQ_XCOMP_GUEST_PERM: Facility number requested
 *
 * For facilities which require more than one XSTATE component, the request
 * must be the highest state component number related to that facility,
 * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
 * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
 */
long fpu_xstate_prctl(int option, unsigned long arg2)
{
	/* @arg2 is a user pointer for the GET options and an index for the REQ options. */
	u64 __user *uptr = (u64 __user *)arg2;
	u64 permitted, supported;
	unsigned long idx = arg2;
	bool guest = false;

	switch (option) {
	case ARCH_GET_XCOMP_SUPP:
		supported = fpu_user_cfg.max_features |	fpu_user_cfg.legacy_features;
		return put_user(supported, uptr);

	case ARCH_GET_XCOMP_PERM:
		/*
		 * Lockless snapshot as it can also change right after
		 * dropping the lock.
		 */
		permitted = xstate_get_host_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_GET_XCOMP_GUEST_PERM:
		permitted = xstate_get_guest_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_REQ_XCOMP_GUEST_PERM:
		guest = true;
		fallthrough;

	case ARCH_REQ_XCOMP_PERM:
		if (!IS_ENABLED(CONFIG_X86_64))
			return -EOPNOTSUPP;

		return xstate_request_perm(idx, guest);

	default:
		return -EINVAL;
	}
}

#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
 * Report the amount of time elapsed in milliseconds since last AVX512
 * use in the task.
 */
static void avx512_status(struct seq_file *m, struct task_struct *task)
{
	unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
	long delta;

	if (!timestamp) {
		/*
		 * Report -1 if no AVX512 usage
		 */
		delta = -1;
	} else {
		delta = (long)(jiffies - timestamp);
		/*
		 * Cap to LONG_MAX if time difference > LONG_MAX
		 */
		if (delta < 0)
			delta = LONG_MAX;
		delta = jiffies_to_msecs(delta);
	}

	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
	seq_putc(m, '\n');
}

/*
 * Report architecture specific information
 */
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
			struct pid *pid, struct task_struct *task)
{
	/*
	 * Report AVX512 state if the processor and build option supported.
	 */
	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
		avx512_status(m, task);

	return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_GPIO_DRIVER_H #define __LINUX_GPIO_DRIVER_H #include <linux/bits.h> #include <linux/cleanup.h> #include <linux/err.h> #include <linux/irqchip/chained_irq.h> #include <linux/irqdomain.h> #include <linux/irqhandler.h> #include <linux/lockdep.h> #include <linux/pinctrl/pinconf-generic.h> #include <linux/pinctrl/pinctrl.h> #include <linux/property.h> #include <linux/spinlock_types.h> #include <linux/types.h> #ifdef CONFIG_GENERIC_MSI_IRQ #include <asm/msi.h> #endif struct device; struct irq_chip; struct irq_data; struct module; struct
of_phandle_args; struct pinctrl_dev; struct seq_file; struct gpio_chip; struct gpio_desc; struct gpio_device; enum gpio_lookup_flags; enum gpiod_flags; /* Argument filled in by gpio_irq_chip.populate_parent_alloc_arg(): a generic irq_fwspec or, when CONFIG_GENERIC_MSI_IRQ is set, MSI allocation info. */ union gpio_irq_fwspec { struct irq_fwspec fwspec; #ifdef CONFIG_GENERIC_MSI_IRQ msi_alloc_info_t msiinfo; #endif }; /* Line direction values; the gpio_chip kernel-doc below uses them for the return value of get_direction(). */ #define GPIO_LINE_DIRECTION_IN 1 #define GPIO_LINE_DIRECTION_OUT 0 /** * struct gpio_irq_chip - GPIO interrupt controller */ struct gpio_irq_chip { /** * @chip: * * GPIO IRQ chip implementation, provided by GPIO driver. */ struct irq_chip *chip; /** * @domain: * * Interrupt translation domain; responsible for mapping between GPIO * hwirq number and Linux IRQ number. */ struct irq_domain *domain; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /** * @fwnode: * * Firmware node corresponding to this gpiochip/irqchip, necessary * for hierarchical irqdomain support. */ struct fwnode_handle *fwnode; /** * @parent_domain: * * If non-NULL, will be set as the parent of this GPIO interrupt * controller's IRQ domain to establish a hierarchical interrupt * domain. The presence of this will activate the hierarchical * interrupt support. */ struct irq_domain *parent_domain; /** * @child_to_parent_hwirq: * * This callback translates a child hardware IRQ offset to a parent * hardware IRQ offset on a hierarchical interrupt chip. The child * hardware IRQs correspond to the GPIO index 0..ngpio-1 (see the * ngpio field of struct gpio_chip) and the corresponding parent * hardware IRQ and type (such as IRQ_TYPE_*) shall be returned by * the driver. The driver can calculate this from an offset or using * a lookup table or whatever method is best for this chip. Return * 0 on successful translation in the driver. * * If some ranges of hardware IRQs do not have a corresponding parent * HWIRQ, return -EINVAL, but also make sure to fill in @valid_mask and * @need_valid_mask to make these GPIO lines unavailable for * translation.
*/ int (*child_to_parent_hwirq)(struct gpio_chip *gc, unsigned int child_hwirq, unsigned int child_type, unsigned int *parent_hwirq, unsigned int *parent_type); /** * @populate_parent_alloc_arg : * * This optional callback allocates and populates the specific struct * for the parent's IRQ domain. If this is not specified, then * &gpiochip_populate_parent_fwspec_twocell will be used. A four-cell * variant named &gpiochip_populate_parent_fwspec_fourcell is also * available. */ int (*populate_parent_alloc_arg)(struct gpio_chip *gc, union gpio_irq_fwspec *fwspec, unsigned int parent_hwirq, unsigned int parent_type); /** * @child_offset_to_irq: * * This optional callback is used to translate the child's GPIO line * offset on the GPIO chip to an IRQ number for the GPIO to_irq() * callback. If this is not specified, then a default callback will be * provided that returns the line offset. */ unsigned int (*child_offset_to_irq)(struct gpio_chip *gc, unsigned int pin); /** * @child_irq_domain_ops: * * The IRQ domain operations that will be used for this GPIO IRQ * chip. If no operations are provided, then default callbacks will * be populated to setup the IRQ hierarchy. Some drivers need to * supply their own translate function. */ struct irq_domain_ops child_irq_domain_ops; #endif /** * @handler: * * The IRQ handler to use (often a predefined IRQ core function) for * GPIO IRQs, provided by GPIO driver. */ irq_flow_handler_t handler; /** * @default_type: * * Default IRQ triggering type applied during GPIO driver * initialization, provided by GPIO driver. */ unsigned int default_type; /** * @lock_key: * * Per GPIO IRQ chip lockdep class for IRQ lock. */ struct lock_class_key *lock_key; /** * @request_key: * * Per GPIO IRQ chip lockdep class for IRQ request. */ struct lock_class_key *request_key; /** * @parent_handler: * * The interrupt handler for the GPIO chip's parent interrupts, may be * NULL if the parent interrupts are nested rather than cascaded.
*/ irq_flow_handler_t parent_handler; union { /** * @parent_handler_data: * * If @per_parent_data is false, @parent_handler_data is a * single pointer used as the data associated with every * parent interrupt. */ void *parent_handler_data; /** * @parent_handler_data_array: * * If @per_parent_data is true, @parent_handler_data_array is * an array of @num_parents pointers, and is used to associate * different data for each parent. This cannot be NULL if * @per_parent_data is true. */ void **parent_handler_data_array; }; /** * @num_parents: * * The number of interrupt parents of a GPIO chip. */ unsigned int num_parents; /** * @parents: * * A list of interrupt parents of a GPIO chip. This is owned by the * driver, so the core will only reference this list, not modify it. */ unsigned int *parents; /** * @map: * * A list of interrupt parents for each line of a GPIO chip. */ unsigned int *map; /** * @threaded: * * True if the interrupt handling uses nested threads. */ bool threaded; /** * @per_parent_data: * * True if parent_handler_data_array describes a @num_parents * sized array to be used as parent data. */ bool per_parent_data; /** * @initialized: * * Flag to track GPIO chip irq member's initialization. * This flag will make sure GPIO chip irq members are not used * before they are initialized. */ bool initialized; /** * @domain_is_allocated_externally: * * True if the irq_domain was allocated outside of gpiolib, in which * case gpiolib won't free the irq_domain itself. */ bool domain_is_allocated_externally; /** * @init_hw: optional routine to initialize hardware before * an IRQ chip will be added. This is quite useful when * a particular driver wants to clear IRQ related registers * in order to avoid undesired events. */ int (*init_hw)(struct gpio_chip *gc); /** * @init_valid_mask: optional routine to initialize @valid_mask, to be * used if not all GPIO lines are valid interrupts.
Sometimes some * lines just cannot fire interrupts, and this routine, when defined, * is passed a bitmap in "valid_mask" and it will have ngpios * bits from 0..(ngpios-1) set to "1" as in valid. The callback can * then directly set some bits to "0" if they cannot be used for * interrupts. */ void (*init_valid_mask)(struct gpio_chip *gc, unsigned long *valid_mask, unsigned int ngpios); /** * @valid_mask: * * If not %NULL, holds bitmask of GPIOs which are valid to be included * in IRQ domain of the chip. */ unsigned long *valid_mask; /** * @first: * * Required for static IRQ allocation. If set, irq_domain_add_simple() * will allocate and map all IRQs during initialization. */ unsigned int first; /** * @irq_enable: * * Store old irq_chip irq_enable callback */ void (*irq_enable)(struct irq_data *data); /** * @irq_disable: * * Store old irq_chip irq_disable callback */ void (*irq_disable)(struct irq_data *data); /** * @irq_unmask: * * Store old irq_chip irq_unmask callback */ void (*irq_unmask)(struct irq_data *data); /** * @irq_mask: * * Store old irq_chip irq_mask callback */ void (*irq_mask)(struct irq_data *data); }; /** * struct gpio_chip - abstract a GPIO controller * @label: a functional name for the GPIO device, such as a part * number or the name of the SoC IP-block implementing it. * @gpiodev: the internal state holder, opaque struct * @parent: optional parent device providing the GPIOs * @fwnode: optional fwnode providing this controller's properties * @owner: helps prevent removal of modules exporting active GPIOs * @request: optional hook for chip-specific activation, such as * enabling module power and clock; may sleep * @free: optional hook for chip-specific deactivation, such as * disabling module power and clock; may sleep * @get_direction: returns direction for signal "offset", 0=out, 1=in, * (same as GPIO_LINE_DIRECTION_OUT / GPIO_LINE_DIRECTION_IN), * or negative error.
It is recommended to always implement this * function, even on input-only or output-only gpio chips. * @direction_input: configures signal "offset" as input, or returns error * This can be omitted on input-only or output-only gpio chips. * @direction_output: configures signal "offset" as output, or returns error * This can be omitted on input-only or output-only gpio chips. * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error * @set: assigns output value for signal "offset" * @set_multiple: assigns output values for multiple signals defined by "mask" * @set_config: optional hook for all kinds of settings. Uses the same * packed config format as generic pinconf. * @to_irq: optional hook supporting non-static gpiod_to_irq() mappings; * implementation may not sleep * @dbg_show: optional routine to show contents in debugfs; default code * will be used when this is omitted, but custom code can show extra * state (such as pullup/pulldown configuration). * @init_valid_mask: optional routine to initialize @valid_mask, to be used if * not all GPIOs are valid. * @add_pin_ranges: optional routine to initialize pin ranges, to be used when * requires special mapping of the pins that provides GPIO functionality. * It is called after adding GPIO chip and before adding IRQ chip. * @en_hw_timestamp: Dependent on GPIO chip, an optional routine to * enable hardware timestamp. * @dis_hw_timestamp: Dependent on GPIO chip, an optional routine to * disable hardware timestamp. * @base: identifies the first GPIO number handled by this chip; * or, if negative during registration, requests dynamic ID allocation. * DEPRECATION: providing anything non-negative and nailing the base * offset of GPIO chips is deprecated. Please pass -1 as base to * let gpiolib select the chip base in all possible cases.
We want to * get rid of the static GPIO number space in the long run. * @ngpio: the number of GPIOs handled by this controller; the last GPIO * handled is (base + ngpio - 1). * @offset: when multiple gpio chips belong to the same device this * can be used as offset within the device so friendly names can * be properly assigned. * @names: if set, must be an array of strings to use as alternative * names for the GPIOs in this chip. Any entry in the array * may be NULL if there is no alias for the GPIO, however the * array must be @ngpio entries long. A name can include a single printk * format specifier for an unsigned int. It is substituted by the actual * number of the gpio. * @can_sleep: flag must be set iff get()/set() methods sleep, as they * must while accessing GPIO expander chips over I2C or SPI. This * implies that if the chip supports IRQs, these IRQs need to be threaded * as the chip access may sleep when e.g. reading out the IRQ status * registers. * @read_reg: reader function for generic GPIO * @write_reg: writer function for generic GPIO * @be_bits: if the generic GPIO has big endian bit order (bit 31 is representing * line 0, bit 30 is line 1 ... bit 0 is line 31) this is set to true by the * generic GPIO core. It is for internal housekeeping only. * @reg_dat: data (in) register for generic GPIO * @reg_set: output set register (out=high) for generic GPIO * @reg_clr: output clear register (out=low) for generic GPIO * @reg_dir_out: direction out setting register for generic GPIO * @reg_dir_in: direction in setting register for generic GPIO * @bgpio_dir_unreadable: indicates that the direction register(s) cannot * be read and we need to rely on our internal state tracking. * @bgpio_bits: number of register bits used for a generic GPIO i.e. * <register width> * 8 * @bgpio_lock: used to lock chip->bgpio_data. Also, this is needed to keep * shadowed and real data registers writes together.
* @bgpio_data: shadowed data register for generic GPIO to clear/set bits * safely. * @bgpio_dir: shadowed direction register for generic GPIO to clear/set * direction safely. A "1" in this word means the line is set as * output. * * A gpio_chip can help platforms abstract various sources of GPIOs so * they can all be accessed through a common programming interface. * Example sources would be SOC controllers, FPGAs, multifunction * chips, dedicated GPIO expanders, and so on. * * Each chip controls a number of signals, identified in method calls * by "offset" values in the range 0..(@ngpio - 1). When those signals * are referenced through calls like gpio_get_value(gpio), the offset * is calculated by subtracting @base from the gpio number. */ struct gpio_chip { const char *label; struct gpio_device *gpiodev; struct device *parent; struct fwnode_handle *fwnode; struct module *owner; int (*request)(struct gpio_chip *gc, unsigned int offset); void (*free)(struct gpio_chip *gc, unsigned int offset); int (*get_direction)(struct gpio_chip *gc, unsigned int offset); int (*direction_input)(struct gpio_chip *gc, unsigned int offset); int (*direction_output)(struct gpio_chip *gc, unsigned int offset, int value); int (*get)(struct gpio_chip *gc, unsigned int offset); int (*get_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); void (*set)(struct gpio_chip *gc, unsigned int offset, int value); void (*set_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); int (*set_config)(struct gpio_chip *gc, unsigned int offset, unsigned long config); int (*to_irq)(struct gpio_chip *gc, unsigned int offset); void (*dbg_show)(struct seq_file *s, struct gpio_chip *gc); int (*init_valid_mask)(struct gpio_chip *gc, unsigned long *valid_mask, unsigned int ngpios); int (*add_pin_ranges)(struct gpio_chip *gc); int (*en_hw_timestamp)(struct gpio_chip *gc, u32 offset, unsigned long flags); int (*dis_hw_timestamp)(struct gpio_chip *gc, u32 offset, unsigned
long flags); int base; u16 ngpio; u16 offset; const char *const *names; bool can_sleep; #if IS_ENABLED(CONFIG_GPIO_GENERIC) unsigned long (*read_reg)(void __iomem *reg); void (*write_reg)(void __iomem *reg, unsigned long data); bool be_bits; void __iomem *reg_dat; void __iomem *reg_set; void __iomem *reg_clr; void __iomem *reg_dir_out; void __iomem *reg_dir_in; bool bgpio_dir_unreadable; int bgpio_bits; raw_spinlock_t bgpio_lock; unsigned long bgpio_data; unsigned long bgpio_dir; #endif /* CONFIG_GPIO_GENERIC */ #ifdef CONFIG_GPIOLIB_IRQCHIP /* * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib * to handle IRQs for most practical cases. */ /** * @irq: * * Integrates interrupt chip functionality with the GPIO chip. Can be * used to handle IRQs for most practical cases. */ struct gpio_irq_chip irq; #endif /* CONFIG_GPIOLIB_IRQCHIP */ /** * @valid_mask: * * If not %NULL, holds bitmask of GPIOs which are valid to be used * from the chip. */ unsigned long *valid_mask; #if defined(CONFIG_OF_GPIO) /* * If CONFIG_OF_GPIO is enabled, then all GPIO controllers described in * the device tree automatically may have an OF translation */ /** * @of_gpio_n_cells: * * Number of cells used to form the GPIO specifier. */ unsigned int of_gpio_n_cells; /** * @of_xlate: * * Callback to translate a device tree GPIO specifier into a chip- * relative GPIO number and flags.
*/ int (*of_xlate)(struct gpio_chip *gc, const struct of_phandle_args *gpiospec, u32 *flags); #endif /* CONFIG_OF_GPIO */ }; char *gpiochip_dup_line_label(struct gpio_chip *gc, unsigned int offset); /* Loop state for for_each_requested_gpio_in_range(): pointers to the caller's label and index variables. The class initializer below stores both pointers and zeroes the index; its cleanup expression kfree()s the label if it is non-NULL. */ struct _gpiochip_for_each_data { const char **label; unsigned int *i; }; DEFINE_CLASS(_gpiochip_for_each_data, struct _gpiochip_for_each_data, if (*_T.label) kfree(*_T.label), ({ struct _gpiochip_for_each_data _data = { label, i }; *_data.i = 0; _data; }), const char **label, int *i) /** * for_each_requested_gpio_in_range - iterates over requested GPIOs in a given range * @_chip: the chip to query * @_i: loop variable * @_base: first GPIO in the range * @_size: amount of GPIOs to check starting from @_base * @_label: label of current GPIO */ #define for_each_requested_gpio_in_range(_chip, _i, _base, _size, _label) \ for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ *_data.i < _size; \ (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \ if ((*_data.label = \ gpiochip_dup_line_label(_chip, _base + *_data.i)) == NULL) {} \ else if (IS_ERR(*_data.label)) {} \ else /* Iterates over all requested GPIO of the given @chip */ #define for_each_requested_gpio(chip, i, label) \ for_each_requested_gpio_in_range(chip, i, 0, chip->ngpio, label) /* add/remove chips */ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, struct lock_class_key *request_key); /** * gpiochip_add_data() - register a gpio_chip * @gc: the chip to register, with gc->base initialized * @data: driver-private data associated with this chip * * Context: potentially before irqs will work * * When gpiochip_add_data() is called very early during boot, so that GPIOs * can be freely used, the gc->parent device must be registered before * the gpio framework's arch_initcall(). Otherwise sysfs initialization * for GPIOs will fail rudely. * * gpiochip_add_data() must only be called after gpiolib initialization, * i.e. after core_initcall().
* * If gc->base is negative, this requests dynamic assignment of * a range of valid GPIOs. * * Returns: * A negative errno if the chip can't be registered, such as because the * gc->base is invalid or already associated with a different chip. * Otherwise it returns zero as a success code. */ #ifdef CONFIG_LOCKDEP #define gpiochip_add_data(gc, data) ({ \ static struct lock_class_key lock_key; \ static struct lock_class_key request_key; \ gpiochip_add_data_with_key(gc, data, &lock_key, \ &request_key); \ }) #define devm_gpiochip_add_data(dev, gc, data) ({ \ static struct lock_class_key lock_key; \ static struct lock_class_key request_key; \ devm_gpiochip_add_data_with_key(dev, gc, data, &lock_key, \ &request_key); \ }) #else #define gpiochip_add_data(gc, data) gpiochip_add_data_with_key(gc, data, NULL, NULL) #define devm_gpiochip_add_data(dev, gc, data) \ devm_gpiochip_add_data_with_key(dev, gc, data, NULL, NULL) #endif /* CONFIG_LOCKDEP */ static inline int gpiochip_add(struct gpio_chip *gc) { return gpiochip_add_data(gc, NULL); } void gpiochip_remove(struct gpio_chip *gc); int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, struct lock_class_key *request_key); struct gpio_device *gpio_device_find(void *data, int (*match)(struct gpio_chip *gc, void *data)); struct gpio_device *gpio_device_find_by_label(const char *label); struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); DEFINE_FREE(gpio_device_put, struct gpio_device *, if (!IS_ERR_OR_NULL(_T)) gpio_device_put(_T)) struct device *gpio_device_to_device(struct gpio_device *gdev); bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset); void
gpiochip_disable_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_enable_irq(struct gpio_chip *gc, unsigned int offset); /* irq_data versions of the above */ int gpiochip_irq_reqres(struct irq_data *data); void gpiochip_irq_relres(struct irq_data *data); /* Paste this in your irq_chip structure */ #define GPIOCHIP_IRQ_RESOURCE_HELPERS \ .irq_request_resources = gpiochip_irq_reqres, \ .irq_release_resources = gpiochip_irq_relres static inline void gpio_irq_chip_set_chip(struct gpio_irq_chip *girq, const struct irq_chip *chip) { /* Yes, dropping const is ugly, but it isn't like we have a choice */ girq->chip = (struct irq_chip *)chip; } /* Line status inquiry for drivers */ bool gpiochip_line_is_open_drain(struct gpio_chip *gc, unsigned int offset); bool gpiochip_line_is_open_source(struct gpio_chip *gc, unsigned int offset); /* Sleep persistence inquiry for drivers */ bool gpiochip_line_is_persistent(struct gpio_chip *gc, unsigned int offset); bool gpiochip_line_is_valid(const struct gpio_chip *gc, unsigned int offset); /* get driver data */ void *gpiochip_get_data(struct gpio_chip *gc); /* NOTE(review): presumably platform data describing a generic (bgpio) GPIO chip; no user is visible in this header - confirm. */ struct bgpio_pdata { const char *label; int base; int ngpio; }; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY int gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc, union gpio_irq_fwspec *gfwspec, unsigned int parent_hwirq, unsigned int parent_type); int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, union gpio_irq_fwspec *gfwspec, unsigned int parent_hwirq, unsigned int parent_type); #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ int bgpio_init(struct gpio_chip *gc, struct device *dev, unsigned long sz, void __iomem *dat, void __iomem *set, void __iomem *clr, void __iomem *dirout, void __iomem *dirin, unsigned long flags); #define BGPIOF_BIG_ENDIAN BIT(0) #define BGPIOF_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ #define BGPIOF_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ #define BGPIOF_BIG_ENDIAN_BYTE_ORDER BIT(3) #define
BGPIOF_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */ #define BGPIOF_NO_OUTPUT BIT(5) /* only input */ #define BGPIOF_NO_SET_ON_INPUT BIT(6) int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq); void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq); int gpiochip_irq_domain_activate(struct irq_domain *domain, struct irq_data *data, bool reserve); void gpiochip_irq_domain_deactivate(struct irq_domain *domain, struct irq_data *data); bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc, unsigned int offset); #ifdef CONFIG_GPIOLIB_IRQCHIP int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain); #else #include <asm/bug.h> static inline int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain) { WARN_ON(1); return -EINVAL; } #endif int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset); void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset); int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset, unsigned long config); /** * struct gpio_pin_range - pin range controlled by a gpio chip * @node: list for maintaining set of pin ranges, used internally * @pctldev: pinctrl device which handles corresponding pins * @range: actual range of pins controlled by a gpio controller */ struct gpio_pin_range { struct list_head node; struct pinctrl_dev *pctldev; struct pinctrl_gpio_range range; }; #ifdef CONFIG_PINCTRL int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, unsigned int npins); int gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, unsigned int gpio_offset, const char *pin_group); void gpiochip_remove_pin_ranges(struct gpio_chip *gc); #else /* !
CONFIG_PINCTRL */ static inline int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, unsigned int npins) { return 0; } static inline int gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, unsigned int gpio_offset, const char *pin_group) { return 0; } static inline void gpiochip_remove_pin_ranges(struct gpio_chip *gc) { } #endif /* CONFIG_PINCTRL */ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, unsigned int hwnum, const char *label, enum gpio_lookup_flags lflags, enum gpiod_flags dflags); void gpiochip_free_own_desc(struct gpio_desc *desc); struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum); struct gpio_desc * gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum); struct gpio_chip *gpio_device_get_chip(struct gpio_device *gdev); #ifdef CONFIG_GPIOLIB /* lock/unlock as IRQ */ int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc); /* struct gpio_device getters */ int gpio_device_get_base(struct gpio_device *gdev); const char *gpio_device_get_label(struct gpio_device *gdev); #else /* CONFIG_GPIOLIB */ #include <asm/bug.h> static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) { /* GPIO can never have been requested */ WARN_ON(1); return ERR_PTR(-ENODEV); } static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) { WARN_ON(1); return ERR_PTR(-ENODEV); } static inline int gpio_device_get_base(struct gpio_device *gdev) { WARN_ON(1); return -ENODEV; } static inline const char *gpio_device_get_label(struct gpio_device *gdev) { WARN_ON(1); return NULL; } static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { WARN_ON(1); return -EINVAL;
} static inline void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset) { WARN_ON(1); } #endif /* CONFIG_GPIOLIB */ /* Iterate over the child firmware nodes of @dev, running the loop body only for children that have a "gpio-controller" property. */ #define for_each_gpiochip_node(dev, child) \ device_for_each_child_node(dev, child) \ if (!fwnode_property_present(child, "gpio-controller")) {} else /* Count the child nodes of @dev that have a "gpio-controller" property. */ static inline unsigned int gpiochip_node_count(struct device *dev) { struct fwnode_handle *child; unsigned int count = 0; for_each_gpiochip_node(dev, child) count++; return count; } /* Return the first child node of @dev that has a "gpio-controller" property, or NULL if there is none. */ static inline struct fwnode_handle *gpiochip_node_get_first(struct device *dev) { struct fwnode_handle *fwnode; for_each_gpiochip_node(dev, fwnode) return fwnode; return NULL; } #endif /* __LINUX_GPIO_DRIVER_H */
2 1 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 // SPDX-License-Identifier: GPL-2.0-or-later /* Linux driver for Philips webcam USB and Video4Linux interface part. (C) 1999-2004 Nemosoft Unv. (C) 2004-2006 Luc Saillard (luc@saillard.org) (C) 2011 Hans de Goede <hdegoede@redhat.com> NOTE: this version of pwc is an unofficial (modified) release of pwc & pwcx driver and thus may have bugs that are not present in the original version. Please send bug reports and support requests to <luc@saillard.org>. The decompression routines have been implemented by reverse-engineering the Nemosoft binary pwcx module. Caveat emptor. */ /* This code forms the interface between the USB layers and the Philips specific stuff. Some advanced stuff of the driver falls under an NDA, signed between me and Philips B.V., Eindhoven, the Netherlands, and is thus not distributed in source form. The binary pwcx.o module contains the code that falls under the NDA.
In case you're wondering: 'pwc' stands for "Philips WebCam", but I really didn't want to type 'philips_web_cam' every time (I'm lazy as any Linux kernel hacker, but I don't like incomprehensible abbreviations without explanation). Oh yes, convention: to distinguish between all the various pointers to device-structures, I use these names for the pointer variables: udev: struct usb_device * vdev: struct video_device (member of pwc_dev) pdev: struct pwc_device * */ /* Contributors: - Alvarado: adding whitebalance code - Alistar Moire: QuickCam 3000 Pro device/product ID - Tony Hoyle: Creative Labs Webcam 5 device/product ID - Mark Burazin: solving hang in VIDIOCSYNC when camera gets unplugged - Jk Fang: Sotec Afina Eye ID - Xavier Roche: QuickCam Pro 4000 ID - Jens Knudsen: QuickCam Zoom ID - J. Debert: QuickCam for Notebooks ID - Pham Thanh Nam: webcam snapshot button as an event input device */ #include <linux/errno.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/poll.h> #include <linux/slab.h> #ifdef CONFIG_USB_PWC_INPUT_EVDEV #include <linux/usb/input.h> #endif #include <linux/vmalloc.h> #include <asm/io.h> #include <linux/kernel.h> /* simple_strtol() */ #include "pwc.h" #include "pwc-kiara.h" #include "pwc-timon.h" #include "pwc-dec23.h" #include "pwc-dec1.h" #define CREATE_TRACE_POINTS #include <trace/events/pwc.h> /* Function prototypes and driver templates */ /* hotplug device table support */ static const struct usb_device_id pwc_device_table [] = { { USB_DEVICE(0x041E, 0x400C) }, /* Creative Webcam 5 */ { USB_DEVICE(0x041E, 0x4011) }, /* Creative Webcam Pro Ex */ { USB_DEVICE(0x046D, 0x08B0) }, /* Logitech QuickCam 3000 Pro */ { USB_DEVICE(0x046D, 0x08B1) }, /* Logitech QuickCam Notebook Pro */ { USB_DEVICE(0x046D, 0x08B2) }, /* Logitech QuickCam 4000 Pro */ { USB_DEVICE(0x046D, 0x08B3) }, /* Logitech QuickCam Zoom (old model) */ { USB_DEVICE(0x046D, 0x08B4) }, /* Logitech QuickCam Zoom (new model) */ {
USB_DEVICE(0x046D, 0x08B5) }, /* Logitech QuickCam Orbit/Sphere */ { USB_DEVICE(0x046D, 0x08B6) }, /* Logitech/Cisco VT Camera */ { USB_DEVICE(0x046D, 0x08B7) }, /* Logitech ViewPort AV 100 */ { USB_DEVICE(0x046D, 0x08B8) }, /* Logitech QuickCam */ { USB_DEVICE(0x0471, 0x0302) }, /* Philips PCA645VC */ { USB_DEVICE(0x0471, 0x0303) }, /* Philips PCA646VC */ { USB_DEVICE(0x0471, 0x0304) }, /* Askey VC010 type 2 */ { USB_DEVICE(0x0471, 0x0307) }, /* Philips PCVC675K (Vesta) */ { USB_DEVICE(0x0471, 0x0308) }, /* Philips PCVC680K (Vesta Pro) */ { USB_DEVICE(0x0471, 0x030C) }, /* Philips PCVC690K (Vesta Pro Scan) */ { USB_DEVICE(0x0471, 0x0310) }, /* Philips PCVC730K (ToUCam Fun)/PCVC830 (ToUCam II) */ { USB_DEVICE(0x0471, 0x0311) }, /* Philips PCVC740K (ToUCam Pro)/PCVC840 (ToUCam II) */ { USB_DEVICE(0x0471, 0x0312) }, /* Philips PCVC750K (ToUCam Pro Scan) */ { USB_DEVICE(0x0471, 0x0313) }, /* Philips PCVC720K/40 (ToUCam XS) */ { USB_DEVICE(0x0471, 0x0329) }, /* Philips SPC 900NC webcam */ { USB_DEVICE(0x0471, 0x032C) }, /* Philips SPC 880NC webcam */ { USB_DEVICE(0x04CC, 0x8116) }, /* Sotec Afina Eye */ { USB_DEVICE(0x055D, 0x9000) }, /* Samsung MPC-C10 */ { USB_DEVICE(0x055D, 0x9001) }, /* Samsung MPC-C30 */ { USB_DEVICE(0x055D, 0x9002) }, /* Samsung SNC-35E (Ver3.0) */ { USB_DEVICE(0x069A, 0x0001) }, /* Askey VC010 type 1 */ { USB_DEVICE(0x06BE, 0x8116) }, /* AME Co. 
Afina Eye */ { USB_DEVICE(0x0d81, 0x1900) }, /* Visionite VCS-UC300 */ { USB_DEVICE(0x0d81, 0x1910) }, /* Visionite VCS-UM100 */ { } }; MODULE_DEVICE_TABLE(usb, pwc_device_table); static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id *id); static void usb_pwc_disconnect(struct usb_interface *intf); static void pwc_isoc_cleanup(struct pwc_device *pdev); static struct usb_driver pwc_driver = { .name = "Philips webcam", /* name */ .id_table = pwc_device_table, .probe = usb_pwc_probe, /* probe() */ .disconnect = usb_pwc_disconnect, /* disconnect() */ }; #define MAX_DEV_HINTS 20 #define MAX_ISOC_ERRORS 20 #ifdef CONFIG_USB_PWC_DEBUG int pwc_trace = PWC_DEBUG_LEVEL; #endif static int power_save = -1; static int leds[2] = { 100, 0 }; /***/ static const struct v4l2_file_operations pwc_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = vb2_fop_release, .read = vb2_fop_read, .poll = vb2_fop_poll, .mmap = vb2_fop_mmap, .unlocked_ioctl = video_ioctl2, }; static const struct video_device pwc_template = { .name = "Philips Webcam", /* Filled in later */ .release = video_device_release_empty, .fops = &pwc_fops, .ioctl_ops = &pwc_ioctl_ops, }; /***************************************************************************/ /* Private functions */ static void *pwc_alloc_urb_buffer(struct usb_device *dev, size_t size, dma_addr_t *dma_handle) { struct device *dmadev = dev->bus->sysdev; void *buffer = kmalloc(size, GFP_KERNEL); if (!buffer) return NULL; *dma_handle = dma_map_single(dmadev, buffer, size, DMA_FROM_DEVICE); if (dma_mapping_error(dmadev, *dma_handle)) { kfree(buffer); return NULL; } return buffer; } static void pwc_free_urb_buffer(struct usb_device *dev, size_t size, void *buffer, dma_addr_t dma_handle) { struct device *dmadev = dev->bus->sysdev; dma_unmap_single(dmadev, dma_handle, size, DMA_FROM_DEVICE); kfree(buffer); } static struct pwc_frame_buf *pwc_get_next_fill_buf(struct pwc_device *pdev) { unsigned long flags = 0; struct 
pwc_frame_buf *buf = NULL; spin_lock_irqsave(&pdev->queued_bufs_lock, flags); if (list_empty(&pdev->queued_bufs)) goto leave; buf = list_entry(pdev->queued_bufs.next, struct pwc_frame_buf, list); list_del(&buf->list); leave: spin_unlock_irqrestore(&pdev->queued_bufs_lock, flags); return buf; } static void pwc_snapshot_button(struct pwc_device *pdev, int down) { if (down) { PWC_TRACE("Snapshot button pressed.\n"); } else { PWC_TRACE("Snapshot button released.\n"); } #ifdef CONFIG_USB_PWC_INPUT_EVDEV if (pdev->button_dev) { input_report_key(pdev->button_dev, KEY_CAMERA, down); input_sync(pdev->button_dev); } #endif } static void pwc_frame_complete(struct pwc_device *pdev) { struct pwc_frame_buf *fbuf = pdev->fill_buf; /* The ToUCam Fun CMOS sensor causes the firmware to send 2 or 3 bogus frames on the USB wire after an exposure change. This conditition is however detected in the cam and a bit is set in the header. */ if (pdev->type == 730) { unsigned char *ptr = (unsigned char *)fbuf->data; if (ptr[1] == 1 && ptr[0] & 0x10) { PWC_TRACE("Hyundai CMOS sensor bug. Dropping frame.\n"); pdev->drop_frames += 2; } if ((ptr[0] ^ pdev->vmirror) & 0x01) { pwc_snapshot_button(pdev, ptr[0] & 0x01); } if ((ptr[0] ^ pdev->vmirror) & 0x02) { if (ptr[0] & 0x02) PWC_TRACE("Image is mirrored.\n"); else PWC_TRACE("Image is normal.\n"); } pdev->vmirror = ptr[0] & 0x03; /* Sometimes the trailer of the 730 is still sent as a 4 byte packet after a short frame; this condition is filtered out specifically. A 4 byte frame doesn't make sense anyway. So we get either this sequence: drop_bit set -> 4 byte frame -> short frame -> good frame Or this one: drop_bit set -> short frame -> good frame So we drop either 3 or 2 frames in all! 
*/ if (fbuf->filled == 4) pdev->drop_frames++; } else if (pdev->type == 740 || pdev->type == 720) { unsigned char *ptr = (unsigned char *)fbuf->data; if ((ptr[0] ^ pdev->vmirror) & 0x01) { pwc_snapshot_button(pdev, ptr[0] & 0x01); } pdev->vmirror = ptr[0] & 0x03; } /* In case we were instructed to drop the frame, do so silently. */ if (pdev->drop_frames > 0) { pdev->drop_frames--; } else { /* Check for underflow first */ if (fbuf->filled < pdev->frame_total_size) { PWC_DEBUG_FLOW("Frame buffer underflow (%d bytes); discarded.\n", fbuf->filled); } else { fbuf->vb.field = V4L2_FIELD_NONE; fbuf->vb.sequence = pdev->vframe_count; vb2_buffer_done(&fbuf->vb.vb2_buf, VB2_BUF_STATE_DONE); pdev->fill_buf = NULL; pdev->vsync = 0; } } /* !drop_frames */ pdev->vframe_count++; } /* This gets called for the Isochronous pipe (video). This is done in * interrupt time, so it has to be fast, not crash, and not stall. Neat. */ static void pwc_isoc_handler(struct urb *urb) { struct pwc_device *pdev = (struct pwc_device *)urb->context; struct device *dmadev = urb->dev->bus->sysdev; int i, fst, flen; unsigned char *iso_buf = NULL; trace_pwc_handler_enter(urb, pdev); if (urb->status == -ENOENT || urb->status == -ECONNRESET || urb->status == -ESHUTDOWN) { PWC_DEBUG_OPEN("URB (%p) unlinked %ssynchronously.\n", urb, urb->status == -ENOENT ? 
"" : "a"); return; } if (pdev->fill_buf == NULL) pdev->fill_buf = pwc_get_next_fill_buf(pdev); if (urb->status != 0) { const char *errmsg; errmsg = "Unknown"; switch(urb->status) { case -ENOSR: errmsg = "Buffer error (overrun)"; break; case -EPIPE: errmsg = "Stalled (device not responding)"; break; case -EOVERFLOW: errmsg = "Babble (bad cable?)"; break; case -EPROTO: errmsg = "Bit-stuff error (bad cable?)"; break; case -EILSEQ: errmsg = "CRC/Timeout (could be anything)"; break; case -ETIME: errmsg = "Device does not respond"; break; } PWC_ERROR("pwc_isoc_handler() called with status %d [%s].\n", urb->status, errmsg); /* Give up after a number of contiguous errors */ if (++pdev->visoc_errors > MAX_ISOC_ERRORS) { PWC_ERROR("Too many ISOC errors, bailing out.\n"); if (pdev->fill_buf) { vb2_buffer_done(&pdev->fill_buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); pdev->fill_buf = NULL; } } pdev->vsync = 0; /* Drop the current frame */ goto handler_end; } /* Reset ISOC error counter. We did get here, after all. */ pdev->visoc_errors = 0; dma_sync_single_for_cpu(dmadev, urb->transfer_dma, urb->transfer_buffer_length, DMA_FROM_DEVICE); /* vsync: 0 = don't copy data 1 = sync-hunt 2 = synched */ /* Compact data */ for (i = 0; i < urb->number_of_packets; i++) { fst = urb->iso_frame_desc[i].status; flen = urb->iso_frame_desc[i].actual_length; iso_buf = urb->transfer_buffer + urb->iso_frame_desc[i].offset; if (fst != 0) { PWC_ERROR("Iso frame %d has error %d\n", i, fst); continue; } if (flen > 0 && pdev->vsync) { struct pwc_frame_buf *fbuf = pdev->fill_buf; if (pdev->vsync == 1) { fbuf->vb.vb2_buf.timestamp = ktime_get_ns(); pdev->vsync = 2; } if (flen + fbuf->filled > pdev->frame_total_size) { PWC_ERROR("Frame overflow (%d > %d)\n", flen + fbuf->filled, pdev->frame_total_size); pdev->vsync = 0; /* Let's wait for an EOF */ } else { memcpy(fbuf->data + fbuf->filled, iso_buf, flen); fbuf->filled += flen; } } if (flen < pdev->vlast_packet_size) { /* Shorter packet... 
end of frame */ if (pdev->vsync == 2) pwc_frame_complete(pdev); if (pdev->fill_buf == NULL) pdev->fill_buf = pwc_get_next_fill_buf(pdev); if (pdev->fill_buf) { pdev->fill_buf->filled = 0; pdev->vsync = 1; } } pdev->vlast_packet_size = flen; } dma_sync_single_for_device(dmadev, urb->transfer_dma, urb->transfer_buffer_length, DMA_FROM_DEVICE); handler_end: trace_pwc_handler_exit(urb, pdev); i = usb_submit_urb(urb, GFP_ATOMIC); if (i != 0) PWC_ERROR("Error (%d) re-submitting urb in pwc_isoc_handler.\n", i); } /* Both v4l2_lock and vb_queue_lock should be locked when calling this */ static int pwc_isoc_init(struct pwc_device *pdev) { struct usb_device *udev; struct urb *urb; int i, j, ret; struct usb_interface *intf; struct usb_host_interface *idesc = NULL; int compression = 0; /* 0..3 = uncompressed..high */ pdev->vsync = 0; pdev->vlast_packet_size = 0; pdev->fill_buf = NULL; pdev->vframe_count = 0; pdev->visoc_errors = 0; udev = pdev->udev; retry: /* We first try with low compression and then retry with a higher compression setting if there is not enough bandwidth. 
*/ ret = pwc_set_video_mode(pdev, pdev->width, pdev->height, pdev->pixfmt, pdev->vframes, &compression, 1); /* Get the current alternate interface, adjust packet size */ intf = usb_ifnum_to_if(udev, 0); if (intf) idesc = usb_altnum_to_altsetting(intf, pdev->valternate); if (!idesc) return -EIO; /* Search video endpoint */ pdev->vmax_packet_size = -1; for (i = 0; i < idesc->desc.bNumEndpoints; i++) { if ((idesc->endpoint[i].desc.bEndpointAddress & 0xF) == pdev->vendpoint) { pdev->vmax_packet_size = le16_to_cpu(idesc->endpoint[i].desc.wMaxPacketSize); break; } } if (pdev->vmax_packet_size < 0 || pdev->vmax_packet_size > ISO_MAX_FRAME_SIZE) { PWC_ERROR("Failed to find packet size for video endpoint in current alternate setting.\n"); return -ENFILE; /* Odd error, that should be noticeable */ } /* Set alternate interface */ PWC_DEBUG_OPEN("Setting alternate interface %d\n", pdev->valternate); ret = usb_set_interface(pdev->udev, 0, pdev->valternate); if (ret == -ENOSPC && compression < 3) { compression++; goto retry; } if (ret < 0) return ret; /* Allocate and init Isochronuous urbs */ for (i = 0; i < MAX_ISO_BUFS; i++) { urb = usb_alloc_urb(ISO_FRAMES_PER_DESC, GFP_KERNEL); if (urb == NULL) { pwc_isoc_cleanup(pdev); return -ENOMEM; } pdev->urbs[i] = urb; PWC_DEBUG_MEMORY("Allocated URB at 0x%p\n", urb); urb->interval = 1; // devik urb->dev = udev; urb->pipe = usb_rcvisocpipe(udev, pdev->vendpoint); urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; urb->transfer_buffer_length = ISO_BUFFER_SIZE; urb->transfer_buffer = pwc_alloc_urb_buffer(udev, urb->transfer_buffer_length, &urb->transfer_dma); if (urb->transfer_buffer == NULL) { PWC_ERROR("Failed to allocate urb buffer %d\n", i); pwc_isoc_cleanup(pdev); return -ENOMEM; } urb->complete = pwc_isoc_handler; urb->context = pdev; urb->start_frame = 0; urb->number_of_packets = ISO_FRAMES_PER_DESC; for (j = 0; j < ISO_FRAMES_PER_DESC; j++) { urb->iso_frame_desc[j].offset = j * ISO_MAX_FRAME_SIZE; 
urb->iso_frame_desc[j].length = pdev->vmax_packet_size; } } /* link */ for (i = 0; i < MAX_ISO_BUFS; i++) { ret = usb_submit_urb(pdev->urbs[i], GFP_KERNEL); if (ret == -ENOSPC && compression < 3) { compression++; pwc_isoc_cleanup(pdev); goto retry; } if (ret) { PWC_ERROR("isoc_init() submit_urb %d failed with error %d\n", i, ret); pwc_isoc_cleanup(pdev); return ret; } PWC_DEBUG_MEMORY("URB 0x%p submitted.\n", pdev->urbs[i]); } /* All is done... */ PWC_DEBUG_OPEN("<< pwc_isoc_init()\n"); return 0; } static void pwc_iso_stop(struct pwc_device *pdev) { int i; /* Unlinking ISOC buffers one by one */ for (i = 0; i < MAX_ISO_BUFS; i++) { if (pdev->urbs[i]) { PWC_DEBUG_MEMORY("Unlinking URB %p\n", pdev->urbs[i]); usb_kill_urb(pdev->urbs[i]); } } } static void pwc_iso_free(struct pwc_device *pdev) { int i; /* Freeing ISOC buffers one by one */ for (i = 0; i < MAX_ISO_BUFS; i++) { struct urb *urb = pdev->urbs[i]; if (urb) { PWC_DEBUG_MEMORY("Freeing URB\n"); if (urb->transfer_buffer) pwc_free_urb_buffer(urb->dev, urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); usb_free_urb(urb); pdev->urbs[i] = NULL; } } } /* Both v4l2_lock and vb_queue_lock should be locked when calling this */ static void pwc_isoc_cleanup(struct pwc_device *pdev) { PWC_DEBUG_OPEN(">> pwc_isoc_cleanup()\n"); pwc_iso_stop(pdev); pwc_iso_free(pdev); usb_set_interface(pdev->udev, 0, 0); PWC_DEBUG_OPEN("<< pwc_isoc_cleanup()\n"); } /* Must be called with vb_queue_lock hold */ static void pwc_cleanup_queued_bufs(struct pwc_device *pdev, enum vb2_buffer_state state) { unsigned long flags = 0; spin_lock_irqsave(&pdev->queued_bufs_lock, flags); while (!list_empty(&pdev->queued_bufs)) { struct pwc_frame_buf *buf; buf = list_entry(pdev->queued_bufs.next, struct pwc_frame_buf, list); list_del(&buf->list); vb2_buffer_done(&buf->vb.vb2_buf, state); } spin_unlock_irqrestore(&pdev->queued_bufs_lock, flags); } #ifdef CONFIG_USB_PWC_DEBUG static const char *pwc_sensor_type_to_string(unsigned int 
sensor_type) { switch(sensor_type) { case 0x00: return "Hyundai CMOS sensor"; case 0x20: return "Sony CCD sensor + TDA8787"; case 0x2E: return "Sony CCD sensor + Exas 98L59"; case 0x2F: return "Sony CCD sensor + ADI 9804"; case 0x30: return "Sharp CCD sensor + TDA8787"; case 0x3E: return "Sharp CCD sensor + Exas 98L59"; case 0x3F: return "Sharp CCD sensor + ADI 9804"; case 0x40: return "UPA 1021 sensor"; case 0x100: return "VGA sensor"; case 0x101: return "PAL MR sensor"; default: return "unknown type of sensor"; } } #endif /***************************************************************************/ /* Video4Linux functions */ static void pwc_video_release(struct v4l2_device *v) { struct pwc_device *pdev = container_of(v, struct pwc_device, v4l2_dev); v4l2_ctrl_handler_free(&pdev->ctrl_handler); v4l2_device_unregister(&pdev->v4l2_dev); kfree(pdev->ctrl_buf); kfree(pdev); } /***************************************************************************/ /* Videobuf2 operations */ static int queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct pwc_device *pdev = vb2_get_drv_priv(vq); int size; if (*nbuffers < MIN_FRAMES) *nbuffers = MIN_FRAMES; else if (*nbuffers > MAX_FRAMES) *nbuffers = MAX_FRAMES; *nplanes = 1; size = pwc_get_size(pdev, MAX_WIDTH, MAX_HEIGHT); sizes[0] = PAGE_ALIGN(pwc_image_sizes[size][0] * pwc_image_sizes[size][1] * 3 / 2); return 0; } static int buffer_init(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); /* need vmalloc since frame buffer > 128K */ buf->data = vzalloc(PWC_FRAME_SIZE); if (buf->data == NULL) return -ENOMEM; return 0; } static int buffer_prepare(struct vb2_buffer *vb) { struct pwc_device *pdev = vb2_get_drv_priv(vb->vb2_queue); /* Don't allow queueing new buffers after device disconnection */ if (!pdev->udev) return -ENODEV; return 0; } 
static void buffer_finish(struct vb2_buffer *vb) { struct pwc_device *pdev = vb2_get_drv_priv(vb->vb2_queue); struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); if (vb->state == VB2_BUF_STATE_DONE) { /* * Application has called dqbuf and is getting back a buffer * we've filled, take the pwc data we've stored in buf->data * and decompress it into a usable format, storing the result * in the vb2_buffer. */ pwc_decompress(pdev, buf); } } static void buffer_cleanup(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); vfree(buf->data); } static void buffer_queue(struct vb2_buffer *vb) { struct pwc_device *pdev = vb2_get_drv_priv(vb->vb2_queue); struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); unsigned long flags = 0; /* Check the device has not disconnected between prep and queuing */ if (!pdev->udev) { vb2_buffer_done(vb, VB2_BUF_STATE_ERROR); return; } spin_lock_irqsave(&pdev->queued_bufs_lock, flags); list_add_tail(&buf->list, &pdev->queued_bufs); spin_unlock_irqrestore(&pdev->queued_bufs_lock, flags); } static int start_streaming(struct vb2_queue *vq, unsigned int count) { struct pwc_device *pdev = vb2_get_drv_priv(vq); int r; if (!pdev->udev) return -ENODEV; if (mutex_lock_interruptible(&pdev->v4l2_lock)) return -ERESTARTSYS; /* Turn on camera and set LEDS on */ pwc_camera_power(pdev, 1); pwc_set_leds(pdev, leds[0], leds[1]); r = pwc_isoc_init(pdev); if (r) { /* If we failed turn camera and LEDS back off */ pwc_set_leds(pdev, 0, 0); pwc_camera_power(pdev, 0); /* And cleanup any queued bufs!! 
*/ pwc_cleanup_queued_bufs(pdev, VB2_BUF_STATE_QUEUED); } mutex_unlock(&pdev->v4l2_lock); return r; } static void stop_streaming(struct vb2_queue *vq) { struct pwc_device *pdev = vb2_get_drv_priv(vq); mutex_lock(&pdev->v4l2_lock); if (pdev->udev) { pwc_set_leds(pdev, 0, 0); pwc_camera_power(pdev, 0); pwc_isoc_cleanup(pdev); } pwc_cleanup_queued_bufs(pdev, VB2_BUF_STATE_ERROR); if (pdev->fill_buf) vb2_buffer_done(&pdev->fill_buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); mutex_unlock(&pdev->v4l2_lock); } static const struct vb2_ops pwc_vb_queue_ops = { .queue_setup = queue_setup, .buf_init = buffer_init, .buf_prepare = buffer_prepare, .buf_finish = buffer_finish, .buf_cleanup = buffer_cleanup, .buf_queue = buffer_queue, .start_streaming = start_streaming, .stop_streaming = stop_streaming, .wait_prepare = vb2_ops_wait_prepare, .wait_finish = vb2_ops_wait_finish, }; /***************************************************************************/ /* USB functions */ /* This function gets called when a new device is plugged in or the usb core * is loaded. */ static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct pwc_device *pdev = NULL; int vendor_id, product_id, type_id; int rc; int features = 0; int compression = 0; int my_power_save = power_save; char serial_number[30], *name; vendor_id = le16_to_cpu(udev->descriptor.idVendor); product_id = le16_to_cpu(udev->descriptor.idProduct); /* Check if we can handle this device */ PWC_DEBUG_PROBE("probe() called [%04X %04X], if %d\n", vendor_id, product_id, intf->altsetting->desc.bInterfaceNumber); /* the interfaces are probed one by one. We are only interested in the video interface (0) now. Interface 1 is the Audio Control, and interface 2 Audio itself. 
*/ if (intf->altsetting->desc.bInterfaceNumber > 0) return -ENODEV; if (vendor_id == 0x0471) { switch (product_id) { case 0x0302: PWC_INFO("Philips PCA645VC USB webcam detected.\n"); name = "Philips 645 webcam"; type_id = 645; break; case 0x0303: PWC_INFO("Philips PCA646VC USB webcam detected.\n"); name = "Philips 646 webcam"; type_id = 646; break; case 0x0304: PWC_INFO("Askey VC010 type 2 USB webcam detected.\n"); name = "Askey VC010 webcam"; type_id = 646; break; case 0x0307: PWC_INFO("Philips PCVC675K (Vesta) USB webcam detected.\n"); name = "Philips 675 webcam"; type_id = 675; break; case 0x0308: PWC_INFO("Philips PCVC680K (Vesta Pro) USB webcam detected.\n"); name = "Philips 680 webcam"; type_id = 680; break; case 0x030C: PWC_INFO("Philips PCVC690K (Vesta Pro Scan) USB webcam detected.\n"); name = "Philips 690 webcam"; type_id = 690; break; case 0x0310: PWC_INFO("Philips PCVC730K (ToUCam Fun)/PCVC830 (ToUCam II) USB webcam detected.\n"); name = "Philips 730 webcam"; type_id = 730; break; case 0x0311: PWC_INFO("Philips PCVC740K (ToUCam Pro)/PCVC840 (ToUCam II) USB webcam detected.\n"); name = "Philips 740 webcam"; type_id = 740; break; case 0x0312: PWC_INFO("Philips PCVC750K (ToUCam Pro Scan) USB webcam detected.\n"); name = "Philips 750 webcam"; type_id = 750; break; case 0x0313: PWC_INFO("Philips PCVC720K/40 (ToUCam XS) USB webcam detected.\n"); name = "Philips 720K/40 webcam"; type_id = 720; break; case 0x0329: PWC_INFO("Philips SPC 900NC USB webcam detected.\n"); name = "Philips SPC 900NC webcam"; type_id = 740; break; case 0x032C: PWC_INFO("Philips SPC 880NC USB webcam detected.\n"); name = "Philips SPC 880NC webcam"; type_id = 740; break; default: return -ENODEV; } } else if (vendor_id == 0x069A) { switch(product_id) { case 0x0001: PWC_INFO("Askey VC010 type 1 USB webcam detected.\n"); name = "Askey VC010 webcam"; type_id = 645; break; default: return -ENODEV; } } else if (vendor_id == 0x046d) { switch(product_id) { case 0x08b0: PWC_INFO("Logitech 
QuickCam Pro 3000 USB webcam detected.\n"); name = "Logitech QuickCam Pro 3000"; type_id = 740; /* CCD sensor */ break; case 0x08b1: PWC_INFO("Logitech QuickCam Notebook Pro USB webcam detected.\n"); name = "Logitech QuickCam Notebook Pro"; type_id = 740; /* CCD sensor */ break; case 0x08b2: PWC_INFO("Logitech QuickCam 4000 Pro USB webcam detected.\n"); name = "Logitech QuickCam Pro 4000"; type_id = 740; /* CCD sensor */ if (my_power_save == -1) my_power_save = 1; break; case 0x08b3: PWC_INFO("Logitech QuickCam Zoom USB webcam detected.\n"); name = "Logitech QuickCam Zoom"; type_id = 740; /* CCD sensor */ break; case 0x08B4: PWC_INFO("Logitech QuickCam Zoom (new model) USB webcam detected.\n"); name = "Logitech QuickCam Zoom"; type_id = 740; /* CCD sensor */ if (my_power_save == -1) my_power_save = 1; break; case 0x08b5: PWC_INFO("Logitech QuickCam Orbit/Sphere USB webcam detected.\n"); name = "Logitech QuickCam Orbit"; type_id = 740; /* CCD sensor */ if (my_power_save == -1) my_power_save = 1; features |= FEATURE_MOTOR_PANTILT; break; case 0x08b6: PWC_INFO("Logitech/Cisco VT Camera webcam detected.\n"); name = "Cisco VT Camera"; type_id = 740; /* CCD sensor */ break; case 0x08b7: PWC_INFO("Logitech ViewPort AV 100 webcam detected.\n"); name = "Logitech ViewPort AV 100"; type_id = 740; /* CCD sensor */ break; case 0x08b8: /* Where this released? 
*/ PWC_INFO("Logitech QuickCam detected (reserved ID).\n"); name = "Logitech QuickCam (res.)"; type_id = 730; /* Assuming CMOS */ break; default: return -ENODEV; } } else if (vendor_id == 0x055d) { /* I don't know the difference between the C10 and the C30; I suppose the difference is the sensor, but both cameras work equally well with a type_id of 675 */ switch(product_id) { case 0x9000: PWC_INFO("Samsung MPC-C10 USB webcam detected.\n"); name = "Samsung MPC-C10"; type_id = 675; break; case 0x9001: PWC_INFO("Samsung MPC-C30 USB webcam detected.\n"); name = "Samsung MPC-C30"; type_id = 675; break; case 0x9002: PWC_INFO("Samsung SNC-35E (v3.0) USB webcam detected.\n"); name = "Samsung MPC-C30"; type_id = 740; break; default: return -ENODEV; } } else if (vendor_id == 0x041e) { switch(product_id) { case 0x400c: PWC_INFO("Creative Labs Webcam 5 detected.\n"); name = "Creative Labs Webcam 5"; type_id = 730; if (my_power_save == -1) my_power_save = 1; break; case 0x4011: PWC_INFO("Creative Labs Webcam Pro Ex detected.\n"); name = "Creative Labs Webcam Pro Ex"; type_id = 740; break; default: return -ENODEV; } } else if (vendor_id == 0x04cc) { switch(product_id) { case 0x8116: PWC_INFO("Sotec Afina Eye USB webcam detected.\n"); name = "Sotec Afina Eye"; type_id = 730; break; default: return -ENODEV; } } else if (vendor_id == 0x06be) { switch(product_id) { case 0x8116: /* This is essentially the same cam as the Sotec Afina Eye */ PWC_INFO("AME Co. Afina Eye USB webcam detected.\n"); name = "AME Co. 
Afina Eye"; type_id = 750; break; default: return -ENODEV; } } else if (vendor_id == 0x0d81) { switch(product_id) { case 0x1900: PWC_INFO("Visionite VCS-UC300 USB webcam detected.\n"); name = "Visionite VCS-UC300"; type_id = 740; /* CCD sensor */ break; case 0x1910: PWC_INFO("Visionite VCS-UM100 USB webcam detected.\n"); name = "Visionite VCS-UM100"; type_id = 730; /* CMOS sensor */ break; default: return -ENODEV; } } else return -ENODEV; /* Not any of the know types; but the list keeps growing. */ if (my_power_save == -1) my_power_save = 0; memset(serial_number, 0, 30); usb_string(udev, udev->descriptor.iSerialNumber, serial_number, 29); PWC_DEBUG_PROBE("Device serial number is %s\n", serial_number); if (udev->descriptor.bNumConfigurations > 1) PWC_WARNING("Warning: more than 1 configuration available.\n"); /* Allocate structure, initialize pointers, mutexes, etc. and link it to the usb_device */ pdev = kzalloc(sizeof(struct pwc_device), GFP_KERNEL); if (pdev == NULL) { PWC_ERROR("Oops, could not allocate memory for pwc_device.\n"); return -ENOMEM; } pdev->type = type_id; pdev->features = features; pwc_construct(pdev); /* set min/max sizes correct */ mutex_init(&pdev->v4l2_lock); mutex_init(&pdev->vb_queue_lock); spin_lock_init(&pdev->queued_bufs_lock); INIT_LIST_HEAD(&pdev->queued_bufs); pdev->udev = udev; pdev->power_save = my_power_save; /* Init videobuf2 queue structure */ pdev->vb_queue.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; pdev->vb_queue.io_modes = VB2_MMAP | VB2_USERPTR | VB2_READ; pdev->vb_queue.drv_priv = pdev; pdev->vb_queue.buf_struct_size = sizeof(struct pwc_frame_buf); pdev->vb_queue.ops = &pwc_vb_queue_ops; pdev->vb_queue.mem_ops = &vb2_vmalloc_memops; pdev->vb_queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; rc = vb2_queue_init(&pdev->vb_queue); if (rc < 0) { PWC_ERROR("Oops, could not initialize vb2 queue.\n"); goto err_free_mem; } /* Init video_device structure */ pdev->vdev = pwc_template; strscpy(pdev->vdev.name, name, 
sizeof(pdev->vdev.name)); pdev->vdev.queue = &pdev->vb_queue; pdev->vdev.queue->lock = &pdev->vb_queue_lock; video_set_drvdata(&pdev->vdev, pdev); pdev->release = le16_to_cpu(udev->descriptor.bcdDevice); PWC_DEBUG_PROBE("Release: %04x\n", pdev->release); /* Allocate USB command buffers */ pdev->ctrl_buf = kmalloc(sizeof(pdev->cmd_buf), GFP_KERNEL); if (!pdev->ctrl_buf) { PWC_ERROR("Oops, could not allocate memory for pwc_device.\n"); rc = -ENOMEM; goto err_free_mem; } #ifdef CONFIG_USB_PWC_DEBUG /* Query sensor type */ if (pwc_get_cmos_sensor(pdev, &rc) >= 0) { PWC_DEBUG_OPEN("This %s camera is equipped with a %s (%d).\n", pdev->vdev.name, pwc_sensor_type_to_string(rc), rc); } #endif /* Set the leds off */ pwc_set_leds(pdev, 0, 0); /* Setup initial videomode */ rc = pwc_set_video_mode(pdev, MAX_WIDTH, MAX_HEIGHT, V4L2_PIX_FMT_YUV420, 30, &compression, 1); if (rc) goto err_free_mem; /* Register controls (and read default values from camera */ rc = pwc_init_controls(pdev); if (rc) { PWC_ERROR("Failed to register v4l2 controls (%d).\n", rc); goto err_free_mem; } /* And powerdown the camera until streaming starts */ pwc_camera_power(pdev, 0); /* Register the v4l2_device structure */ pdev->v4l2_dev.release = pwc_video_release; rc = v4l2_device_register(&intf->dev, &pdev->v4l2_dev); if (rc) { PWC_ERROR("Failed to register v4l2-device (%d).\n", rc); goto err_free_controls; } pdev->v4l2_dev.ctrl_handler = &pdev->ctrl_handler; pdev->vdev.v4l2_dev = &pdev->v4l2_dev; pdev->vdev.lock = &pdev->v4l2_lock; pdev->vdev.device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING | V4L2_CAP_READWRITE; rc = video_register_device(&pdev->vdev, VFL_TYPE_VIDEO, -1); if (rc < 0) { PWC_ERROR("Failed to register as video device (%d).\n", rc); goto err_unregister_v4l2_dev; } PWC_INFO("Registered as %s.\n", video_device_node_name(&pdev->vdev)); #ifdef CONFIG_USB_PWC_INPUT_EVDEV /* register webcam snapshot button input device */ pdev->button_dev = input_allocate_device(); if (!pdev->button_dev) { 
rc = -ENOMEM; goto err_video_unreg; } usb_make_path(udev, pdev->button_phys, sizeof(pdev->button_phys)); strlcat(pdev->button_phys, "/input0", sizeof(pdev->button_phys)); pdev->button_dev->name = "PWC snapshot button"; pdev->button_dev->phys = pdev->button_phys; usb_to_input_id(pdev->udev, &pdev->button_dev->id); pdev->button_dev->dev.parent = &pdev->udev->dev; pdev->button_dev->evbit[0] = BIT_MASK(EV_KEY); pdev->button_dev->keybit[BIT_WORD(KEY_CAMERA)] = BIT_MASK(KEY_CAMERA); rc = input_register_device(pdev->button_dev); if (rc) { input_free_device(pdev->button_dev); pdev->button_dev = NULL; goto err_video_unreg; } #endif return 0; #ifdef CONFIG_USB_PWC_INPUT_EVDEV err_video_unreg: video_unregister_device(&pdev->vdev); #endif err_unregister_v4l2_dev: v4l2_device_unregister(&pdev->v4l2_dev); err_free_controls: v4l2_ctrl_handler_free(&pdev->ctrl_handler); err_free_mem: kfree(pdev->ctrl_buf); kfree(pdev); return rc; } /* The user yanked out the cable... */ static void usb_pwc_disconnect(struct usb_interface *intf) { struct v4l2_device *v = usb_get_intfdata(intf); struct pwc_device *pdev = container_of(v, struct pwc_device, v4l2_dev); mutex_lock(&pdev->vb_queue_lock); mutex_lock(&pdev->v4l2_lock); /* No need to keep the urbs around after disconnection */ if (pdev->vb_queue.streaming) pwc_isoc_cleanup(pdev); pdev->udev = NULL; v4l2_device_disconnect(&pdev->v4l2_dev); video_unregister_device(&pdev->vdev); mutex_unlock(&pdev->v4l2_lock); mutex_unlock(&pdev->vb_queue_lock); #ifdef CONFIG_USB_PWC_INPUT_EVDEV if (pdev->button_dev) input_unregister_device(pdev->button_dev); #endif v4l2_device_put(&pdev->v4l2_dev); } /* * Initialization code & module stuff */ static unsigned int leds_nargs; #ifdef CONFIG_USB_PWC_DEBUG module_param_named(trace, pwc_trace, int, 0644); #endif module_param(power_save, int, 0644); module_param_array(leds, int, &leds_nargs, 0444); #ifdef CONFIG_USB_PWC_DEBUG MODULE_PARM_DESC(trace, "For debugging purposes"); #endif MODULE_PARM_DESC(power_save, 
"Turn power saving for new cameras on or off"); MODULE_PARM_DESC(leds, "LED on,off time in milliseconds"); MODULE_DESCRIPTION("Philips & OEM USB webcam driver"); MODULE_AUTHOR("Luc Saillard <luc@saillard.org>"); MODULE_LICENSE("GPL"); MODULE_ALIAS("pwcx"); MODULE_VERSION( PWC_VERSION ); module_usb_driver(pwc_driver);
1 1 1 1 5 1 4 4 19 18 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 // SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for Linux input subsystem * * Copyright (c) 2006 Anssi Hannula <anssi.hannula@gmail.com> * Copyright (c) 2006 Dmitry Torokhov <dtor@mail.ru> */ /* #define DEBUG */ #include <linux/input.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/sched.h> #include <linux/slab.h> /* * Check that the effect_id is a valid effect and whether the user * is the owner */ static int check_effect_access(struct ff_device *ff, int effect_id, struct file *file) { if (effect_id < 0 || effect_id >= 
ff->max_effects || !ff->effect_owners[effect_id]) return -EINVAL; if (file && ff->effect_owners[effect_id] != file) return -EACCES; return 0; } /* * Checks whether 2 effects can be combined together */ static inline int check_effects_compatible(struct ff_effect *e1, struct ff_effect *e2) { return e1->type == e2->type && (e1->type != FF_PERIODIC || e1->u.periodic.waveform == e2->u.periodic.waveform); } /* * Convert an effect into compatible one */ static int compat_effect(struct ff_device *ff, struct ff_effect *effect) { int magnitude; switch (effect->type) { case FF_RUMBLE: if (!test_bit(FF_PERIODIC, ff->ffbit)) return -EINVAL; /* * calculate magnitude of sine wave as average of rumble's * 2/3 of strong magnitude and 1/3 of weak magnitude */ magnitude = effect->u.rumble.strong_magnitude / 3 + effect->u.rumble.weak_magnitude / 6; effect->type = FF_PERIODIC; effect->u.periodic.waveform = FF_SINE; effect->u.periodic.period = 50; effect->u.periodic.magnitude = magnitude; effect->u.periodic.offset = 0; effect->u.periodic.phase = 0; effect->u.periodic.envelope.attack_length = 0; effect->u.periodic.envelope.attack_level = 0; effect->u.periodic.envelope.fade_length = 0; effect->u.periodic.envelope.fade_level = 0; return 0; default: /* Let driver handle conversion */ return 0; } } /** * input_ff_upload() - upload effect into force-feedback device * @dev: input device * @effect: effect to be uploaded * @file: owner of the effect */ int input_ff_upload(struct input_dev *dev, struct ff_effect *effect, struct file *file) { struct ff_device *ff = dev->ff; struct ff_effect *old; int ret = 0; int id; if (!test_bit(EV_FF, dev->evbit)) return -ENOSYS; if (effect->type < FF_EFFECT_MIN || effect->type > FF_EFFECT_MAX || !test_bit(effect->type, dev->ffbit)) { dev_dbg(&dev->dev, "invalid or not supported effect type in upload\n"); return -EINVAL; } if (effect->type == FF_PERIODIC && (effect->u.periodic.waveform < FF_WAVEFORM_MIN || effect->u.periodic.waveform > FF_WAVEFORM_MAX || 
!test_bit(effect->u.periodic.waveform, dev->ffbit))) { dev_dbg(&dev->dev, "invalid or not supported wave form in upload\n"); return -EINVAL; } if (!test_bit(effect->type, ff->ffbit)) { ret = compat_effect(ff, effect); if (ret) return ret; } mutex_lock(&ff->mutex); if (effect->id == -1) { for (id = 0; id < ff->max_effects; id++) if (!ff->effect_owners[id]) break; if (id >= ff->max_effects) { ret = -ENOSPC; goto out; } effect->id = id; old = NULL; } else { id = effect->id; ret = check_effect_access(ff, id, file); if (ret) goto out; old = &ff->effects[id]; if (!check_effects_compatible(effect, old)) { ret = -EINVAL; goto out; } } ret = ff->upload(dev, effect, old); if (ret) goto out; spin_lock_irq(&dev->event_lock); ff->effects[id] = *effect; ff->effect_owners[id] = file; spin_unlock_irq(&dev->event_lock); out: mutex_unlock(&ff->mutex); return ret; } EXPORT_SYMBOL_GPL(input_ff_upload); /* * Erases the effect if the requester is also the effect owner. The mutex * should already be locked before calling this function. */ static int erase_effect(struct input_dev *dev, int effect_id, struct file *file) { struct ff_device *ff = dev->ff; int error; error = check_effect_access(ff, effect_id, file); if (error) return error; spin_lock_irq(&dev->event_lock); ff->playback(dev, effect_id, 0); ff->effect_owners[effect_id] = NULL; spin_unlock_irq(&dev->event_lock); if (ff->erase) { error = ff->erase(dev, effect_id); if (error) { spin_lock_irq(&dev->event_lock); ff->effect_owners[effect_id] = file; spin_unlock_irq(&dev->event_lock); return error; } } return 0; } /** * input_ff_erase - erase a force-feedback effect from device * @dev: input device to erase effect from * @effect_id: id of the effect to be erased * @file: purported owner of the request * * This function erases a force-feedback effect from specified device. * The effect will only be erased if it was uploaded through the same * file handle that is requesting erase. 
*/ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file) { struct ff_device *ff = dev->ff; int ret; if (!test_bit(EV_FF, dev->evbit)) return -ENOSYS; mutex_lock(&ff->mutex); ret = erase_effect(dev, effect_id, file); mutex_unlock(&ff->mutex); return ret; } EXPORT_SYMBOL_GPL(input_ff_erase); /* * input_ff_flush - erase all effects owned by a file handle * @dev: input device to erase effect from * @file: purported owner of the effects * * This function erases all force-feedback effects associated with * the given owner from specified device. Note that @file may be %NULL, * in which case all effects will be erased. */ int input_ff_flush(struct input_dev *dev, struct file *file) { struct ff_device *ff = dev->ff; int i; dev_dbg(&dev->dev, "flushing now\n"); mutex_lock(&ff->mutex); for (i = 0; i < ff->max_effects; i++) erase_effect(dev, i, file); mutex_unlock(&ff->mutex); return 0; } EXPORT_SYMBOL_GPL(input_ff_flush); /** * input_ff_event() - generic handler for force-feedback events * @dev: input device to send the effect to * @type: event type (anything but EV_FF is ignored) * @code: event code * @value: event value */ int input_ff_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { struct ff_device *ff = dev->ff; if (type != EV_FF) return 0; switch (code) { case FF_GAIN: if (!test_bit(FF_GAIN, dev->ffbit) || value > 0xffffU) break; ff->set_gain(dev, value); break; case FF_AUTOCENTER: if (!test_bit(FF_AUTOCENTER, dev->ffbit) || value > 0xffffU) break; ff->set_autocenter(dev, value); break; default: if (check_effect_access(ff, code, NULL) == 0) ff->playback(dev, code, value); break; } return 0; } EXPORT_SYMBOL_GPL(input_ff_event); /** * input_ff_create() - create force-feedback device * @dev: input device supporting force-feedback * @max_effects: maximum number of effects supported by the device * * This function allocates all necessary memory for a force feedback * portion of an input device and installs all default 
handlers. * @dev->ffbit should be already set up before calling this function. * Once ff device is created you need to setup its upload, erase, * playback and other handlers before registering input device */ int input_ff_create(struct input_dev *dev, unsigned int max_effects) { struct ff_device *ff; size_t ff_dev_size; int i; if (!max_effects) { dev_err(&dev->dev, "cannot allocate device without any effects\n"); return -EINVAL; } if (max_effects > FF_MAX_EFFECTS) { dev_err(&dev->dev, "cannot allocate more than FF_MAX_EFFECTS effects\n"); return -EINVAL; } ff_dev_size = sizeof(struct ff_device) + max_effects * sizeof(struct file *); if (ff_dev_size < max_effects) /* overflow */ return -EINVAL; ff = kzalloc(ff_dev_size, GFP_KERNEL); if (!ff) return -ENOMEM; ff->effects = kcalloc(max_effects, sizeof(struct ff_effect), GFP_KERNEL); if (!ff->effects) { kfree(ff); return -ENOMEM; } ff->max_effects = max_effects; mutex_init(&ff->mutex); dev->ff = ff; dev->flush = input_ff_flush; dev->event = input_ff_event; __set_bit(EV_FF, dev->evbit); /* Copy "true" bits into ff device bitmap */ for_each_set_bit(i, dev->ffbit, FF_CNT) __set_bit(i, ff->ffbit); /* we can emulate RUMBLE with periodic effects */ if (test_bit(FF_PERIODIC, ff->ffbit)) __set_bit(FF_RUMBLE, dev->ffbit); return 0; } EXPORT_SYMBOL_GPL(input_ff_create); /** * input_ff_destroy() - frees force feedback portion of input device * @dev: input device supporting force feedback * * This function is only needed in error path as input core will * automatically free force feedback structures when device is * destroyed. */ void input_ff_destroy(struct input_dev *dev) { struct ff_device *ff = dev->ff; __clear_bit(EV_FF, dev->evbit); if (ff) { if (ff->destroy) ff->destroy(ff); kfree(ff->private); kfree(ff->effects); kfree(ff); dev->ff = NULL; } } EXPORT_SYMBOL_GPL(input_ff_destroy);
13 269 269 5 5 10 7 3 10 10 10 1 9 13 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 // SPDX-License-Identifier: GPL-2.0 /* * usb port device code * * Copyright (C) 2012 Intel Corp * * Author: Lan Tianyu <tianyu.lan@intel.com> */ #include <linux/kstrtox.h> #include <linux/slab.h> #include <linux/pm_qos.h> #include <linux/component.h> #include "hub.h" static int usb_port_block_power_off; static const struct attribute_group *port_dev_group[]; static ssize_t early_stop_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); return sysfs_emit(buf, "%s\n", port_dev->early_stop ? 
"yes" : "no"); } static ssize_t early_stop_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_port *port_dev = to_usb_port(dev); bool value; if (kstrtobool(buf, &value)) return -EINVAL; if (value) port_dev->early_stop = 1; else port_dev->early_stop = 0; return count; } static DEVICE_ATTR_RW(early_stop); static ssize_t disable_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_interface *intf = to_usb_interface(hub->intfdev); int port1 = port_dev->portnum; u16 portstatus, unused; bool disabled; int rc; rc = usb_autopm_get_interface(intf); if (rc < 0) return rc; usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; goto out_hdev_lock; } usb_hub_port_status(hub, port1, &portstatus, &unused); disabled = !usb_port_is_power_on(hub, portstatus); out_hdev_lock: usb_unlock_device(hdev); usb_autopm_put_interface(intf); if (rc) return rc; return sysfs_emit(buf, "%s\n", disabled ? 
"1" : "0"); } static ssize_t disable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_interface *intf = to_usb_interface(hub->intfdev); int port1 = port_dev->portnum; bool disabled; int rc; rc = kstrtobool(buf, &disabled); if (rc) return rc; rc = usb_autopm_get_interface(intf); if (rc < 0) return rc; usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; goto out_hdev_lock; } if (disabled && port_dev->child) usb_disconnect(&port_dev->child); rc = usb_hub_set_port_power(hdev, hub, port1, !disabled); if (disabled) { usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (!port_dev->is_superspeed) usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_ENABLE); } if (!rc) rc = count; out_hdev_lock: usb_unlock_device(hdev); usb_autopm_put_interface(intf); return rc; } static DEVICE_ATTR_RW(disable); static ssize_t location_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); return sprintf(buf, "0x%08x\n", port_dev->location); } static DEVICE_ATTR_RO(location); static ssize_t connect_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); char *result; switch (port_dev->connect_type) { case USB_PORT_CONNECT_TYPE_HOT_PLUG: result = "hotplug"; break; case USB_PORT_CONNECT_TYPE_HARD_WIRED: result = "hardwired"; break; case USB_PORT_NOT_USED: result = "not used"; break; default: result = "unknown"; break; } return sprintf(buf, "%s\n", result); } static DEVICE_ATTR_RO(connect_type); static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); enum usb_device_state state = READ_ONCE(port_dev->state); return sysfs_emit(buf, "%s\n", 
usb_state_string(state)); } static DEVICE_ATTR_RO(state); static ssize_t over_current_count_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); return sprintf(buf, "%u\n", port_dev->over_current_count); } static DEVICE_ATTR_RO(over_current_count); static ssize_t quirks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); return sprintf(buf, "%08x\n", port_dev->quirks); } static ssize_t quirks_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_port *port_dev = to_usb_port(dev); u32 value; if (kstrtou32(buf, 16, &value)) return -EINVAL; port_dev->quirks = value; return count; } static DEVICE_ATTR_RW(quirks); static ssize_t usb3_lpm_permit_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); const char *p; if (port_dev->usb3_lpm_u1_permit) { if (port_dev->usb3_lpm_u2_permit) p = "u1_u2"; else p = "u1"; } else { if (port_dev->usb3_lpm_u2_permit) p = "u2"; else p = "0"; } return sprintf(buf, "%s\n", p); } static ssize_t usb3_lpm_permit_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *udev = port_dev->child; struct usb_hcd *hcd; if (!strncmp(buf, "u1_u2", 5)) { port_dev->usb3_lpm_u1_permit = 1; port_dev->usb3_lpm_u2_permit = 1; } else if (!strncmp(buf, "u1", 2)) { port_dev->usb3_lpm_u1_permit = 1; port_dev->usb3_lpm_u2_permit = 0; } else if (!strncmp(buf, "u2", 2)) { port_dev->usb3_lpm_u1_permit = 0; port_dev->usb3_lpm_u2_permit = 1; } else if (!strncmp(buf, "0", 1)) { port_dev->usb3_lpm_u1_permit = 0; port_dev->usb3_lpm_u2_permit = 0; } else return -EINVAL; /* If device is connected to the port, disable or enable lpm * to make new u1 u2 setting take effect immediately. 
*/ if (udev) { hcd = bus_to_hcd(udev->bus); if (!hcd) return -EINVAL; usb_lock_device(udev); mutex_lock(hcd->bandwidth_mutex); if (!usb_disable_lpm(udev)) usb_enable_lpm(udev); mutex_unlock(hcd->bandwidth_mutex); usb_unlock_device(udev); } return count; } static DEVICE_ATTR_RW(usb3_lpm_permit); static struct attribute *port_dev_attrs[] = { &dev_attr_connect_type.attr, &dev_attr_state.attr, &dev_attr_location.attr, &dev_attr_quirks.attr, &dev_attr_over_current_count.attr, &dev_attr_disable.attr, &dev_attr_early_stop.attr, NULL, }; static const struct attribute_group port_dev_attr_grp = { .attrs = port_dev_attrs, }; static const struct attribute_group *port_dev_group[] = { &port_dev_attr_grp, NULL, }; static struct attribute *port_dev_usb3_attrs[] = { &dev_attr_usb3_lpm_permit.attr, NULL, }; static const struct attribute_group port_dev_usb3_attr_grp = { .attrs = port_dev_usb3_attrs, }; static const struct attribute_group *port_dev_usb3_group[] = { &port_dev_attr_grp, &port_dev_usb3_attr_grp, NULL, }; static void usb_port_device_release(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); kfree(port_dev->req); kfree(port_dev); } #ifdef CONFIG_PM static int usb_port_runtime_resume(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_interface *intf = to_usb_interface(dev->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_device *udev = port_dev->child; struct usb_port *peer = port_dev->peer; int port1 = port_dev->portnum; int retval; if (!hub) return -EINVAL; if (hub->in_reset) { set_bit(port1, hub->power_bits); return 0; } /* * Power on our usb3 peer before this usb2 port to prevent a usb3 * device from degrading to its usb2 connection */ if (!port_dev->is_superspeed && peer) pm_runtime_get_sync(&peer->dev); retval = usb_autopm_get_interface(intf); if (retval < 0) return retval; retval = usb_hub_set_port_power(hdev, hub, port1, true); 
msleep(hub_power_on_good_delay(hub)); if (udev && !retval) { /* * Our preference is to simply wait for the port to reconnect, * as that is the lowest latency method to restart the port. * However, there are cases where toggling port power results in * the host port and the device port getting out of sync causing * a link training live lock. Upon timeout, flag the port as * needing warm reset recovery (to be performed later by * usb_port_resume() as requested via usb_wakeup_notification()) */ if (hub_port_debounce_be_connected(hub, port1) < 0) { dev_dbg(&port_dev->dev, "reconnect timeout\n"); if (hub_is_superspeed(hdev)) set_bit(port1, hub->warm_reset_bits); } /* Force the child awake to revalidate after the power loss. */ if (!test_and_set_bit(port1, hub->child_usage_bits)) { pm_runtime_get_noresume(&port_dev->dev); pm_request_resume(&udev->dev); } } usb_autopm_put_interface(intf); return retval; } static int usb_port_runtime_suspend(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_interface *intf = to_usb_interface(dev->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_port *peer = port_dev->peer; int port1 = port_dev->portnum; int retval; if (!hub) return -EINVAL; if (hub->in_reset) return -EBUSY; if (dev_pm_qos_flags(&port_dev->dev, PM_QOS_FLAG_NO_POWER_OFF) == PM_QOS_FLAGS_ALL) return -EAGAIN; if (usb_port_block_power_off) return -EBUSY; retval = usb_autopm_get_interface(intf); if (retval < 0) return retval; retval = usb_hub_set_port_power(hdev, hub, port1, false); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (!port_dev->is_superspeed) usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_ENABLE); usb_autopm_put_interface(intf); /* * Our peer usb3 port may now be able to suspend, so * asynchronously queue a suspend request to observe that this * usb2 port is now off. 
*/ if (!port_dev->is_superspeed && peer) pm_runtime_put(&peer->dev); return retval; } #endif static void usb_port_shutdown(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); if (port_dev->child) usb_disable_usb2_hardware_lpm(port_dev->child); } static const struct dev_pm_ops usb_port_pm_ops = { #ifdef CONFIG_PM .runtime_suspend = usb_port_runtime_suspend, .runtime_resume = usb_port_runtime_resume, #endif }; struct device_type usb_port_device_type = { .name = "usb_port", .release = usb_port_device_release, .pm = &usb_port_pm_ops, }; static struct device_driver usb_port_driver = { .name = "usb", .owner = THIS_MODULE, .shutdown = usb_port_shutdown, }; static int link_peers(struct usb_port *left, struct usb_port *right) { struct usb_port *ss_port, *hs_port; int rc; if (left->peer == right && right->peer == left) return 0; if (left->peer || right->peer) { struct usb_port *lpeer = left->peer; struct usb_port *rpeer = right->peer; char *method; if (left->location && left->location == right->location) method = "location"; else method = "default"; pr_debug("usb: failed to peer %s and %s by %s (%s:%s) (%s:%s)\n", dev_name(&left->dev), dev_name(&right->dev), method, dev_name(&left->dev), lpeer ? dev_name(&lpeer->dev) : "none", dev_name(&right->dev), rpeer ? dev_name(&rpeer->dev) : "none"); return -EBUSY; } rc = sysfs_create_link(&left->dev.kobj, &right->dev.kobj, "peer"); if (rc) return rc; rc = sysfs_create_link(&right->dev.kobj, &left->dev.kobj, "peer"); if (rc) { sysfs_remove_link(&left->dev.kobj, "peer"); return rc; } /* * We need to wake the HiSpeed port to make sure we don't race * setting ->peer with usb_port_runtime_suspend(). Otherwise we * may miss a suspend event for the SuperSpeed port. 
*/ if (left->is_superspeed) { ss_port = left; WARN_ON(right->is_superspeed); hs_port = right; } else { ss_port = right; WARN_ON(!right->is_superspeed); hs_port = left; } pm_runtime_get_sync(&hs_port->dev); left->peer = right; right->peer = left; /* * The SuperSpeed reference is dropped when the HiSpeed port in * this relationship suspends, i.e. when it is safe to allow a * SuperSpeed connection to drop since there is no risk of a * device degrading to its powered-off HiSpeed connection. * * Also, drop the HiSpeed ref taken above. */ pm_runtime_get_sync(&ss_port->dev); pm_runtime_put(&hs_port->dev); return 0; } static void link_peers_report(struct usb_port *left, struct usb_port *right) { int rc; rc = link_peers(left, right); if (rc == 0) { dev_dbg(&left->dev, "peered to %s\n", dev_name(&right->dev)); } else { dev_dbg(&left->dev, "failed to peer to %s (%d)\n", dev_name(&right->dev), rc); pr_warn_once("usb: port power management may be unreliable\n"); usb_port_block_power_off = 1; } } static void unlink_peers(struct usb_port *left, struct usb_port *right) { struct usb_port *ss_port, *hs_port; WARN(right->peer != left || left->peer != right, "%s and %s are not peers?\n", dev_name(&left->dev), dev_name(&right->dev)); /* * We wake the HiSpeed port to make sure we don't race its * usb_port_runtime_resume() event which takes a SuperSpeed ref * when ->peer is !NULL. 
*/ if (left->is_superspeed) { ss_port = left; hs_port = right; } else { ss_port = right; hs_port = left; } pm_runtime_get_sync(&hs_port->dev); sysfs_remove_link(&left->dev.kobj, "peer"); right->peer = NULL; sysfs_remove_link(&right->dev.kobj, "peer"); left->peer = NULL; /* Drop the SuperSpeed ref held on behalf of the active HiSpeed port */ pm_runtime_put(&ss_port->dev); /* Drop the ref taken above */ pm_runtime_put(&hs_port->dev); } /* * For each usb hub device in the system check to see if it is in the * peer domain of the given port_dev, and if it is check to see if it * has a port that matches the given port by location */ static int match_location(struct usb_device *peer_hdev, void *p) { int port1; struct usb_hcd *hcd, *peer_hcd; struct usb_port *port_dev = p, *peer; struct usb_hub *peer_hub = usb_hub_to_struct_hub(peer_hdev); struct usb_device *hdev = to_usb_device(port_dev->dev.parent->parent); if (!peer_hub) return 0; hcd = bus_to_hcd(hdev->bus); peer_hcd = bus_to_hcd(peer_hdev->bus); /* peer_hcd is provisional until we verify it against the known peer */ if (peer_hcd != hcd->shared_hcd) return 0; for (port1 = 1; port1 <= peer_hdev->maxchild; port1++) { peer = peer_hub->ports[port1 - 1]; if (peer && peer->location == port_dev->location) { link_peers_report(port_dev, peer); return 1; /* done */ } } return 0; } /* * Find the peer port either via explicit platform firmware "location" * data, the peer hcd for root hubs, or the upstream peer relationship * for all other hubs. 
*/ static void find_and_link_peer(struct usb_hub *hub, int port1) { struct usb_port *port_dev = hub->ports[port1 - 1], *peer; struct usb_device *hdev = hub->hdev; struct usb_device *peer_hdev; struct usb_hub *peer_hub; /* * If location data is available then we can only peer this port * by a location match, not the default peer (lest we create a * situation where we need to go back and undo a default peering * when the port is later peered by location data) */ if (port_dev->location) { /* we link the peer in match_location() if found */ usb_for_each_dev(port_dev, match_location); return; } else if (!hdev->parent) { struct usb_hcd *hcd = bus_to_hcd(hdev->bus); struct usb_hcd *peer_hcd = hcd->shared_hcd; if (!peer_hcd) return; peer_hdev = peer_hcd->self.root_hub; } else { struct usb_port *upstream; struct usb_device *parent = hdev->parent; struct usb_hub *parent_hub = usb_hub_to_struct_hub(parent); if (!parent_hub) return; upstream = parent_hub->ports[hdev->portnum - 1]; if (!upstream || !upstream->peer) return; peer_hdev = upstream->peer->child; } peer_hub = usb_hub_to_struct_hub(peer_hdev); if (!peer_hub || port1 > peer_hdev->maxchild) return; /* * we found a valid default peer, last check is to make sure it * does not have location data */ peer = peer_hub->ports[port1 - 1]; if (peer && peer->location == 0) link_peers_report(port_dev, peer); } static int connector_bind(struct device *dev, struct device *connector, void *data) { struct usb_port *port_dev = to_usb_port(dev); int ret; ret = sysfs_create_link(&dev->kobj, &connector->kobj, "connector"); if (ret) return ret; ret = sysfs_create_link(&connector->kobj, &dev->kobj, dev_name(dev)); if (ret) { sysfs_remove_link(&dev->kobj, "connector"); return ret; } port_dev->connector = data; /* * If there is already USB device connected to the port, letting the * Type-C connector know about it immediately. 
*/ if (port_dev->child) typec_attach(port_dev->connector, &port_dev->child->dev); return 0; } static void connector_unbind(struct device *dev, struct device *connector, void *data) { struct usb_port *port_dev = to_usb_port(dev); sysfs_remove_link(&connector->kobj, dev_name(dev)); sysfs_remove_link(&dev->kobj, "connector"); port_dev->connector = NULL; } static const struct component_ops connector_ops = { .bind = connector_bind, .unbind = connector_unbind, }; int usb_hub_create_port_device(struct usb_hub *hub, int port1) { struct usb_port *port_dev; struct usb_device *hdev = hub->hdev; int retval; port_dev = kzalloc(sizeof(*port_dev), GFP_KERNEL); if (!port_dev) return -ENOMEM; port_dev->req = kzalloc(sizeof(*(port_dev->req)), GFP_KERNEL); if (!port_dev->req) { kfree(port_dev); return -ENOMEM; } hub->ports[port1 - 1] = port_dev; port_dev->portnum = port1; set_bit(port1, hub->power_bits); port_dev->dev.parent = hub->intfdev; if (hub_is_superspeed(hdev)) { port_dev->is_superspeed = 1; port_dev->usb3_lpm_u1_permit = 1; port_dev->usb3_lpm_u2_permit = 1; port_dev->dev.groups = port_dev_usb3_group; } else port_dev->dev.groups = port_dev_group; port_dev->dev.type = &usb_port_device_type; port_dev->dev.driver = &usb_port_driver; dev_set_name(&port_dev->dev, "%s-port%d", dev_name(&hub->hdev->dev), port1); mutex_init(&port_dev->status_lock); retval = device_register(&port_dev->dev); if (retval) { put_device(&port_dev->dev); return retval; } port_dev->state_kn = sysfs_get_dirent(port_dev->dev.kobj.sd, "state"); if (!port_dev->state_kn) { dev_err(&port_dev->dev, "failed to sysfs_get_dirent 'state'\n"); retval = -ENODEV; goto err_unregister; } /* Set default policy of port-poweroff disabled. 
*/ retval = dev_pm_qos_add_request(&port_dev->dev, port_dev->req, DEV_PM_QOS_FLAGS, PM_QOS_FLAG_NO_POWER_OFF); if (retval < 0) { goto err_put_kn; } retval = component_add(&port_dev->dev, &connector_ops); if (retval) { dev_warn(&port_dev->dev, "failed to add component\n"); goto err_put_kn; } find_and_link_peer(hub, port1); /* * Enable runtime pm and hold a refernce that hub_configure() * will drop once the PM_QOS_NO_POWER_OFF flag state has been set * and the hub has been fully registered (hdev->maxchild set). */ pm_runtime_set_active(&port_dev->dev); pm_runtime_get_noresume(&port_dev->dev); pm_runtime_enable(&port_dev->dev); device_enable_async_suspend(&port_dev->dev); /* * Keep hidden the ability to enable port-poweroff if the hub * does not support power switching. */ if (!hub_is_port_power_switchable(hub)) return 0; /* Attempt to let userspace take over the policy. */ retval = dev_pm_qos_expose_flags(&port_dev->dev, PM_QOS_FLAG_NO_POWER_OFF); if (retval < 0) { dev_warn(&port_dev->dev, "failed to expose pm_qos_no_poweroff\n"); return 0; } /* Userspace owns the policy, drop the kernel 'no_poweroff' request. */ retval = dev_pm_qos_remove_request(port_dev->req); if (retval >= 0) { kfree(port_dev->req); port_dev->req = NULL; } return 0; err_put_kn: sysfs_put(port_dev->state_kn); err_unregister: device_unregister(&port_dev->dev); return retval; } void usb_hub_remove_port_device(struct usb_hub *hub, int port1) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_port *peer; peer = port_dev->peer; if (peer) unlink_peers(port_dev, peer); component_del(&port_dev->dev, &connector_ops); sysfs_put(port_dev->state_kn); device_unregister(&port_dev->dev); }
4 160 41 57 57 153 37 86 86 88 87 1450 38 1396 1394 195 194 144 50 202 202 104 98 4 4 169 72 99 78 74 74 4 4 92 830 11 9 94 14 46 690 764 73 2 2 2 2 506 3 2 6 4 5 7 3 72 10 8 16 10 6 2 12 8 43 28 17 3 4 14 4 5 4 7 6 10 29 3 2 26 10 3 3 6 35 12 13 7 35 4 1 3 2 3 3 2 2 3 15 7 3 1 3 54 46 1 1 3 2 2 37 15 1 1 13 26 1 1 2 2 20 7 1 6 61 1 1 2 2 2 2 51 327 1 4 4 322 112 1 2 2 1 1 2 1 1 2 1 1 2 1 1 1 1 2 2 1 1 1 1 4 78 1 28 44 17 3 1 13 16 22 1 1 18 9 9 2 19 230 229 227 4 29 29 2 5347 347 1940 756 2488 88 5 7 3 8 1 1 224 226 226 23 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 
395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 
895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 
1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 
1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 
2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 
2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 
2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 
3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com * Copyright (c) 2016 Facebook */ #include <linux/kernel.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/bpf.h> #include <linux/bpf_verifier.h> #include <linux/bpf_perf_event.h> #include <linux/btf.h> #include <linux/filter.h> #include <linux/uaccess.h> #include <linux/ctype.h> #include <linux/kprobes.h> #include <linux/spinlock.h> #include <linux/syscalls.h> #include <linux/error-injection.h> #include <linux/btf_ids.h> #include <linux/bpf_lsm.h> #include <linux/fprobe.h> #include <linux/bsearch.h> #include <linux/sort.h> #include <linux/key.h> #include <linux/verification.h> #include <linux/namei.h> #include <linux/fileattr.h> #include <net/bpf_sk_storage.h> #include <uapi/linux/bpf.h> #include <uapi/linux/btf.h> #include <asm/tlb.h> #include "trace_probe.h" #include "trace.h" #define CREATE_TRACE_POINTS #include "bpf_trace.h" #define bpf_event_rcu_dereference(p) \ rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex)) #define MAX_UPROBE_MULTI_CNT (1U << 20) #define MAX_KPROBE_MULTI_CNT (1U << 20) #ifdef CONFIG_MODULES struct bpf_trace_module { struct module *module; struct list_head 
list;
};

static LIST_HEAD(bpf_trace_modules);
static DEFINE_MUTEX(bpf_module_mutex);

/*
 * bpf_get_raw_tracepoint_module - find a raw tracepoint by name in the
 * modules linked on bpf_trace_modules
 * @name: tracepoint name, compared against btp->tp->name
 *
 * The list is walked with bpf_module_mutex held. The search stops at the
 * first entry whose tracepoint name equals @name. That entry is returned
 * only when try_module_get() on its module succeeds; in every other case
 * (no match, or try_module_get() failed) NULL is returned.
 */
static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
	struct bpf_raw_event_map *btp, *ret = NULL;
	struct bpf_trace_module *btm;
	unsigned int i;

	mutex_lock(&bpf_module_mutex);
	list_for_each_entry(btm, &bpf_trace_modules, list) {
		for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
			btp = &btm->module->bpf_raw_events[i];
			if (!strcmp(btp->tp->name, name)) {
				/* The first name match ends the search either way. */
				if (try_module_get(btm->module))
					ret = btp;
				goto out;
			}
		}
	}
out:
	mutex_unlock(&bpf_module_mutex);
	return ret;
}
#else
/* Variant for !CONFIG_MODULES: always returns NULL. */
static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
	return NULL;
}
#endif /* CONFIG_MODULES */

/* Forward declarations. */
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
				  u64 flags, const struct btf **btf,
				  s32 *btf_id);
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx);
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx);

static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx);
static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx);

/**
 * trace_call_bpf - invoke BPF program
 * @call: tracepoint event
 * @ctx: opaque context pointer
 *
 * kprobe handlers execute BPF programs via this helper.
 * Can be used from static tracepoints in the future.
 *
 * Return: BPF programs always return an integer which is interpreted by
 * kprobe handler as:
 * 0 - return from kprobe (event is filtered out)
 * 1 - store kprobe event into ring buffer
 * Other values are reserved and currently alias to 1
 */
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
	unsigned int ret;

	cant_sleep();

	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		/*
		 * since some bpf program is already running on this cpu,
		 * don't call into another bpf program (same or different)
		 * and don't send kprobe event into ring-buffer,
		 * so return zero here
		 */
		rcu_read_lock();
		bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array));
		rcu_read_unlock();
		ret = 0;
		goto out;
	}

	/*
	 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
	 * to all call sites, we did a bpf_prog_array_valid() there to check
	 * whether call->prog_array is empty or not, which is
	 * a heuristic to speed up execution.
	 *
	 * If bpf_prog_array_valid() fetched prog_array was
	 * non-NULL, we go into trace_call_bpf() and do the actual
	 * proper rcu_dereference() under RCU lock.
	 * If it turns out that prog_array is NULL then, we bail out.
	 * For the opposite, if the bpf_prog_array_valid() fetched pointer
	 * was NULL, you'll skip the prog_array with the risk of missing
	 * out of events when it was updated in between this and the
	 * rcu_dereference() which is accepted risk.
	 */
	rcu_read_lock();
	ret = bpf_prog_run_array(rcu_dereference(call->prog_array),
				 ctx, bpf_prog_run);
	rcu_read_unlock();

 out:
	/* Undo the per-CPU bpf_prog_active increment taken on entry. */
	__this_cpu_dec(bpf_prog_active);

	return ret;
}

#ifdef CONFIG_BPF_KPROBE_OVERRIDE
/*
 * bpf_override_return helper: store @rc as the return value in @regs and
 * call override_function_with_return() on them. Always returns 0.
 */
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
	regs_set_return_value(regs, rc);
	override_function_with_return(regs);
	return 0;
}

static const struct bpf_func_proto bpf_override_return_proto = {
	.func		= bpf_override_return,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
#endif

/*
 * Copy @size bytes from the user pointer @unsafe_ptr into @dst using
 * copy_from_user_nofault(). When that call returns a negative value the
 * whole destination buffer is zeroed. The call's return value is passed
 * back unchanged.
 */
static __always_inline int
bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
{
	int ret;

	ret = copy_from_user_nofault(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);
	return ret;
}

/* bpf_probe_read_user helper: thin wrapper around the common routine. */
BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
	   const void __user *, unsafe_ptr)
{
	return bpf_probe_read_user_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_user_proto = {
	.func		= bpf_probe_read_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

static __always_inline int
bpf_probe_read_user_str_common(void *dst, u32 size,
			       const void __user *unsafe_ptr)
{
	int ret;

	/*
	 * NB: We rely on strncpy_from_user() not copying junk past the NUL
	 * terminator into `dst`.
	 *
	 * strncpy_from_user() does long-sized strides in the fast path. If the
	 * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
	 * then there could be junk after the NUL in `dst`. If user takes `dst`
	 * and keys a hash map with it, then semantically identical strings can
	 * occupy multiple entries in the map.
*/
	ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);
	return ret;
}

/* bpf_probe_read_user_str helper: thin wrapper around the common routine. */
BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
	   const void __user *, unsafe_ptr)
{
	return bpf_probe_read_user_str_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_user_str_proto = {
	.func		= bpf_probe_read_user_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

/*
 * bpf_probe_read_kernel helper: forwards to bpf_probe_read_kernel_common(),
 * which is not defined in the visible part of this file.
 */
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_kernel_proto = {
	.func		= bpf_probe_read_kernel,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

/*
 * String copy from a kernel pointer via strncpy_from_kernel_nofault().
 * @dst is zeroed only when that call returns a negative value; the call's
 * return value is passed back unchanged.
 */
static __always_inline int
bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
{
	int ret;

	/*
	 * The strncpy_from_kernel_nofault() call will likely not fill the
	 * entire buffer, but that's okay in this circumstance as we're probing
	 * arbitrary memory anyway similar to bpf_probe_read_*() and might
	 * as well probe the stack. Thus, memory is explicitly cleared
	 * only in error case, so that improper users ignoring return
	 * code altogether don't copy garbage; otherwise length of string
	 * is returned that can be used for bpf_perf_event_output() et al.
	 */
	ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);
	return ret;
}

/* bpf_probe_read_kernel_str helper: thin wrapper around the common routine. */
BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
	.func		= bpf_probe_read_kernel_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
/*
 * bpf_probe_read_compat helper: addresses below TASK_SIZE are read with the
 * user-space routine, all other addresses with the kernel routine.
 */
BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
		return bpf_probe_read_user_common(dst, size,
				(__force void __user *)unsafe_ptr);
	}
	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
}

static const struct bpf_func_proto bpf_probe_read_compat_proto = {
	.func		= bpf_probe_read_compat,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

/*
 * bpf_probe_read_compat_str helper: same TASK_SIZE split as
 * bpf_probe_read_compat, using the string-copy routines.
 */
BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
		return bpf_probe_read_user_str_common(dst, size,
				(__force void __user *)unsafe_ptr);
	}
	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
}

static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
	.func		= bpf_probe_read_compat_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */

/*
 * bpf_probe_write_user helper: copy @size bytes from @src to the user
 * pointer @unsafe_ptr with copy_to_user_nofault().
 *
 * Returns -EPERM when called in interrupt context, when current has
 * PF_KTHREAD or PF_EXITING set, or when nmi_uaccess_okay() is false;
 * otherwise the result of copy_to_user_nofault().
 */
BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
	   u32, size)
{
	/*
	 * Ensure we're in user context which is safe for the helper to
	 * run. This helper has no business in a kthread.
	 *
	 * access_ok() should prevent writing to non-user memory, but in
	 * some situations (nommu, temporary switch, etc) access_ok() does
	 * not provide enough validation, hence the check on KERNEL_DS.
	 *
	 * nmi_uaccess_okay() ensures the probe is not run in an interim
	 * state, when the task or mm are switched. This is specifically
	 * required to prevent the use of temporary mm.
	 */

	if (unlikely(in_interrupt() ||
		     current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
	if (unlikely(!nmi_uaccess_okay()))
		return -EPERM;

	return copy_to_user_nofault(unsafe_ptr, src, size);
}

static const struct bpf_func_proto bpf_probe_write_user_proto = {
	.func		= bpf_probe_write_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
};

/*
 * Hand out the bpf_probe_write_user prototype. Returns NULL when the caller
 * lacks CAP_SYS_ADMIN. Otherwise a rate-limited warning naming the current
 * task (comm and pid) is logged before the prototype is returned.
 */
static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
{
	if (!capable(CAP_SYS_ADMIN))
		return NULL;

	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
			    current->comm, task_pid_nr(current));

	return &bpf_probe_write_user_proto;
}

/* Number of u64 arguments bpf_trace_printk() forwards to the formatter. */
#define MAX_TRACE_PRINTK_VARARGS	3
/*
 * NOTE(review): BPF_TRACE_PRINTK_SIZE has no user in the visible part of
 * this file - confirm whether it is still needed.
 */
#define BPF_TRACE_PRINTK_SIZE		1024

/*
 * bpf_trace_printk helper: prepare @fmt with the three u64 arguments via
 * bpf_bprintf_prepare(), format into data.buf with bstr_printf() and pass
 * the buffer to trace_bpf_trace_printk().
 *
 * Returns the negative error from bpf_bprintf_prepare() on failure,
 * otherwise the value returned by bstr_printf().
 */
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
	   u64, arg2, u64, arg3)
{
	u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
	struct bpf_bprintf_data data = {
		.get_bin_args	= true,
		.get_buf	= true,
	};
	int ret;

	ret = bpf_bprintf_prepare(fmt, fmt_size, args,
				  MAX_TRACE_PRINTK_VARARGS, &data);
	if (ret < 0)
		return ret;

	ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);

	trace_bpf_trace_printk(data.buf);

	/* Paired with the successful bpf_bprintf_prepare() above. */
	bpf_bprintf_cleanup(&data);

	return ret;
}

static const struct bpf_func_proto bpf_trace_printk_proto = {
	.func		= bpf_trace_printk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
};

static void __set_printk_clr_event(void)
{
	/*
	 * This program might be calling bpf_trace_printk,
	 * so enable the associated
bpf_trace/bpf_trace_printk event.
	 * Repeat this each time as it is possible a user has
	 * disabled bpf_trace_printk events. By loading a program
	 * calling bpf_trace_printk() however the user has expressed
	 * the intent to see such events.
	 */
	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
		pr_warn_ratelimited("could not enable bpf_trace_printk events");
}

/*
 * Return the bpf_trace_printk prototype after trying to enable the
 * bpf_trace/bpf_trace_printk event.
 */
const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
	__set_printk_clr_event();
	return &bpf_trace_printk_proto;
}

/*
 * bpf_trace_vprintk helper: like bpf_trace_printk, but the arguments come
 * from the array @args of @data_len bytes.
 *
 * Returns -EINVAL when @data_len is not a multiple of 8, exceeds
 * MAX_BPRINTF_VARARGS * 8, or is non-zero while @args is NULL. Otherwise
 * returns a negative error from bpf_bprintf_prepare(), or the value
 * returned by bstr_printf().
 */
BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args,
	   u32, data_len)
{
	struct bpf_bprintf_data data = {
		.get_bin_args	= true,
		.get_buf	= true,
	};
	int ret, num_args;

	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
	    (data_len && !args))
		return -EINVAL;
	/* Each argument occupies 8 bytes of @args. */
	num_args = data_len / 8;

	ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
	if (ret < 0)
		return ret;

	ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);

	trace_bpf_trace_printk(data.buf);

	bpf_bprintf_cleanup(&data);

	return ret;
}

static const struct bpf_func_proto bpf_trace_vprintk_proto = {
	.func		= bpf_trace_vprintk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
};

/*
 * Return the bpf_trace_vprintk prototype after trying to enable the
 * bpf_trace/bpf_trace_printk event.
 */
const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
{
	__set_printk_clr_event();
	return &bpf_trace_vprintk_proto;
}

/*
 * bpf_seq_printf helper: format @fmt with the arguments in @args
 * (@data_len bytes, 8 bytes per argument) into the seq_file @m.
 *
 * Returns -EINVAL for the same @data_len / @args conditions as
 * bpf_trace_vprintk, a negative error from bpf_bprintf_prepare(),
 * -EOVERFLOW when seq_has_overflowed(@m) is true after printing, and 0
 * otherwise.
 */
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
	   const void *, args, u32, data_len)
{
	struct bpf_bprintf_data data = {
		.get_bin_args	= true,
	};
	int err, num_args;

	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
	    (data_len && !args))
		return -EINVAL;
	num_args = data_len / 8;

	err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
	if (err < 0)
		return err;

	seq_bprintf(m, fmt, data.bin_args);

	bpf_bprintf_cleanup(&data);

	return seq_has_overflowed(m) ? -EOVERFLOW : 0;
}

BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)

static const struct bpf_func_proto bpf_seq_printf_proto = {
	.func		= bpf_seq_printf,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
	.arg4_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

/*
 * bpf_seq_write helper: returns -EOVERFLOW when seq_write() returns
 * non-zero, 0 otherwise.
 */
BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
{
	return seq_write(m, data, len) ? -EOVERFLOW : 0;
}

static const struct bpf_func_proto bpf_seq_write_proto = {
	.func		= bpf_seq_write,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
};

/*
 * bpf_seq_printf_btf helper: resolve the BTF object and type id for @ptr
 * with bpf_btf_printf_prepare(), then show ptr->ptr in the seq_file @m via
 * btf_type_seq_show_flags(). A non-zero result from the prepare step is
 * returned as is.
 */
BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr,
	   u32, btf_ptr_size, u64, flags)
{
	const struct btf *btf;
	s32 btf_id;
	int ret;

	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
	if (ret)
		return ret;

	return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags);
}

static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
	.func		= bpf_seq_printf_btf,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

/*
 * Read the perf event stored in slot (@flags & BPF_F_INDEX_MASK) of the
 * array map @map using perf_event_read_local().
 *
 * Returns -EINVAL when @flags has bits outside BPF_F_INDEX_MASK, -E2BIG when
 * the index is not below max_entries, -ENOENT when the slot is empty, and
 * otherwise the result of perf_event_read_local().
 */
static __always_inline int
get_map_perf_counter(struct bpf_map *map, u64 flags,
		     u64 *value, u64 *enabled, u64 *running)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;

	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;
	/* BPF_F_CURRENT_CPU selects the slot of the CPU we are running on. */
	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	return perf_event_read_local(ee->event, value, enabled, running);
}

/*
 * bpf_perf_event_read helper: returns the counter value, or the non-zero
 * error from get_map_perf_counter().
 */
BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
{
	u64 value = 0;
	int err;

	err = get_map_perf_counter(map, flags, &value, NULL, NULL);
	/*
	 * this api is ugly since we miss [-22..-2] range of valid
	 * counter values, but that's uapi
	 */
	if (err)
		return err;
	return value;
}

static const struct bpf_func_proto bpf_perf_event_read_proto = {
	.func		= bpf_perf_event_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

/*
 * bpf_perf_event_read_value helper: fill @buf (counter, enabled, running)
 * from the selected perf event.
 *
 * @size must equal sizeof(struct bpf_perf_event_value), otherwise -EINVAL.
 * On any failure @buf is zeroed and the error is returned; 0 on success.
 */
BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
				   &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
	.func		= bpf_perf_event_read_value,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

/*
 * Emit the sample @sd on the perf event stored in slot
 * (@flags & BPF_F_INDEX_MASK) of the array map @map.
 *
 * Returns -E2BIG when the index is not below max_entries, -ENOENT when the
 * slot is empty, -EINVAL when the event is not a PERF_TYPE_SOFTWARE /
 * PERF_COUNT_SW_BPF_OUTPUT event, -EOPNOTSUPP when event->oncpu differs
 * from the current CPU, and otherwise the result of perf_event_output().
 * Bits of @flags outside BPF_F_INDEX_MASK are not checked here.
 */
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
			u64 flags, struct perf_sample_data *sd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;
	struct perf_event *event;

	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	event = ee->event;
	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
		return -EINVAL;

	if (unlikely(event->oncpu != cpu))
		return -EOPNOTSUPP;

	return perf_event_output(event, sd, regs);
}

/*
 * Support executing tracepoints in normal, irq,
and nmi context that each call * bpf_perf_event_output */ struct bpf_trace_sample_data { struct perf_sample_data sds[3]; }; static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds); static DEFINE_PER_CPU(int, bpf_trace_nest_level); BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, u64, flags, void *, data, u64, size) { struct bpf_trace_sample_data *sds; struct perf_raw_record raw = { .frag = { .size = size, .data = data, }, }; struct perf_sample_data *sd; int nest_level, err; preempt_disable(); sds = this_cpu_ptr(&bpf_trace_sds); nest_level = this_cpu_inc_return(bpf_trace_nest_level); if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) { err = -EBUSY; goto out; } sd = &sds->sds[nest_level - 1]; if (unlikely(flags & ~(BPF_F_INDEX_MASK))) { err = -EINVAL; goto out; } perf_sample_data_init(sd, 0, 0); perf_sample_save_raw_data(sd, &raw); err = __bpf_perf_event_output(regs, map, flags, sd); out: this_cpu_dec(bpf_trace_nest_level); preempt_enable(); return err; } static const struct bpf_func_proto bpf_perf_event_output_proto = { .func = bpf_perf_event_output, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; static DEFINE_PER_CPU(int, bpf_event_output_nest_level); struct bpf_nested_pt_regs { struct pt_regs regs[3]; }; static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs); static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds); u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) { struct perf_raw_frag frag = { .copy = ctx_copy, .size = ctx_size, .data = ctx, }; struct perf_raw_record raw = { .frag = { { .next = ctx_size ? 
&frag : NULL, }, .size = meta_size, .data = meta, }, }; struct perf_sample_data *sd; struct pt_regs *regs; int nest_level; u64 ret; preempt_disable(); nest_level = this_cpu_inc_return(bpf_event_output_nest_level); if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) { ret = -EBUSY; goto out; } sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]); regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]); perf_fetch_caller_regs(regs); perf_sample_data_init(sd, 0, 0); perf_sample_save_raw_data(sd, &raw); ret = __bpf_perf_event_output(regs, map, flags, sd); out: this_cpu_dec(bpf_event_output_nest_level); preempt_enable(); return ret; } BPF_CALL_0(bpf_get_current_task) { return (long) current; } const struct bpf_func_proto bpf_get_current_task_proto = { .func = bpf_get_current_task, .gpl_only = true, .ret_type = RET_INTEGER, }; BPF_CALL_0(bpf_get_current_task_btf) { return (unsigned long) current; } const struct bpf_func_proto bpf_get_current_task_btf_proto = { .func = bpf_get_current_task_btf, .gpl_only = true, .ret_type = RET_PTR_TO_BTF_ID_TRUSTED, .ret_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], }; BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task) { return (unsigned long) task_pt_regs(task); } BTF_ID_LIST(bpf_task_pt_regs_ids) BTF_ID(struct, pt_regs) const struct bpf_func_proto bpf_task_pt_regs_proto = { .func = bpf_task_pt_regs, .gpl_only = true, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .ret_type = RET_PTR_TO_BTF_ID, .ret_btf_id = &bpf_task_pt_regs_ids[0], }; BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct cgroup *cgrp; if (unlikely(idx >= array->map.max_entries)) return -E2BIG; cgrp = READ_ONCE(array->ptrs[idx]); if (unlikely(!cgrp)) return -EAGAIN; return task_under_cgroup_hierarchy(current, cgrp); } static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = { .func = 
bpf_current_task_under_cgroup,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

/* Per-CPU record of one deferred signal: target task, signal number and pid type. */
struct send_signal_irq_work {
	struct irq_work irq_work;
	struct task_struct *task;
	u32 sig;
	enum pid_type type;
};

static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);

/*
 * irq_work callback: delivers the recorded signal to the recorded task and
 * then drops the task reference held in the work item.
 */
static void do_bpf_send_signal(struct irq_work *entry)
{
	struct send_signal_irq_work *work;

	work = container_of(entry, struct send_signal_irq_work, irq_work);
	group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
	put_task_struct(work->task);
}

/*
 * Common implementation behind the send_signal helpers: sends @sig to
 * 'current' with pid type @type, either directly or, when IRQs are
 * disabled, through the per-CPU irq_work above.
 *
 * Returns -EPERM for kthreads, exiting tasks, the global init task or when
 * nmi_uaccess_okay() fails; -EINVAL / -EBUSY on the deferred path for an
 * invalid signal or a still-busy work item.
 */
static int bpf_send_signal_common(u32 sig, enum pid_type type)
{
	struct send_signal_irq_work *work = NULL;

	/* Similar to bpf_probe_write_user, task needs to be
	 * in a sound condition and kernel memory access be
	 * permitted in order to send signal to the current
	 * task.
	 */
	if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
	if (unlikely(!nmi_uaccess_okay()))
		return -EPERM;
	/* Task should not be pid=1 to avoid kernel panic. */
	if (unlikely(is_global_init(current)))
		return -EPERM;

	if (irqs_disabled()) {
		/* Do an early check on signal validity. Otherwise,
		 * the error is lost in deferred irq_work.
		 */
		if (unlikely(!valid_signal(sig)))
			return -EINVAL;

		work = this_cpu_ptr(&send_signal_work);
		if (irq_work_is_busy(&work->irq_work))
			return -EBUSY;

		/* Add the current task, which is the target of sending signal,
		 * to the irq_work. The current task may change when queued
		 * irq works get executed.
*/
		work->task = get_task_struct(current);
		work->sig = sig;
		work->type = type;
		irq_work_queue(&work->irq_work);
		/* Delivery happens later in the irq_work callback. */
		return 0;
	}

	return group_send_sig_info(sig, SEND_SIG_PRIV, current, type);
}

/* Signal variant that passes PIDTYPE_TGID to the common implementation. */
BPF_CALL_1(bpf_send_signal, u32, sig)
{
	return bpf_send_signal_common(sig, PIDTYPE_TGID);
}

static const struct bpf_func_proto bpf_send_signal_proto = {
	.func		= bpf_send_signal,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};

/* Signal variant that passes PIDTYPE_PID to the common implementation. */
BPF_CALL_1(bpf_send_signal_thread, u32, sig)
{
	return bpf_send_signal_common(sig, PIDTYPE_PID);
}

static const struct bpf_func_proto bpf_send_signal_thread_proto = {
	.func		= bpf_send_signal_thread,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};

/*
 * bpf_d_path() helper: renders @path into @buf (size @sz) via d_path().
 *
 * Returns 0 when @sz is zero, a negative error from the nofault copy or
 * from d_path(), otherwise the number of bytes from the rendered string to
 * the end of the buffer, after moving that string to the start of @buf.
 */
BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
{
	struct path copy;
	long len;
	char *p;

	if (!sz)
		return 0;

	/*
	 * The path pointer is verified as trusted and safe to use,
	 * but let's double check it's valid anyway to workaround
	 * potentially broken verifier.
	 */
	len = copy_from_kernel_nofault(&copy, path, sizeof(*path));
	if (len < 0)
		return len;

	p = d_path(&copy, buf, sz);
	if (IS_ERR(p)) {
		len = PTR_ERR(p);
	} else {
		/* p points inside buf; shift the result to the front of buf. */
		len = buf + sz - p;
		memmove(buf, p, len);
	}

	return len;
}

/* Attach targets checked by bpf_d_path_allowed() below. */
BTF_SET_START(btf_allowlist_d_path)
#ifdef CONFIG_SECURITY
BTF_ID(func, security_file_permission)
BTF_ID(func, security_inode_getattr)
BTF_ID(func, security_file_open)
#endif
#ifdef CONFIG_SECURITY_PATH
BTF_ID(func, security_path_truncate)
#endif
BTF_ID(func, vfs_truncate)
BTF_ID(func, vfs_fallocate)
BTF_ID(func, dentry_open)
BTF_ID(func, vfs_getattr)
BTF_ID(func, filp_close)
BTF_SET_END(btf_allowlist_d_path)

/*
 * Decides whether @prog may use bpf_d_path(): always for tracing programs
 * attached as BPF_TRACE_ITER, for LSM programs only on hooks accepted by
 * bpf_lsm_is_sleepable_hook(), and otherwise only when the attach target is
 * in btf_allowlist_d_path.
 */
static bool bpf_d_path_allowed(const struct bpf_prog *prog)
{
	if (prog->type == BPF_PROG_TYPE_TRACING &&
	    prog->expected_attach_type == BPF_TRACE_ITER)
		return true;

	if (prog->type == BPF_PROG_TYPE_LSM)
		return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);

	return btf_id_set_contains(&btf_allowlist_d_path,
				   prog->aux->attach_btf_id);
}

BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids,
struct, path)

static const struct bpf_func_proto bpf_d_path_proto = {
	.func		= bpf_d_path,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &bpf_d_path_btf_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.allowed	= bpf_d_path_allowed,
};

/* Every flag bit accepted by bpf_btf_printf_prepare(). */
#define BTF_F_ALL	(BTF_F_COMPACT | BTF_F_NONAME | \
			 BTF_F_PTR_RAW | BTF_F_ZERO)

/*
 * Validates the arguments of the BTF printing helpers and resolves the BTF
 * object and type id to print.
 *
 * Returns -EINVAL for unknown @flags, a @btf_ptr_size that is not
 * sizeof(struct btf_ptr), a missing vmlinux BTF or a zero type id; the
 * PTR_ERR() of bpf_get_btf_vmlinux() when that fails; -ENOENT when the id
 * does not resolve to a type; 0 on success with *@btf and *@btf_id set.
 */
static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
				  u64 flags, const struct btf **btf,
				  s32 *btf_id)
{
	const struct btf_type *t;

	if (unlikely(flags & ~(BTF_F_ALL)))
		return -EINVAL;

	if (btf_ptr_size != sizeof(struct btf_ptr))
		return -EINVAL;

	*btf = bpf_get_btf_vmlinux();

	if (IS_ERR_OR_NULL(*btf))
		return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;

	if (ptr->type_id > 0)
		*btf_id = ptr->type_id;
	else
		return -EINVAL;

	/*
	 * NOTE(review): *btf_id is s32; presumably ptr->type_id is unsigned, in
	 * which case a large id turns negative here and is rejected below
	 * without 't' ever being read - confirm against struct btf_ptr.
	 */
	if (*btf_id > 0)
		t = btf_type_by_id(*btf, *btf_id);
	if (*btf_id <= 0 || !t)
		return -ENOENT;

	return 0;
}

/*
 * bpf_snprintf_btf() helper: after validating the arguments, formats the
 * object at ptr->ptr into @str through btf_type_snprintf_show().
 */
BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr,
	   u32, btf_ptr_size, u64, flags)
{
	const struct btf *btf;
	s32 btf_id;
	int ret;

	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
	if (ret)
		return ret;

	return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size,
				      flags);
}

const struct bpf_func_proto bpf_snprintf_btf_proto = {
	.func		= bpf_snprintf_btf,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};

/* Returns the u64 stored two slots before the start of @ctx. */
BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx)
{
	/* This helper call is inlined by verifier. */
	return ((u64 *)ctx)[-2];
}

static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
	.func		= bpf_get_func_ip_tracing,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

#ifdef CONFIG_X86_KERNEL_IBT
static unsigned long get_entry_ip(unsigned long fentry_ip)
{
	u32 instr;

	/* Being extra safe in here in case entry ip is on the page-edge.
*/
	if (get_kernel_nofault(instr, (u32 *) fentry_ip - 1))
		return fentry_ip;
	/* Step back over an ENDBR instruction found just before fentry_ip. */
	if (is_endbr(instr))
		fentry_ip -= ENDBR_INSN_SIZE;
	return fentry_ip;
}
#else
/* Without CONFIG_X86_KERNEL_IBT the address is used unchanged. */
#define get_entry_ip(fentry_ip) fentry_ip
#endif

/*
 * get_func_ip for kprobe programs.
 *
 * With CONFIG_UPROBES, a run context flagged is_uprobe returns the
 * breakpoint address taken from current->utask. Otherwise returns 0 unless a
 * kprobe is running and carries KPROBE_FLAG_ON_FUNC_ENTRY, in which case the
 * probe address is passed through get_entry_ip().
 */
BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
{
	struct bpf_trace_run_ctx *run_ctx __maybe_unused;
	struct kprobe *kp;

#ifdef CONFIG_UPROBES
	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
	if (run_ctx->is_uprobe)
		return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr;
#endif

	kp = kprobe_running();

	if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY))
		return 0;

	return get_entry_ip((uintptr_t)kp->addr);
}

static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
	.func		= bpf_get_func_ip_kprobe,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

/*
 * The kprobe-multi and uprobe-multi helpers below ignore @regs and forward
 * current->bpf_ctx to the matching bpf_{k,u}probe_multi_*() accessor.
 */
BPF_CALL_1(bpf_get_func_ip_kprobe_multi, struct pt_regs *, regs)
{
	return bpf_kprobe_multi_entry_ip(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = {
	.func		= bpf_get_func_ip_kprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_attach_cookie_kprobe_multi, struct pt_regs *, regs)
{
	return bpf_kprobe_multi_cookie(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = {
	.func		= bpf_get_attach_cookie_kprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs)
{
	return bpf_uprobe_multi_entry_ip(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = {
	.func		= bpf_get_func_ip_uprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs)
{
	return bpf_uprobe_multi_cookie(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = {
	.func		=
bpf_get_attach_cookie_uprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

/* Returns bpf_cookie from the bpf_trace_run_ctx that embeds current->bpf_ctx. */
BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
{
	struct bpf_trace_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
	return run_ctx->bpf_cookie;
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = {
	.func		= bpf_get_attach_cookie_trace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

/* Perf-event variant: the cookie is read from the event in @ctx. */
BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx)
{
	return ctx->event->bpf_cookie;
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
	.func		= bpf_get_attach_cookie_pe,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

/* Same body as bpf_get_attach_cookie_trace(), exposed through its own proto. */
BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx)
{
	struct bpf_trace_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
	return run_ctx->bpf_cookie;
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = {
	.func		= bpf_get_attach_cookie_tracing,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

/*
 * bpf_get_branch_snapshot() helper.
 *
 * Without CONFIG_X86 this always returns -ENOENT. Otherwise it asks
 * perf_snapshot_branch_stack for at most size / sizeof(struct
 * perf_branch_entry) entries and returns the number of bytes that
 * corresponds to the entry count reported back, -EINVAL when @flags is
 * non-zero, or -ENOENT when no entries were reported.
 */
BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
{
#ifndef CONFIG_X86
	return -ENOENT;
#else
	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
	u32 entry_cnt = size / br_entry_size;

	/* Note the order: the snapshot is taken before @flags is validated. */
	entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt);

	if (unlikely(flags))
		return -EINVAL;

	if (!entry_cnt)
		return -ENOENT;

	return entry_cnt * br_entry_size;
#endif
}

static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
	.func		= bpf_get_branch_snapshot,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value)
{
	/* This helper call is inlined by verifier.
*/
	/* The argument count is stored in the slot just before @ctx. */
	u64 nr_args = ((u64 *)ctx)[-1];

	if ((u64) n >= nr_args)
		return -EINVAL;
	*value = ((u64 *)ctx)[n];
	return 0;
}

static const struct bpf_func_proto bpf_get_func_arg_proto = {
	.func		= get_func_arg,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_LONG,
};

/* Stores the slot that follows the last argument (index nr_args) in *@value. */
BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
{
	/* This helper call is inlined by verifier. */
	u64 nr_args = ((u64 *)ctx)[-1];

	*value = ((u64 *)ctx)[nr_args];
	return 0;
}

static const struct bpf_func_proto bpf_get_func_ret_proto = {
	.func		= get_func_ret,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_LONG,
};

/* Returns the argument count stored in the slot just before @ctx. */
BPF_CALL_1(get_func_arg_cnt, void *, ctx)
{
	/* This helper call is inlined by verifier. */
	return ((u64 *)ctx)[-1];
}

static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
	.func		= get_func_arg_cnt,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

#ifdef CONFIG_KEYS
__bpf_kfunc_start_defs();

/**
 * bpf_lookup_user_key - lookup a key by its serial
 * @serial: key handle serial number
 * @flags: lookup-specific flags
 *
 * Search a key with a given *serial* and the provided *flags*.
 * If found, increment the reference count of the key by one, and
 * return it in the bpf_key structure.
 *
 * The bpf_key structure must be passed to bpf_key_put() when done
 * with it, so that the key reference count is decremented and the
 * bpf_key structure is freed.
 *
 * Permission checks are deferred to the time the key is used by
 * one of the available key-specific kfuncs.
 *
 * Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested
 * special keyring (e.g. session keyring), if it doesn't yet exist.
 * Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting
 * for the key construction, and to retrieve uninstantiated keys (keys
 * without data attached to them).
 *
 * Return: a bpf_key pointer with a valid key pointer if the key is found, a
 *         NULL pointer otherwise.
*/
__bpf_kfunc struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
{
	key_ref_t key_ref;
	struct bpf_key *bkey;

	/* Reject any flag bit outside KEY_LOOKUP_ALL. */
	if (flags & ~KEY_LOOKUP_ALL)
		return NULL;

	/*
	 * Permission check is deferred until the key is used, as the
	 * intent of the caller is unknown here.
	 */
	key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK);
	if (IS_ERR(key_ref))
		return NULL;

	bkey = kmalloc(sizeof(*bkey), GFP_KERNEL);
	if (!bkey) {
		/* No wrapper to hand out, so give the key reference back. */
		key_put(key_ref_to_ptr(key_ref));
		return NULL;
	}

	bkey->key = key_ref_to_ptr(key_ref);
	/* has_ref makes bpf_key_put() call key_put() on this key. */
	bkey->has_ref = true;

	return bkey;
}

/**
 * bpf_lookup_system_key - lookup a key by a system-defined ID
 * @id: key ID
 *
 * Obtain a bpf_key structure with a key pointer set to the passed key ID.
 * The key pointer is marked as invalid, to prevent bpf_key_put() from
 * attempting to decrement the key reference count on that pointer. The key
 * pointer set in such way is currently understood only by
 * verify_pkcs7_signature().
 *
 * Set *id* to one of the values defined in include/linux/verification.h:
 * 0 for the primary keyring (immutable keyring of system keys);
 * VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring
 * (where keys can be added only if they are vouched for by existing keys
 * in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform
 * keyring (primarily used by the integrity subsystem to verify a kexec'ed
 * kerned image and, possibly, the initramfs signature).
*
 * Return: a bpf_key pointer with an invalid key pointer set from the
 *         pre-determined ID on success, a NULL pointer otherwise
 */
__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
{
	struct bpf_key *bkey;

	if (system_keyring_id_check(id) < 0)
		return NULL;

	bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC);
	if (!bkey)
		return NULL;

	/* The ID itself is stored in the pointer field; no reference is held. */
	bkey->key = (struct key *)(unsigned long)id;
	bkey->has_ref = false;

	return bkey;
}

/**
 * bpf_key_put - decrement key reference count if key is valid and free bpf_key
 * @bkey: bpf_key structure
 *
 * Decrement the reference count of the key inside *bkey*, if the pointer
 * is valid, and free *bkey*.
 */
__bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
{
	/* Only wrappers created with has_ref set hold a key reference. */
	if (bkey->has_ref)
		key_put(bkey->key);

	kfree(bkey);
}

#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
/**
 * bpf_verify_pkcs7_signature - verify a PKCS#7 signature
 * @data_ptr: data to verify
 * @sig_ptr: signature of the data
 * @trusted_keyring: keyring with keys trusted for signature verification
 *
 * Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr*
 * with keys in a keyring referenced by *trusted_keyring*.
 *
 * Return: 0 on success, a negative value on error.
 */
__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
			       struct bpf_dynptr_kern *sig_ptr,
			       struct bpf_key *trusted_keyring)
{
	const void *data, *sig;
	u32 data_len, sig_len;
	int ret;

	if (trusted_keyring->has_ref) {
		/*
		 * Do the permission check deferred in bpf_lookup_user_key().
		 * See bpf_lookup_user_key() for more details.
		 *
		 * A call to key_task_permission() here would be redundant, as
		 * it is already done by keyring_search() called by
		 * find_asymmetric_key().
*/
		ret = key_validate(trusted_keyring->key);
		if (ret < 0)
			return ret;
	}

	/*
	 * NOTE(review): presumably these return the byte length and the backing
	 * memory of each dynptr - confirm against the dynptr implementation.
	 */
	data_len = __bpf_dynptr_size(data_ptr);
	data = __bpf_dynptr_data(data_ptr, data_len);
	sig_len = __bpf_dynptr_size(sig_ptr);
	sig = __bpf_dynptr_data(sig_ptr, sig_len);

	return verify_pkcs7_signature(data, data_len, sig, sig_len,
				      trusted_keyring->key,
				      VERIFYING_UNSPECIFIED_SIGNATURE, NULL,
				      NULL);
}
#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */

__bpf_kfunc_end_defs();

/* kfunc id set for the key helpers, with their per-kfunc flags. */
BTF_SET8_START(key_sig_kfunc_set)
BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
#endif
BTF_SET8_END(key_sig_kfunc_set)

static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = {
	.owner = THIS_MODULE,
	.set = &key_sig_kfunc_set,
};

/* Registers the key kfunc set for BPF_PROG_TYPE_TRACING programs. */
static int __init bpf_key_sig_kfuncs_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
					 &bpf_key_sig_kfunc_set);
}

late_initcall(bpf_key_sig_kfuncs_init);
#endif /* CONFIG_KEYS */

/* filesystem kfuncs */
__bpf_kfunc_start_defs();

/**
 * bpf_get_file_xattr - get xattr of a file
 * @file: file to get xattr from
 * @name__str: name of the xattr
 * @value_ptr: output buffer of the xattr value
 *
 * Get xattr *name__str* of *file* and store the output in *value_ptr*.
 *
 * For security reasons, only *name__str* with prefix "user." is allowed.
 *
 * Return: 0 on success, a negative value on error.
*/
__bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str,
				   struct bpf_dynptr_kern *value_ptr)
{
	struct dentry *dentry;
	u32 value_len;
	void *value;
	int ret;

	/* Names that do not start with XATTR_USER_PREFIX are refused. */
	if (strncmp(name__str, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
		return -EPERM;

	value_len = __bpf_dynptr_size(value_ptr);
	value = __bpf_dynptr_data_rw(value_ptr, value_len);
	if (!value)
		return -EINVAL;

	dentry = file_dentry(file);
	/* Read permission on the inode is checked before the xattr is fetched. */
	ret = inode_permission(&nop_mnt_idmap, dentry->d_inode, MAY_READ);
	if (ret)
		return ret;
	return __vfs_getxattr(dentry, dentry->d_inode, name__str, value, value_len);
}

__bpf_kfunc_end_defs();

BTF_SET8_START(fs_kfunc_set_ids)
BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
BTF_SET8_END(fs_kfunc_set_ids)

/*
 * kfunc filter: ids outside fs_kfunc_set_ids pass (0); ids inside it are
 * refused with -EACCES unless @prog is an LSM program.
 */
static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
	if (!btf_id_set8_contains(&fs_kfunc_set_ids, kfunc_id))
		return 0;

	/* Only allow to attach from LSM hooks, to avoid recursion */
	return prog->type != BPF_PROG_TYPE_LSM ? -EACCES : 0;
}

static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
	.owner = THIS_MODULE,
	.set = &fs_kfunc_set_ids,
	.filter = bpf_get_file_xattr_filter,
};

/* Registers the filesystem kfunc set for BPF_PROG_TYPE_LSM programs. */
static int __init bpf_fs_kfuncs_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fs_kfunc_set);
}

late_initcall(bpf_fs_kfuncs_init);

/*
 * Maps a helper id to its bpf_func_proto for tracing-style programs. Ids
 * not listed in the switch fall through to bpf_base_func_proto().
 */
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_map_push_elem:
		return &bpf_map_push_elem_proto;
	case BPF_FUNC_map_pop_elem:
		return &bpf_map_pop_elem_proto;
	case BPF_FUNC_map_peek_elem:
		return &bpf_map_peek_elem_proto;
	case BPF_FUNC_map_lookup_percpu_elem:
		return &bpf_map_lookup_percpu_elem_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_ktime_get_boot_ns:
		return
&bpf_ktime_get_boot_ns_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_get_current_pid_tgid:
		return &bpf_get_current_pid_tgid_proto;
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
	case BPF_FUNC_get_current_task_btf:
		return &bpf_get_current_task_btf_proto;
	case BPF_FUNC_task_pt_regs:
		return &bpf_task_pt_regs_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
	case BPF_FUNC_trace_printk:
		return bpf_get_trace_printk_proto();
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_perf_event_read:
		return &bpf_perf_event_read_proto;
	case BPF_FUNC_current_task_under_cgroup:
		return &bpf_current_task_under_cgroup_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	/*
	 * The user-write and kernel-read helpers below resolve to NULL when
	 * security_locked_down() reports an error for the matching lockdown
	 * reason.
	 */
	case BPF_FUNC_probe_write_user:
		return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
		       NULL : bpf_get_probe_write_proto();
	case BPF_FUNC_probe_read_user:
		return &bpf_probe_read_user_proto;
	case BPF_FUNC_probe_read_kernel:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_kernel_proto;
	case BPF_FUNC_probe_read_user_str:
		return &bpf_probe_read_user_str_proto;
	case BPF_FUNC_probe_read_kernel_str:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_kernel_str_proto;
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	case BPF_FUNC_probe_read:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_compat_proto;
	case BPF_FUNC_probe_read_str:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_compat_str_proto;
#endif
#ifdef CONFIG_CGROUPS
	case BPF_FUNC_cgrp_storage_get:
		return &bpf_cgrp_storage_get_proto;
	case BPF_FUNC_cgrp_storage_delete:
		return &bpf_cgrp_storage_delete_proto;
#endif
	case BPF_FUNC_send_signal:
		return &bpf_send_signal_proto;
	case BPF_FUNC_send_signal_thread:
		return &bpf_send_signal_thread_proto;
	case BPF_FUNC_perf_event_read_value:
		return &bpf_perf_event_read_value_proto;
	case BPF_FUNC_get_ns_current_pid_tgid:
		return &bpf_get_ns_current_pid_tgid_proto;
	case BPF_FUNC_ringbuf_output:
		return &bpf_ringbuf_output_proto;
	case BPF_FUNC_ringbuf_reserve:
		return &bpf_ringbuf_reserve_proto;
	case BPF_FUNC_ringbuf_submit:
		return &bpf_ringbuf_submit_proto;
	case BPF_FUNC_ringbuf_discard:
		return &bpf_ringbuf_discard_proto;
	case BPF_FUNC_ringbuf_query:
		return &bpf_ringbuf_query_proto;
	case BPF_FUNC_jiffies64:
		return &bpf_jiffies64_proto;
	case BPF_FUNC_get_task_stack:
		return &bpf_get_task_stack_proto;
	case BPF_FUNC_copy_from_user:
		return &bpf_copy_from_user_proto;
	case BPF_FUNC_copy_from_user_task:
		return &bpf_copy_from_user_task_proto;
	case BPF_FUNC_snprintf_btf:
		return &bpf_snprintf_btf_proto;
	case BPF_FUNC_per_cpu_ptr:
		return &bpf_per_cpu_ptr_proto;
	case BPF_FUNC_this_cpu_ptr:
		return &bpf_this_cpu_ptr_proto;
	/* Task storage uses the *_recur protos when bpf_prog_check_recur() says so. */
	case BPF_FUNC_task_storage_get:
		if (bpf_prog_check_recur(prog))
			return &bpf_task_storage_get_recur_proto;
		return &bpf_task_storage_get_proto;
	case BPF_FUNC_task_storage_delete:
		if (bpf_prog_check_recur(prog))
			return &bpf_task_storage_delete_recur_proto;
		return &bpf_task_storage_delete_proto;
	case BPF_FUNC_for_each_map_elem:
		return &bpf_for_each_map_elem_proto;
	case BPF_FUNC_snprintf:
		return &bpf_snprintf_proto;
	case BPF_FUNC_get_func_ip:
		return &bpf_get_func_ip_proto_tracing;
	case BPF_FUNC_get_branch_snapshot:
		return &bpf_get_branch_snapshot_proto;
	case BPF_FUNC_find_vma:
		return &bpf_find_vma_proto;
	case BPF_FUNC_trace_vprintk:
		return bpf_get_trace_vprintk_proto();
	default:
		/* Anything not handled above is resolved by the base helper set. */
		return bpf_base_func_proto(func_id);
	}
}

static
const struct bpf_func_proto *
kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	/*
	 * Helper lookup for kprobe programs: kprobe-specific protos first,
	 * then the generic tracing set for everything else.
	 */
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
	case BPF_FUNC_override_return:
		return &bpf_override_return_proto;
#endif
	/* get_func_ip and get_attach_cookie depend on the expected attach type. */
	case BPF_FUNC_get_func_ip:
		if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI)
			return &bpf_get_func_ip_proto_kprobe_multi;
		if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
			return &bpf_get_func_ip_proto_uprobe_multi;
		return &bpf_get_func_ip_proto_kprobe;
	case BPF_FUNC_get_attach_cookie:
		if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI)
			return &bpf_get_attach_cookie_proto_kmulti;
		if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
			return &bpf_get_attach_cookie_proto_umulti;
		return &bpf_get_attach_cookie_proto_trace;
	default:
		return bpf_tracing_func_proto(func_id, prog);
	}
}

/* bpf+kprobe programs can access fields of 'struct pt_regs' */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	/* Only aligned reads that start inside struct pt_regs are accepted. */
	if (off < 0 || off >= sizeof(struct pt_regs))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
	/*
	 * Assertion for 32 bit to make sure last 8 byte access
	 * (BPF_DW) to the last 4 byte member is disallowed.
*/
	if (off + size > sizeof(struct pt_regs))
		return false;

	return true;
}

const struct bpf_verifier_ops kprobe_verifier_ops = {
	.get_func_proto  = kprobe_prog_func_proto,
	.is_valid_access = kprobe_prog_is_valid_access,
};

/* No program ops are provided for kprobe programs. */
const struct bpf_prog_ops kprobe_prog_ops = {
};

/*
 * Tracepoint flavour of bpf_perf_event_output(): the pt_regs pointer is
 * loaded from the first pointer-sized slot of @tp_buff.
 */
BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
	 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
	 * from there and call the same bpf_perf_event_output() helper inline.
	 */
	return ____bpf_perf_event_output(regs, map, flags, data, size);
}

static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
	.func		= bpf_perf_event_output_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

/* Tracepoint flavour of bpf_get_stackid(); pt_regs is fetched from @tp_buff as above. */
BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * Same comment as in bpf_perf_event_output_tp(), only that this time
	 * the other helper's function body cannot be inlined due to being
	 * external, thus we need to call raw helper function.
*/
	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			       flags, 0, 0);
}

static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
	.func		= bpf_get_stackid_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

/* Tracepoint flavour of bpf_get_stack(); pt_regs comes from the first slot of @tp_buff. */
BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
			     (unsigned long) size, flags, 0);
}

static const struct bpf_func_proto bpf_get_stack_proto_tp = {
	.func		= bpf_get_stack_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

/* Helper lookup for tracepoint programs; unlisted ids use the generic tracing set. */
static const struct bpf_func_proto *
tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_tp;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_tp;
	case BPF_FUNC_get_attach_cookie:
		return &bpf_get_attach_cookie_proto_trace;
	default:
		return bpf_tracing_func_proto(func_id, prog);
	}
}

/*
 * Context access check for tracepoint programs: aligned reads only, starting
 * at or after the first pointer-sized slot and before PERF_MAX_TRACE_SIZE.
 */
static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct bpf_insn_access_aux *info)
{
	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
	return true;
}

const struct bpf_verifier_ops tracepoint_verifier_ops = {
	.get_func_proto  = tp_prog_func_proto,
	.is_valid_access = tp_prog_is_valid_access,
};

/* No program ops are provided for tracepoint programs. */
const struct bpf_prog_ops tracepoint_prog_ops = {
};

/*
 * bpf_perf_prog_read_value() helper: reads the counter, enabled and running
 * values of the event in @ctx into @buf. On any failure @buf is zeroed and
 * the error is returned.
 */
BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct
bpf_perf_event_value)))
		goto clear;
	err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
				    &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	/* Failure path: hand back a zeroed buffer together with the error. */
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
         .func           = bpf_perf_prog_read_value,
         .gpl_only       = true,
         .ret_type       = RET_INTEGER,
         .arg1_type      = ARG_PTR_TO_CTX,
         .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
         .arg3_type      = ARG_CONST_SIZE,
};

/*
 * bpf_read_branch_records() helper.
 *
 * Returns -EINVAL for unknown @flags, or for a NULL @buf / a @size that is
 * not a multiple of the entry size when copying; -ENOENT when the sample
 * has no branch stack. With BPF_F_GET_BRANCH_RECORDS_SIZE set it returns the
 * total size of the branch stack in bytes; otherwise it copies as many bytes
 * as fit in @size and returns the number copied.
 */
BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
	   void *, buf, u32, size, u64, flags)
{
	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
	struct perf_branch_stack *br_stack = ctx->data->br_stack;
	u32 to_copy;

	if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
		return -EINVAL;

	if (unlikely(!(ctx->data->sample_flags & PERF_SAMPLE_BRANCH_STACK)))
		return -ENOENT;

	if (unlikely(!br_stack))
		return -ENOENT;

	/* Size query: @buf and @size are not examined on this path. */
	if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
		return br_stack->nr * br_entry_size;

	if (!buf || (size % br_entry_size != 0))
		return -EINVAL;

	to_copy = min_t(u32, br_stack->nr * br_entry_size, size);
	memcpy(buf, br_stack->entries, to_copy);

	return to_copy;
}

static const struct bpf_func_proto bpf_read_branch_records_proto = {
	.func           = bpf_read_branch_records,
	.gpl_only       = true,
	.ret_type       = RET_INTEGER,
	.arg1_type      = ARG_PTR_TO_CTX,
	.arg2_type      = ARG_PTR_TO_MEM_OR_NULL,
	.arg3_type      = ARG_CONST_SIZE_OR_ZERO,
	.arg4_type      = ARG_ANYTHING,
};

/* Helper lookup for perf_event programs; unlisted ids use the generic tracing set. */
static const struct bpf_func_proto *
pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_pe;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_pe;
	case BPF_FUNC_perf_prog_read_value:
		return &bpf_perf_prog_read_value_proto;
	case BPF_FUNC_read_branch_records:
		return &bpf_read_branch_records_proto;
	case BPF_FUNC_get_attach_cookie:
		return &bpf_get_attach_cookie_proto_pe;
	default:
		return
bpf_tracing_func_proto(func_id, prog);
	}
}

/*
 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
 * to avoid potential recursive reuse issue when/if tracepoints are added
 * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
 *
 * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
 * in normal, irq, and nmi context.
 */
struct bpf_raw_tp_regs {
	struct pt_regs regs[3];
};
static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);

/*
 * Claims the per-CPU pt_regs slot for the current nesting depth.
 *
 * Returns ERR_PTR(-EBUSY), with the nesting counter already restored, when
 * the depth exceeds the number of slots. On success the caller must call
 * put_bpf_raw_tp_regs() when done.
 */
static struct pt_regs *get_bpf_raw_tp_regs(void)
{
	struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
	int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);

	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
		this_cpu_dec(bpf_raw_tp_nest_level);
		return ERR_PTR(-EBUSY);
	}

	return &tp_regs->regs[nest_level - 1];
}

/* Releases the slot claimed by get_bpf_raw_tp_regs(). */
static void put_bpf_raw_tp_regs(void)
{
	this_cpu_dec(bpf_raw_tp_nest_level);
}

/*
 * Raw-tracepoint flavour of bpf_perf_event_output(): registers are captured
 * with perf_fetch_caller_regs() into a per-CPU slot instead of coming from
 * the context.
 */
BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	ret = ____bpf_perf_event_output(regs, map, flags, data, size);

	put_bpf_raw_tp_regs();
	return ret;
}

static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
	.func		= bpf_perf_event_output_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

/* Protos defined elsewhere and returned by tracing_prog_func_proto(). */
extern const struct bpf_func_proto bpf_skb_output_proto;
extern const struct bpf_func_proto bpf_xdp_output_proto;
extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;

/* Raw-tracepoint flavour of bpf_get_stackid(), using a per-CPU pt_regs slot. */
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   struct bpf_map *, map, u64, flags)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int
ret; if (IS_ERR(regs)) return PTR_ERR(regs); perf_fetch_caller_regs(regs); /* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */ ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map, flags, 0, 0); put_bpf_raw_tp_regs(); return ret; } static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { .func = bpf_get_stackid_raw_tp, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args, void *, buf, u32, size, u64, flags) { struct pt_regs *regs = get_bpf_raw_tp_regs(); int ret; if (IS_ERR(regs)) return PTR_ERR(regs); perf_fetch_caller_regs(regs); ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf, (unsigned long) size, flags, 0); put_bpf_raw_tp_regs(); return ret; } static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { .func = bpf_get_stack_raw_tp, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; static const struct bpf_func_proto * raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_perf_event_output: return &bpf_perf_event_output_proto_raw_tp; case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto_raw_tp; case BPF_FUNC_get_stack: return &bpf_get_stack_proto_raw_tp; default: return bpf_tracing_func_proto(func_id, prog); } } const struct bpf_func_proto * tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { const struct bpf_func_proto *fn; switch (func_id) { #ifdef CONFIG_NET case BPF_FUNC_skb_output: return &bpf_skb_output_proto; case BPF_FUNC_xdp_output: return &bpf_xdp_output_proto; case BPF_FUNC_skc_to_tcp6_sock: return &bpf_skc_to_tcp6_sock_proto; case BPF_FUNC_skc_to_tcp_sock: return &bpf_skc_to_tcp_sock_proto; case 
BPF_FUNC_skc_to_tcp_timewait_sock:
		return &bpf_skc_to_tcp_timewait_sock_proto;
	case BPF_FUNC_skc_to_tcp_request_sock:
		return &bpf_skc_to_tcp_request_sock_proto;
	case BPF_FUNC_skc_to_udp6_sock:
		return &bpf_skc_to_udp6_sock_proto;
	case BPF_FUNC_skc_to_unix_sock:
		return &bpf_skc_to_unix_sock_proto;
	case BPF_FUNC_skc_to_mptcp_sock:
		return &bpf_skc_to_mptcp_sock_proto;
	case BPF_FUNC_sk_storage_get:
		return &bpf_sk_storage_get_tracing_proto;
	case BPF_FUNC_sk_storage_delete:
		return &bpf_sk_storage_delete_tracing_proto;
	case BPF_FUNC_sock_from_file:
		return &bpf_sock_from_file_proto;
	case BPF_FUNC_get_socket_cookie:
		return &bpf_get_socket_ptr_cookie_proto;
	case BPF_FUNC_xdp_get_buff_len:
		return &bpf_xdp_get_buff_len_trace_proto;
#endif
	/* seq_file helpers are only handed to iterator programs */
	case BPF_FUNC_seq_printf:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_printf_proto :
		       NULL;
	case BPF_FUNC_seq_write:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_write_proto :
		       NULL;
	case BPF_FUNC_seq_printf_btf:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_printf_btf_proto :
		       NULL;
	case BPF_FUNC_d_path:
		return &bpf_d_path_proto;
	/* the helpers below are only handed to programs with a trampoline */
	case BPF_FUNC_get_func_arg:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_proto : NULL;
	case BPF_FUNC_get_func_ret:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL;
	case BPF_FUNC_get_func_arg_cnt:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL;
	case BPF_FUNC_get_attach_cookie:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL;
	default:
		fn = raw_tp_prog_func_proto(func_id, prog);
		if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
			fn = bpf_iter_get_func_proto(func_id, prog);
		return fn;
	}
}

/* Context access check for raw tracepoint programs; @prog and @info are unused. */
static bool raw_tp_prog_is_valid_access(int off, int size,
					enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	return bpf_tracing_ctx_access(off, size, type);
}

/* Context access check for tracing programs; defers to the BTF-aware check. */
static bool tracing_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
}

/* Weak default for test_run of tracing programs: always -ENOTSUPP. */
int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
				     const union bpf_attr *kattr,
				     union bpf_attr __user *uattr)
{
	return -ENOTSUPP;
}

const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_prog_is_valid_access,
};

const struct bpf_prog_ops raw_tracepoint_prog_ops = {
#ifdef CONFIG_NET
	.test_run = bpf_prog_test_run_raw_tp,
#endif
};

const struct bpf_verifier_ops tracing_verifier_ops = {
	.get_func_proto  = tracing_prog_func_proto,
	.is_valid_access = tracing_prog_is_valid_access,
};

const struct bpf_prog_ops tracing_prog_ops = {
	.test_run = bpf_prog_test_run_tracing,
};

/*
 * Context access check for writable raw tracepoint programs.  An access at
 * offset 0 must be a u64-sized read and is typed as PTR_TO_TP_BUFFER; all
 * accesses are then subject to the regular raw tracepoint check.
 */
static bool raw_tp_writable_prog_is_valid_access(int off, int size,
						 enum bpf_access_type type,
						 const struct bpf_prog *prog,
						 struct bpf_insn_access_aux *info)
{
	if (off == 0) {
		if (size != sizeof(u64) || type != BPF_READ)
			return false;
		info->reg_type = PTR_TO_TP_BUFFER;
	}
	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
}

const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_writable_prog_is_valid_access,
};

const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
};

/*
 * Context access check for perf_event programs: read-only, inside
 * struct bpf_perf_event_data, with narrow loads permitted on the
 * sample_period and addr fields.
 */
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct
bpf_insn_access_aux *info)
{
	const int size_u64 = sizeof(u64);

	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
		return false;
	if (type != BPF_READ)
		return false;
	/*
	 * A misaligned access is tolerated in exactly one case: an 8-byte
	 * load at a 4-byte aligned offset when unsigned long is 4 bytes.
	 */
	if (off % size != 0) {
		if (sizeof(unsigned long) != 4)
			return false;
		if (size != 8)
			return false;
		if (off % size != 4)
			return false;
	}

	switch (off) {
	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
		bpf_ctx_record_field_size(info, size_u64);
		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
			return false;
		break;
	case bpf_ctx_range(struct bpf_perf_event_data, addr):
		bpf_ctx_record_field_size(info, size_u64);
		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
			return false;
		break;
	default:
		/* everything else must be read as a whole long */
		if (size != sizeof(long))
			return false;
	}

	return true;
}

/*
 * Rewrite a load from struct bpf_perf_event_data into loads from the
 * kernel-side struct bpf_perf_event_data_kern.  sample_period and addr are
 * fetched through the ->data pointer, any other offset is read through the
 * ->regs pointer at the same offset.  Returns the number of instructions
 * emitted into @insn_buf.
 */
static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
				      const struct bpf_insn *si,
				      struct bpf_insn *insn_buf,
				      struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_perf_event_data, sample_period):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, period, 8,
						     target_size));
		break;
	case offsetof(struct bpf_perf_event_data, addr):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, addr, 8,
						     target_size));
		break;
	default:
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       regs), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, regs));
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
				      si->off);
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops perf_event_verifier_ops = {
	.get_func_proto		= pe_prog_func_proto,
.is_valid_access	= pe_prog_is_valid_access,
	.convert_ctx_access	= pe_prog_convert_ctx_access,
};

const struct bpf_prog_ops perf_event_prog_ops = {
};

/* Serializes updates of event->prog and tp_event->prog_array below. */
static DEFINE_MUTEX(bpf_event_mutex);

/* Upper bound on programs in one tp_event->prog_array. */
#define BPF_TRACE_MAX_PROGS 64

/*
 * Attach @prog (with @bpf_cookie) to @event by publishing a new copy of the
 * trace event's prog_array that includes it.
 *
 * Returns 0 on success, -EINVAL when a kprobe-override program is attached
 * to an unsuitable kprobe, -EEXIST when the event already has a program,
 * -E2BIG when the array already holds BPF_TRACE_MAX_PROGS entries, or the
 * error from bpf_prog_array_copy().
 */
int perf_event_attach_bpf_prog(struct perf_event *event,
			       struct bpf_prog *prog,
			       u64 bpf_cookie)
{
	struct bpf_prog_array *old_array;
	struct bpf_prog_array *new_array;
	int ret = -EEXIST;

	/*
	 * Kprobe override only works if they are on the function entry,
	 * and only if they are on the opt-in list.
	 */
	if (prog->kprobe_override &&
	    (!trace_kprobe_on_func_entry(event->tp_event) ||
	     !trace_kprobe_error_injectable(event->tp_event)))
		return -EINVAL;

	mutex_lock(&bpf_event_mutex);

	if (event->prog)
		goto unlock;

	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
	if (old_array &&
	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
		ret = -E2BIG;
		goto unlock;
	}

	ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array);
	if (ret < 0)
		goto unlock;

	/* set the new array to event->tp_event and set event->prog */
	event->prog = prog;
	event->bpf_cookie = bpf_cookie;
	rcu_assign_pointer(event->tp_event->prog_array, new_array);
	bpf_prog_array_free_sleepable(old_array);

unlock:
	mutex_unlock(&bpf_event_mutex);
	return ret;
}

/*
 * Detach the program attached to @event, if any, and drop the reference
 * held on it.  If building a shrunken copy of the prog_array fails for a
 * reason other than -ENOENT, the entry is removed from the existing array
 * with bpf_prog_array_delete_safe() instead.
 */
void perf_event_detach_bpf_prog(struct perf_event *event)
{
	struct bpf_prog_array *old_array;
	struct bpf_prog_array *new_array;
	int ret;

	mutex_lock(&bpf_event_mutex);

	if (!event->prog)
		goto unlock;

	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
	ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
	if (ret == -ENOENT)
		goto unlock;
	if (ret < 0) {
		bpf_prog_array_delete_safe(old_array, event->prog);
	} else {
		rcu_assign_pointer(event->tp_event->prog_array, new_array);
		bpf_prog_array_free_sleepable(old_array);
	}

	bpf_prog_put(event->prog);
	event->prog = NULL;

unlock:
	mutex_unlock(&bpf_event_mutex);
}

/*
 * Copy the ids of the programs attached to @event's trace event to the
 * user-space struct perf_event_query_bpf at @info.
 */
int perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
	struct
perf_event_query_bpf __user *uquery = info;
	struct perf_event_query_bpf query = {};
	struct bpf_prog_array *progs;
	u32 *ids, prog_cnt, ids_len;
	int ret;

	if (!perfmon_capable())
		return -EPERM;
	if (event->attr.type != PERF_TYPE_TRACEPOINT)
		return -EINVAL;
	if (copy_from_user(&query, uquery, sizeof(query)))
		return -EFAULT;

	ids_len = query.ids_len;
	if (ids_len > BPF_TRACE_MAX_PROGS)
		return -E2BIG;
	ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
	if (!ids)
		return -ENOMEM;
	/*
	 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
	 * is required when user only wants to check for uquery->prog_cnt.
	 * There is no need to check for it since the case is handled
	 * gracefully in bpf_prog_array_copy_info.
	 */

	mutex_lock(&bpf_event_mutex);
	progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
	ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
	mutex_unlock(&bpf_event_mutex);

	/* a failed copy-out overrides whatever copy_info returned */
	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
	    copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
		ret = -EFAULT;

	kfree(ids);
	return ret;
}

extern struct bpf_raw_event_map __start__bpf_raw_tp[];
extern struct bpf_raw_event_map __stop__bpf_raw_tp[];

/*
 * Look up a raw tracepoint by name: first in the built-in
 * __start__bpf_raw_tp..__stop__bpf_raw_tp table, then via
 * bpf_get_raw_tracepoint_module().
 */
struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
{
	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;

	for (; btp < __stop__bpf_raw_tp; btp++) {
		if (!strcmp(btp->tp->name, name))
			return btp;
	}

	return bpf_get_raw_tracepoint_module(name);
}

/*
 * Counterpart of bpf_get_raw_tracepoint(): module_put() whatever
 * __module_address() reports for @btp, with preemption disabled around
 * the lookup.
 */
void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
{
	struct module *mod;

	preempt_disable();
	mod = __module_address((unsigned long)btp);
	module_put(mod);
	preempt_enable();
}

/*
 * Run @prog with @args as its context.  prog->active is a per-CPU
 * counter used as a recursion guard: if the program is already active on
 * this CPU the run is skipped and counted as a miss.
 */
static __always_inline
void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
{
	cant_sleep();
	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
		bpf_prog_inc_misses_counter(prog);
		goto out;
	}
	rcu_read_lock();
	(void) bpf_prog_run(prog, args);
	rcu_read_unlock();
out:
	this_cpu_dec(*(prog->active));
}

/*
 * REPEAT(N, FN, DL, list...) expands to FN applied to the first N elements
 * of the list, separated by the parenthesised delimiter DL.
 */
#define UNPACK(...)			__VA_ARGS__
#define REPEAT_1(FN, DL, X, ...)	FN(X)
#define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
#define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
#define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
#define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
#define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
#define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
#define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
#define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
#define REPEAT_10(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
#define REPEAT_11(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
#define REPEAT_12(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
#define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)

#define SARG(X)		u64 arg##X
#define COPY(X)		args[X] = arg##X

#define __DL_COM	(,)
#define __DL_SEM	(;)

#define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11

/*
 * Define and export bpf_trace_run<x>(prog, arg0 .. arg<x-1>): gathers its
 * u64 arguments into an on-stack array and hands it to __bpf_trace_run().
 */
#define BPF_TRACE_DEFN_x(x)						\
	void bpf_trace_run##x(struct bpf_prog *prog,			\
			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
	{								\
		u64 args[x];						\
		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
		__bpf_trace_run(prog, args);				\
	}								\
	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
BPF_TRACE_DEFN_x(1);
BPF_TRACE_DEFN_x(2);
BPF_TRACE_DEFN_x(3);
BPF_TRACE_DEFN_x(4);
BPF_TRACE_DEFN_x(5);
BPF_TRACE_DEFN_x(6);
BPF_TRACE_DEFN_x(7);
BPF_TRACE_DEFN_x(8);
BPF_TRACE_DEFN_x(9);
BPF_TRACE_DEFN_x(10);
BPF_TRACE_DEFN_x(11);
BPF_TRACE_DEFN_x(12);

/*
 * Register @prog as a probe on the tracepoint behind @btp after checking
 * that the program's context and writable-buffer accesses fit what the
 * tracepoint provides.
 */
static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	struct tracepoint *tp = btp->tp;

	/*
	 * check that program doesn't access arguments beyond what's
	 * available in this tracepoint
	 */
	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
		return -EINVAL;

	if (prog->aux->max_tp_access > btp->writable_size)
		return -EINVAL;

	return
tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func,
						   prog);
}

/* Public wrapper around __bpf_probe_register(). */
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	return __bpf_probe_register(btp, prog);
}

/* Remove @prog's probe from the tracepoint behind @btp. */
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
}

/*
 * Describe the BPF program attached to @event.
 *
 * @prog_id is always written once a supported program is found.  For
 * tracepoints and syscall trace events @buf receives the event name and the
 * optional outputs (@fd_type, @probe_offset, @probe_addr) are filled when
 * non-NULL.  For kprobe/uprobe events the work is delegated to
 * bpf_get_kprobe_info()/bpf_get_uprobe_info() when those are configured.
 *
 * Returns 0 on success, -ENOENT when no program is attached, -EOPNOTSUPP
 * for BPF_PROG_TYPE_PERF_EVENT programs or unsupported event kinds.
 */
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
			    u32 *fd_type, const char **buf,
			    u64 *probe_offset, u64 *probe_addr,
			    unsigned long *missed)
{
	bool is_tracepoint, is_syscall_tp;
	struct bpf_prog *prog;
	int flags, err = 0;

	prog = event->prog;
	if (!prog)
		return -ENOENT;

	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
		return -EOPNOTSUPP;

	*prog_id = prog->aux->id;
	flags = event->tp_event->flags;
	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
	is_syscall_tp = is_syscall_trace_event(event->tp_event);

	if (is_tracepoint || is_syscall_tp) {
		*buf = is_tracepoint ? event->tp_event->tp->name
				     : event->tp_event->name;
		/* We allow NULL pointer for tracepoint */
		if (fd_type)
			*fd_type = BPF_FD_TYPE_TRACEPOINT;
		if (probe_offset)
			*probe_offset = 0x0;
		if (probe_addr)
			*probe_addr = 0x0;
	} else {
		/* kprobe/uprobe */
		err = -EOPNOTSUPP;
#ifdef CONFIG_KPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_KPROBE)
			err = bpf_get_kprobe_info(event, fd_type, buf,
						  probe_offset, probe_addr, missed,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
#ifdef CONFIG_UPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_UPROBE)
			err = bpf_get_uprobe_info(event, fd_type, buf,
						  probe_offset, probe_addr,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
	}

	return err;
}

/* Initialise the per-CPU send_signal_work irq_work on every possible CPU. */
static int __init send_signal_irq_work_init(void)
{
	int cpu;
	struct send_signal_irq_work *work;

	for_each_possible_cpu(cpu) {
		work = per_cpu_ptr(&send_signal_work, cpu);
		init_irq_work(&work->irq_work, do_bpf_send_signal);
	}
	return 0;
}

subsys_initcall(send_signal_irq_work_init);

#ifdef CONFIG_MODULES
/*
 * Module notifier: keeps bpf_trace_modules in sync with modules that carry
 * raw BPF events as they come and go.
 */
static int bpf_event_notify(struct notifier_block *nb,
unsigned long op, void *module)
{
	struct bpf_trace_module *btm, *tmp;
	struct module *mod = module;
	int ret = 0;

	/* only modules with raw BPF events, and only COMING/GOING, matter */
	if (mod->num_bpf_raw_events == 0 ||
	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
		goto out;

	mutex_lock(&bpf_module_mutex);

	switch (op) {
	case MODULE_STATE_COMING:
		btm = kzalloc(sizeof(*btm), GFP_KERNEL);
		if (btm) {
			btm->module = module;
			list_add(&btm->list, &bpf_trace_modules);
		} else {
			ret = -ENOMEM;
		}
		break;
	case MODULE_STATE_GOING:
		list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
			if (btm->module == module) {
				list_del(&btm->list);
				kfree(btm);
				break;
			}
		}
		break;
	}

	mutex_unlock(&bpf_module_mutex);

out:
	return notifier_from_errno(ret);
}

static struct notifier_block bpf_module_nb = {
	.notifier_call = bpf_event_notify,
};

/* Hook bpf_event_notify() into the module notifier chain. */
static int __init bpf_event_init(void)
{
	register_module_notifier(&bpf_module_nb);
	return 0;
}

fs_initcall(bpf_event_init);
#endif /* CONFIG_MODULES */

#ifdef CONFIG_FPROBE
/* State of one kprobe-multi link. */
struct bpf_kprobe_multi_link {
	struct bpf_link link;
	struct fprobe fp;
	unsigned long *addrs;	/* cnt probed addresses */
	u64 *cookies;		/* optional, parallel to addrs */
	u32 cnt;
	u32 mods_cnt;		/* number of entries in mods */
	struct module **mods;	/* modules pinned for the probed addresses */
	u32 flags;
};

/* Run context installed while a kprobe-multi program executes. */
struct bpf_kprobe_multi_run_ctx {
	struct bpf_run_ctx run_ctx;
	struct bpf_kprobe_multi_link *link;
	unsigned long entry_ip;
};

/* Symbol names copied from user space: syms[i] points into buf. */
struct user_syms {
	const char **syms;
	char *buf;
};

/*
 * Copy @cnt symbol names, given as an array of user pointers at @usyms,
 * into kernel memory.  On success @us owns both allocations and must be
 * released with free_user_syms().
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EFAULT when a
 * pointer cannot be read, -E2BIG when a name fills KSYM_NAME_LEN bytes
 * without a terminator, or the error from strncpy_from_user().
 */
static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt)
{
	unsigned long __user usymbol;
	const char **syms = NULL;
	char *buf = NULL, *p;
	int err = -ENOMEM;
	unsigned int i;

	syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL);
	if (!syms)
		goto error;

	buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL);
	if (!buf)
		goto error;

	for (p = buf, i = 0; i < cnt; i++) {
		if (__get_user(usymbol, usyms + i)) {
			err = -EFAULT;
			goto error;
		}
		err = strncpy_from_user(p, (const char __user *) usymbol, KSYM_NAME_LEN);
		if (err == KSYM_NAME_LEN)
			err = -E2BIG;
		if (err < 0)
			goto error;
		syms[i] = p;
		/* names are packed back to back, each with its terminator */
		p += err + 1;
	}

	us->syms = syms;
	us->buf = buf;
	return 0;

error:
	if (err) {
		kvfree(syms);
		kvfree(buf);
	}
return err;
}

/* Drop one module reference for each of the first @cnt entries of @mods. */
static void kprobe_multi_put_modules(struct module **mods, u32 cnt)
{
	u32 i;

	for (i = 0; i < cnt; i++)
		module_put(mods[i]);
}

/* Free both allocations held by @us. */
static void free_user_syms(struct user_syms *us)
{
	kvfree(us->syms);
	kvfree(us->buf);
}

/* bpf_link_ops.release: unregister the fprobe and unpin the modules. */
static void bpf_kprobe_multi_link_release(struct bpf_link *link)
{
	struct bpf_kprobe_multi_link *kmulti_link;

	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
	unregister_fprobe(&kmulti_link->fp);
	kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt);
}

/* bpf_link_ops.dealloc: free the arrays owned by the link and the link. */
static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
{
	struct bpf_kprobe_multi_link *kmulti_link;

	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
	kvfree(kmulti_link->addrs);
	kvfree(kmulti_link->cookies);
	kfree(kmulti_link->mods);
	kfree(kmulti_link);
}

/*
 * bpf_link_ops.fill_link_info: report count, flags and missed counter and,
 * if the caller supplied a buffer, the probed addresses.
 *
 * The user buffer pointer and its element count must be given together or
 * not at all (-EINVAL otherwise).  When the buffer is smaller than the
 * number of addresses, as many as fit are written and -ENOSPC is returned.
 * Callers for which kallsyms_show_value() is false get zeros instead of
 * addresses.
 */
static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
						struct bpf_link_info *info)
{
	u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs);
	struct bpf_kprobe_multi_link *kmulti_link;
	u32 ucount = info->kprobe_multi.count;
	int err = 0, i;

	if (!uaddrs ^ !ucount)
		return -EINVAL;

	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
	info->kprobe_multi.count = kmulti_link->cnt;
	info->kprobe_multi.flags = kmulti_link->flags;
	info->kprobe_multi.missed = kmulti_link->fp.nmissed;

	if (!uaddrs)
		return 0;
	if (ucount < kmulti_link->cnt)
		err = -ENOSPC;
	else
		ucount = kmulti_link->cnt;

	if (kallsyms_show_value(current_cred())) {
		if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64)))
			return -EFAULT;
	} else {
		for (i = 0; i < ucount; i++) {
			if (put_user(0, uaddrs + i))
				return -EFAULT;
		}
	}
	return err;
}

static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
	.release = bpf_kprobe_multi_link_release,
	.dealloc = bpf_kprobe_multi_link_dealloc,
	.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
};

/*
 * sort_r() swap callback for link->addrs: swaps two address slots and the
 * cookies at the same indexes, so both arrays stay parallel.  @priv is the
 * owning bpf_kprobe_multi_link.
 */
static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv)
{
	const struct bpf_kprobe_multi_link *link = priv;
unsigned long *addr_a = a, *addr_b = b;
	u64 *cookie_a, *cookie_b;

	/* the cookie index is the address slot's index within link->addrs */
	cookie_a = link->cookies + (addr_a - link->addrs);
	cookie_b = link->cookies + (addr_b - link->addrs);

	/* swap addr_a/addr_b and cookie_a/cookie_b values */
	swap(*addr_a, *addr_b);
	swap(*cookie_a, *cookie_b);
}

/* Three-way comparison of two unsigned long addresses (bsearch/sort). */
static int bpf_kprobe_multi_addrs_cmp(const void *a, const void *b)
{
	const unsigned long *addr_a = a, *addr_b = b;

	if (*addr_a == *addr_b)
		return 0;
	return *addr_a < *addr_b ? -1 : 1;
}

/* sort_r() flavour of bpf_kprobe_multi_addrs_cmp(); @priv is unused. */
static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv)
{
	return bpf_kprobe_multi_addrs_cmp(a, b);
}

/*
 * Return the cookie registered for the entry ip of the running
 * kprobe-multi program, or 0 when @ctx is NULL, the link has no cookies
 * or the ip is not found in link->addrs.  The lookup is a binary search
 * over link->addrs.
 */
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
	struct bpf_kprobe_multi_run_ctx *run_ctx;
	struct bpf_kprobe_multi_link *link;
	u64 *cookie, entry_ip;
	unsigned long *addr;

	if (WARN_ON_ONCE(!ctx))
		return 0;
	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
	link = run_ctx->link;
	if (!link->cookies)
		return 0;
	entry_ip = run_ctx->entry_ip;
	addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip),
		       bpf_kprobe_multi_addrs_cmp);
	if (!addr)
		return 0;
	cookie = link->cookies + (addr - link->addrs);
	return *cookie;
}

/* Return the entry ip recorded in the current kprobe-multi run context. */
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
	struct bpf_kprobe_multi_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
	return run_ctx->entry_ip;
}

/*
 * Run the link's program for a probe hit at @entry_ip with @regs as
 * context.  bpf_prog_active is used as a per-CPU recursion guard: when
 * another program is already active on this CPU the run is skipped,
 * counted as a miss and 0 is returned.
 */
static int
kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
			   unsigned long entry_ip, struct pt_regs *regs)
{
	struct bpf_kprobe_multi_run_ctx run_ctx = {
		.link = link,
		.entry_ip = entry_ip,
	};
	struct bpf_run_ctx *old_run_ctx;
	int err;

	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		bpf_prog_inc_misses_counter(link->link.prog);
		err = 0;
		goto out;
	}

	migrate_disable();
	rcu_read_lock();
	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
	err = bpf_prog_run(link->link.prog, regs);
	bpf_reset_run_ctx(old_run_ctx);
	rcu_read_unlock();
	migrate_enable();

 out:
	__this_cpu_dec(bpf_prog_active);
	return
err; } static int kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip, unsigned long ret_ip, struct pt_regs *regs, void *data) { struct bpf_kprobe_multi_link *link; link = container_of(fp, struct bpf_kprobe_multi_link, fp); kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs); return 0; } static void kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip, unsigned long ret_ip, struct pt_regs *regs, void *data) { struct bpf_kprobe_multi_link *link; link = container_of(fp, struct bpf_kprobe_multi_link, fp); kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs); } static int symbols_cmp_r(const void *a, const void *b, const void *priv) { const char **str_a = (const char **) a; const char **str_b = (const char **) b; return strcmp(*str_a, *str_b); } struct multi_symbols_sort { const char **funcs; u64 *cookies; }; static void symbols_swap_r(void *a, void *b, int size, const void *priv) { const struct multi_symbols_sort *data = priv; const char **name_a = a, **name_b = b; swap(*name_a, *name_b); /* If defined, swap also related cookies. 
*/ if (data->cookies) { u64 *cookie_a, *cookie_b; cookie_a = data->cookies + (name_a - data->funcs); cookie_b = data->cookies + (name_b - data->funcs); swap(*cookie_a, *cookie_b); } } struct modules_array { struct module **mods; int mods_cnt; int mods_cap; }; static int add_module(struct modules_array *arr, struct module *mod) { struct module **mods; if (arr->mods_cnt == arr->mods_cap) { arr->mods_cap = max(16, arr->mods_cap * 3 / 2); mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL); if (!mods) return -ENOMEM; arr->mods = mods; } arr->mods[arr->mods_cnt] = mod; arr->mods_cnt++; return 0; } static bool has_module(struct modules_array *arr, struct module *mod) { int i; for (i = arr->mods_cnt - 1; i >= 0; i--) { if (arr->mods[i] == mod) return true; } return false; } static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt) { struct modules_array arr = {}; u32 i, err = 0; for (i = 0; i < addrs_cnt; i++) { struct module *mod; preempt_disable(); mod = __module_address(addrs[i]); /* Either no module or we it's already stored */ if (!mod || has_module(&arr, mod)) { preempt_enable(); continue; } if (!try_module_get(mod)) err = -EINVAL; preempt_enable(); if (err) break; err = add_module(&arr, mod); if (err) { module_put(mod); break; } } /* We return either err < 0 in case of error, ... */ if (err) { kprobe_multi_put_modules(arr.mods, arr.mods_cnt); kfree(arr.mods); return err; } /* or number of modules found if everything is ok. 
*/ *mods = arr.mods; return arr.mods_cnt; } static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt) { u32 i; for (i = 0; i < cnt; i++) { if (!within_error_injection_list(addrs[i])) return -EINVAL; } return 0; } int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_kprobe_multi_link *link = NULL; struct bpf_link_primer link_primer; void __user *ucookies; unsigned long *addrs; u32 flags, cnt, size; void __user *uaddrs; u64 *cookies = NULL; void __user *usyms; int err; /* no support for 32bit archs yet */ if (sizeof(u64) != sizeof(void *)) return -EOPNOTSUPP; if (prog->expected_attach_type != BPF_TRACE_KPROBE_MULTI) return -EINVAL; flags = attr->link_create.kprobe_multi.flags; if (flags & ~BPF_F_KPROBE_MULTI_RETURN) return -EINVAL; uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs); usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms); if (!!uaddrs == !!usyms) return -EINVAL; cnt = attr->link_create.kprobe_multi.cnt; if (!cnt) return -EINVAL; if (cnt > MAX_KPROBE_MULTI_CNT) return -E2BIG; size = cnt * sizeof(*addrs); addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL); if (!addrs) return -ENOMEM; ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies); if (ucookies) { cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL); if (!cookies) { err = -ENOMEM; goto error; } if (copy_from_user(cookies, ucookies, size)) { err = -EFAULT; goto error; } } if (uaddrs) { if (copy_from_user(addrs, uaddrs, size)) { err = -EFAULT; goto error; } } else { struct multi_symbols_sort data = { .cookies = cookies, }; struct user_syms us; err = copy_user_syms(&us, usyms, cnt); if (err) goto error; if (cookies) data.funcs = us.syms; sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r, symbols_swap_r, &data); err = ftrace_lookup_symbols(us.syms, cnt, addrs); free_user_syms(&us); if (err) goto error; } if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) { err = -EINVAL; 
goto error;
	}

	link = kzalloc(sizeof(*link), GFP_KERNEL);
	if (!link) {
		err = -ENOMEM;
		goto error;
	}

	bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI,
		      &bpf_kprobe_multi_link_lops, prog);

	err = bpf_link_prime(&link->link, &link_primer);
	if (err)
		goto error;

	if (flags & BPF_F_KPROBE_MULTI_RETURN)
		link->fp.exit_handler = kprobe_multi_link_exit_handler;
	else
		link->fp.entry_handler = kprobe_multi_link_handler;

	link->addrs = addrs;
	link->cookies = cookies;
	link->cnt = cnt;
	link->flags = flags;

	if (cookies) {
		/*
		 * Sorting addresses will trigger sorting cookies as well
		 * (check bpf_kprobe_multi_cookie_swap). This way we can
		 * find cookie based on the address in bpf_get_attach_cookie
		 * helper.
		 */
		sort_r(addrs, cnt, sizeof(*addrs),
		       bpf_kprobe_multi_cookie_cmp,
		       bpf_kprobe_multi_cookie_swap,
		       link);
	}

	/*
	 * From here on failures go through bpf_link_cleanup() rather than
	 * the "error" label.
	 * NOTE(review): presumably cleanup ends up in the link's dealloc
	 * callback, which frees addrs/cookies/mods - confirm.
	 */
	err = get_modules_for_addrs(&link->mods, addrs, cnt);
	if (err < 0) {
		bpf_link_cleanup(&link_primer);
		return err;
	}
	link->mods_cnt = err;

	err = register_fprobe_ips(&link->fp, addrs, cnt);
	if (err) {
		kprobe_multi_put_modules(link->mods, link->mods_cnt);
		bpf_link_cleanup(&link_primer);
		return err;
	}

	return bpf_link_settle(&link_primer);

error:
	kfree(link);
	kvfree(addrs);
	kvfree(cookies);
	return err;
}
#else /* !CONFIG_FPROBE */
/* Stubs used when fprobe support is not built in. */
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
	return 0;
}
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
	return 0;
}
#endif

#ifdef CONFIG_UPROBES
struct bpf_uprobe_multi_link;

/* One probe of a uprobe-multi link. */
struct bpf_uprobe {
	struct bpf_uprobe_multi_link *link;	/* owning link */
	loff_t offset;
	unsigned long ref_ctr_offset;
	u64 cookie;
	struct uprobe_consumer consumer;
};

/* State of one uprobe-multi link. */
struct bpf_uprobe_multi_link {
	struct path path;
	struct bpf_link link;
	u32 cnt;			/* entries in uprobes */
	u32 flags;
	struct bpf_uprobe *uprobes;
	struct task_struct *task;	/* optional; NULL when no pid was given */
};

/* Run context installed while a uprobe-multi program executes. */
struct bpf_uprobe_multi_run_ctx {
	struct bpf_run_ctx run_ctx;
	unsigned long entry_ip;
	struct bpf_uprobe *uprobe;
};

/* Unregister the first @cnt consumers in @uprobes from the inode of @path. */
static void
bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes,
			  u32 cnt)
{
	u32 i;

	for (i = 0; i < cnt; i++) {
		uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset,
				  &uprobes[i].consumer);
	}
}

/* bpf_link_ops.release: unregister every uprobe of the link. */
static void bpf_uprobe_multi_link_release(struct bpf_link *link)
{
	struct bpf_uprobe_multi_link *umulti_link;

	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
	bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
}

/* bpf_link_ops.dealloc: drop task and path references and free the link. */
static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
{
	struct bpf_uprobe_multi_link *umulti_link;

	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
	if (umulti_link->task)
		put_task_struct(umulti_link->task);
	path_put(&umulti_link->path);
	kvfree(umulti_link->uprobes);
	kfree(umulti_link);
}

/*
 * bpf_link_ops.fill_link_info: report count, flags and pid and, when the
 * caller supplied buffers, the path and the per-probe offsets,
 * ref_ctr_offsets and cookies.
 *
 * The path buffer and its size must be given together or not at all, and
 * any of the three array buffers requires a non-zero count (-EINVAL
 * otherwise).  When the count is smaller than the number of probes, as
 * many entries as fit are written and -ENOSPC is returned.
 */
static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
						struct bpf_link_info *info)
{
	u64 __user *uref_ctr_offsets = u64_to_user_ptr(info->uprobe_multi.ref_ctr_offsets);
	u64 __user *ucookies = u64_to_user_ptr(info->uprobe_multi.cookies);
	u64 __user *uoffsets = u64_to_user_ptr(info->uprobe_multi.offsets);
	u64 __user *upath = u64_to_user_ptr(info->uprobe_multi.path);
	u32 upath_size = info->uprobe_multi.path_size;
	struct bpf_uprobe_multi_link *umulti_link;
	u32 ucount = info->uprobe_multi.count;
	int err = 0, i;
	long left;

	if (!upath ^ !upath_size)
		return -EINVAL;

	if ((uoffsets || uref_ctr_offsets || ucookies) && !ucount)
		return -EINVAL;

	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
	info->uprobe_multi.count = umulti_link->cnt;
	info->uprobe_multi.flags = umulti_link->flags;
	info->uprobe_multi.pid = umulti_link->task ?
				 task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;

	if (upath) {
		char *p, *buf;

		upath_size = min_t(u32, upath_size, PATH_MAX);

		buf = kmalloc(upath_size, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;
		p = d_path(&umulti_link->path, buf, upath_size);
		if (IS_ERR(p)) {
			kfree(buf);
			return PTR_ERR(p);
		}
		/* copy everything from p up to the end of buf */
		upath_size = buf + upath_size - p;
		left = copy_to_user(upath, p, upath_size);
		kfree(buf);
		if (left)
			return -EFAULT;
		info->uprobe_multi.path_size = upath_size;
	}

	if (!uoffsets && !ucookies && !uref_ctr_offsets)
		return 0;

	if (ucount < umulti_link->cnt)
		err = -ENOSPC;
	else
		ucount = umulti_link->cnt;

	for (i = 0; i < ucount; i++) {
		if (uoffsets &&
		    put_user(umulti_link->uprobes[i].offset, uoffsets + i))
			return -EFAULT;
		if (uref_ctr_offsets &&
		    put_user(umulti_link->uprobes[i].ref_ctr_offset, uref_ctr_offsets + i))
			return -EFAULT;
		if (ucookies &&
		    put_user(umulti_link->uprobes[i].cookie, ucookies + i))
			return -EFAULT;
	}

	return err;
}

static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
	.release = bpf_uprobe_multi_link_release,
	.dealloc = bpf_uprobe_multi_link_dealloc,
	.fill_link_info = bpf_uprobe_multi_link_fill_link_info,
};

/*
 * Run the link's program for a hit on @uprobe with @regs as context.
 * Nothing is run (and 0 is returned) when the link is bound to a task and
 * the current task is a different one.  Sleepable programs run under
 * rcu_read_lock_trace(), others under rcu_read_lock().
 */
static int uprobe_prog_run(struct bpf_uprobe *uprobe,
			   unsigned long entry_ip,
			   struct pt_regs *regs)
{
	struct bpf_uprobe_multi_link *link = uprobe->link;
	struct bpf_uprobe_multi_run_ctx run_ctx = {
		.entry_ip = entry_ip,
		.uprobe = uprobe,
	};
	struct bpf_prog *prog = link->link.prog;
	bool sleepable = prog->aux->sleepable;
	struct bpf_run_ctx *old_run_ctx;
	int err = 0;

	if (link->task && current != link->task)
		return 0;

	if (sleepable)
		rcu_read_lock_trace();
	else
		rcu_read_lock();

	migrate_disable();

	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
	err = bpf_prog_run(link->link.prog, regs);
	bpf_reset_run_ctx(old_run_ctx);

	migrate_enable();

	if (sleepable)
		rcu_read_unlock_trace();
	else
		rcu_read_unlock();
	return err;
}

/*
 * uprobe_consumer.filter callback: accept only the mm of the task the link
 * is bound to.
 */
static bool
uprobe_multi_link_filter(struct uprobe_consumer *con, enum uprobe_filter_ctx ctx,
			 struct mm_struct *mm)
{
	struct bpf_uprobe
*uprobe; uprobe = container_of(con, struct bpf_uprobe, consumer); return uprobe->link->task->mm == mm; } static int uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) { struct bpf_uprobe *uprobe; uprobe = container_of(con, struct bpf_uprobe, consumer); return uprobe_prog_run(uprobe, instruction_pointer(regs), regs); } static int uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) { struct bpf_uprobe *uprobe; uprobe = container_of(con, struct bpf_uprobe, consumer); return uprobe_prog_run(uprobe, func, regs); } static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) { struct bpf_uprobe_multi_run_ctx *run_ctx; run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx); return run_ctx->entry_ip; } static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) { struct bpf_uprobe_multi_run_ctx *run_ctx; run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx); return run_ctx->uprobe->cookie; } int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_uprobe_multi_link *link = NULL; unsigned long __user *uref_ctr_offsets; struct bpf_link_primer link_primer; struct bpf_uprobe *uprobes = NULL; struct task_struct *task = NULL; unsigned long __user *uoffsets; u64 __user *ucookies; void __user *upath; u32 flags, cnt, i; struct path path; char *name; pid_t pid; int err; /* no support for 32bit archs yet */ if (sizeof(u64) != sizeof(void *)) return -EOPNOTSUPP; if (prog->expected_attach_type != BPF_TRACE_UPROBE_MULTI) return -EINVAL; flags = attr->link_create.uprobe_multi.flags; if (flags & ~BPF_F_UPROBE_MULTI_RETURN) return -EINVAL; /* * path, offsets and cnt are mandatory, * ref_ctr_offsets and cookies are optional */ upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); cnt = attr->link_create.uprobe_multi.cnt; if (!upath || !uoffsets 
|| !cnt) return -EINVAL; if (cnt > MAX_UPROBE_MULTI_CNT) return -E2BIG; uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets); ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies); name = strndup_user(upath, PATH_MAX); if (IS_ERR(name)) { err = PTR_ERR(name); return err; } err = kern_path(name, LOOKUP_FOLLOW, &path); kfree(name); if (err) return err; if (!d_is_reg(path.dentry)) { err = -EBADF; goto error_path_put; } pid = attr->link_create.uprobe_multi.pid; if (pid) { rcu_read_lock(); task = get_pid_task(find_vpid(pid), PIDTYPE_PID); rcu_read_unlock(); if (!task) { err = -ESRCH; goto error_path_put; } } err = -ENOMEM; link = kzalloc(sizeof(*link), GFP_KERNEL); uprobes = kvcalloc(cnt, sizeof(*uprobes), GFP_KERNEL); if (!uprobes || !link) goto error_free; for (i = 0; i < cnt; i++) { if (__get_user(uprobes[i].offset, uoffsets + i)) { err = -EFAULT; goto error_free; } if (uprobes[i].offset < 0) { err = -EINVAL; goto error_free; } if (uref_ctr_offsets && __get_user(uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) { err = -EFAULT; goto error_free; } if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) { err = -EFAULT; goto error_free; } uprobes[i].link = link; if (flags & BPF_F_UPROBE_MULTI_RETURN) uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler; else uprobes[i].consumer.handler = uprobe_multi_link_handler; if (pid) uprobes[i].consumer.filter = uprobe_multi_link_filter; } link->cnt = cnt; link->uprobes = uprobes; link->path = path; link->task = task; link->flags = flags; bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI, &bpf_uprobe_multi_link_lops, prog); for (i = 0; i < cnt; i++) { err = uprobe_register_refctr(d_real_inode(link->path.dentry), uprobes[i].offset, uprobes[i].ref_ctr_offset, &uprobes[i].consumer); if (err) { bpf_uprobe_unregister(&path, uprobes, i); goto error_free; } } err = bpf_link_prime(&link->link, &link_primer); if (err) goto error_free; return bpf_link_settle(&link_primer); 
error_free: kvfree(uprobes); kfree(link); if (task) put_task_struct(task); error_path_put: path_put(&path); return err; } #else /* !CONFIG_UPROBES */ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EOPNOTSUPP; } static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) { return 0; } static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) { return 0; } #endif /* CONFIG_UPROBES */
104 46 2 100 4 102 98 41 86 89 88 22 35 89 54 54 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 /* mpicoder.c - Coder for the external representation of MPIs * Copyright (C) 1998, 1999 Free Software Foundation, Inc. * * This file is part of GnuPG. * * GnuPG is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * GnuPG is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ #include <linux/bitops.h> #include <linux/count_zeros.h> #include <linux/byteorder/generic.h> #include <linux/scatterlist.h> #include <linux/string.h> #include "mpi-internal.h" #define MAX_EXTERN_SCAN_BYTES (16*1024*1024) #define MAX_EXTERN_MPI_BITS 16384 /** * mpi_read_raw_data - Read a raw byte stream as a positive integer * @xbuffer: The data to read * @nbytes: The amount of data to read */ MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes) { const uint8_t *buffer = xbuffer; int i, j; unsigned nbits, nlimbs; mpi_limb_t a; MPI val = NULL; while (nbytes > 0 && buffer[0] == 0) { buffer++; nbytes--; } nbits = nbytes * 8; if (nbits > MAX_EXTERN_MPI_BITS) { pr_info("MPI: mpi too large (%u bits)\n", nbits); return NULL; } if (nbytes > 0) nbits -= count_leading_zeros(buffer[0]) - (BITS_PER_LONG - 8); nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); if (!val) return NULL; val->nbits = nbits; val->sign = 0; val->nlimbs = nlimbs; if (nbytes > 0) { i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; i %= BYTES_PER_MPI_LIMB; for (j = nlimbs; j > 0; j--) { a = 0; for (; i < BYTES_PER_MPI_LIMB; i++) { a <<= 8; a |= *buffer++; } i = 0; val->d[j - 1] = a; } } return val; } EXPORT_SYMBOL_GPL(mpi_read_raw_data); MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) { const uint8_t *buffer = xbuffer; unsigned int nbits, nbytes; MPI val; if (*ret_nread < 2) return ERR_PTR(-EINVAL); nbits = buffer[0] << 8 | buffer[1]; if (nbits > MAX_EXTERN_MPI_BITS) { pr_info("MPI: mpi too large (%u bits)\n", nbits); return ERR_PTR(-EINVAL); } nbytes = DIV_ROUND_UP(nbits, 8); if (nbytes + 2 > *ret_nread) { pr_info("MPI: mpi larger than buffer nbytes=%u ret_nread=%u\n", nbytes, *ret_nread); return ERR_PTR(-EINVAL); } val = 
mpi_read_raw_data(buffer + 2, nbytes); if (!val) return ERR_PTR(-ENOMEM); *ret_nread = nbytes + 2; return val; } EXPORT_SYMBOL_GPL(mpi_read_from_buffer); /**************** * Fill the mpi VAL from the hex string in STR. */ int mpi_fromstr(MPI val, const char *str) { int sign = 0; int prepend_zero = 0; int i, j, c, c1, c2; unsigned int nbits, nbytes, nlimbs; mpi_limb_t a; if (*str == '-') { sign = 1; str++; } /* Skip optional hex prefix. */ if (*str == '0' && str[1] == 'x') str += 2; nbits = strlen(str); if (nbits > MAX_EXTERN_SCAN_BYTES) { mpi_clear(val); return -EINVAL; } nbits *= 4; if ((nbits % 8)) prepend_zero = 1; nbytes = (nbits+7) / 8; nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB; if (val->alloced < nlimbs) mpi_resize(val, nlimbs); i = BYTES_PER_MPI_LIMB - (nbytes % BYTES_PER_MPI_LIMB); i %= BYTES_PER_MPI_LIMB; j = val->nlimbs = nlimbs; val->sign = sign; for (; j > 0; j--) { a = 0; for (; i < BYTES_PER_MPI_LIMB; i++) { if (prepend_zero) { c1 = '0'; prepend_zero = 0; } else c1 = *str++; if (!c1) { mpi_clear(val); return -EINVAL; } c2 = *str++; if (!c2) { mpi_clear(val); return -EINVAL; } if (c1 >= '0' && c1 <= '9') c = c1 - '0'; else if (c1 >= 'a' && c1 <= 'f') c = c1 - 'a' + 10; else if (c1 >= 'A' && c1 <= 'F') c = c1 - 'A' + 10; else { mpi_clear(val); return -EINVAL; } c <<= 4; if (c2 >= '0' && c2 <= '9') c |= c2 - '0'; else if (c2 >= 'a' && c2 <= 'f') c |= c2 - 'a' + 10; else if (c2 >= 'A' && c2 <= 'F') c |= c2 - 'A' + 10; else { mpi_clear(val); return -EINVAL; } a <<= 8; a |= c; } i = 0; val->d[j-1] = a; } return 0; } EXPORT_SYMBOL_GPL(mpi_fromstr); MPI mpi_scanval(const char *string) { MPI a; a = mpi_alloc(0); if (!a) return NULL; if (mpi_fromstr(a, string)) { mpi_free(a); return NULL; } mpi_normalize(a); return a; } EXPORT_SYMBOL_GPL(mpi_scanval); static int count_lzeros(MPI a) { mpi_limb_t alimb; int i, lzeros = 0; for (i = a->nlimbs - 1; i >= 0; i--) { alimb = a->d[i]; if (alimb == 0) { lzeros += sizeof(mpi_limb_t); } else { lzeros += 
count_leading_zeros(alimb) / 8; break; } } return lzeros; } /** * mpi_read_buffer() - read MPI to a buffer provided by user (msb first) * * @a: a multi precision integer * @buf: buffer to which the output will be written to. Needs to be at * least mpi_get_size(a) long. * @buf_len: size of the buf. * @nbytes: receives the actual length of the data written on success and * the data to-be-written on -EOVERFLOW in case buf_len was too * small. * @sign: if not NULL, it will be set to the sign of a. * * Return: 0 on success or error code in case of error */ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, int *sign) { uint8_t *p; #if BYTES_PER_MPI_LIMB == 4 __be32 alimb; #elif BYTES_PER_MPI_LIMB == 8 __be64 alimb; #else #error please implement for this limb size. #endif unsigned int n = mpi_get_size(a); int i, lzeros; if (!buf || !nbytes) return -EINVAL; if (sign) *sign = a->sign; lzeros = count_lzeros(a); if (buf_len < n - lzeros) { *nbytes = n - lzeros; return -EOVERFLOW; } p = buf; *nbytes = n - lzeros; for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB, lzeros %= BYTES_PER_MPI_LIMB; i >= 0; i--) { #if BYTES_PER_MPI_LIMB == 4 alimb = cpu_to_be32(a->d[i]); #elif BYTES_PER_MPI_LIMB == 8 alimb = cpu_to_be64(a->d[i]); #else #error please implement for this limb size. #endif memcpy(p, (u8 *)&alimb + lzeros, BYTES_PER_MPI_LIMB - lzeros); p += BYTES_PER_MPI_LIMB - lzeros; lzeros = 0; } return 0; } EXPORT_SYMBOL_GPL(mpi_read_buffer); /* * mpi_get_buffer() - Returns an allocated buffer with the MPI (msb first). * Caller must free the return string. * This function does return a 0 byte buffer with nbytes set to zero if the * value of A is zero. * * @a: a multi precision integer. * @nbytes: receives the length of this buffer. * @sign: if not NULL, it will be set to the sign of the a. 
* * Return: Pointer to MPI buffer or NULL on error */ void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign) { uint8_t *buf; unsigned int n; int ret; if (!nbytes) return NULL; n = mpi_get_size(a); if (!n) n++; buf = kmalloc(n, GFP_KERNEL); if (!buf) return NULL; ret = mpi_read_buffer(a, buf, n, nbytes, sign); if (ret) { kfree(buf); return NULL; } return buf; } EXPORT_SYMBOL_GPL(mpi_get_buffer); /** * mpi_write_to_sgl() - Funnction exports MPI to an sgl (msb first) * * This function works in the same way as the mpi_read_buffer, but it * takes an sgl instead of u8 * buf. * * @a: a multi precision integer * @sgl: scatterlist to write to. Needs to be at least * mpi_get_size(a) long. * @nbytes: the number of bytes to write. Leading bytes will be * filled with zero. * @sign: if not NULL, it will be set to the sign of a. * * Return: 0 on success or error code in case of error */ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned nbytes, int *sign) { u8 *p, *p2; #if BYTES_PER_MPI_LIMB == 4 __be32 alimb; #elif BYTES_PER_MPI_LIMB == 8 __be64 alimb; #else #error please implement for this limb size. #endif unsigned int n = mpi_get_size(a); struct sg_mapping_iter miter; int i, x, buf_len; int nents; if (sign) *sign = a->sign; if (nbytes < n) return -EOVERFLOW; nents = sg_nents_for_len(sgl, nbytes); if (nents < 0) return -EINVAL; sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_TO_SG); sg_miter_next(&miter); buf_len = miter.length; p2 = miter.addr; while (nbytes > n) { i = min_t(unsigned, nbytes - n, buf_len); memset(p2, 0, i); p2 += i; nbytes -= i; buf_len -= i; if (!buf_len) { sg_miter_next(&miter); buf_len = miter.length; p2 = miter.addr; } } for (i = a->nlimbs - 1; i >= 0; i--) { #if BYTES_PER_MPI_LIMB == 4 alimb = a->d[i] ? cpu_to_be32(a->d[i]) : 0; #elif BYTES_PER_MPI_LIMB == 8 alimb = a->d[i] ? cpu_to_be64(a->d[i]) : 0; #else #error please implement for this limb size. 
#endif p = (u8 *)&alimb; for (x = 0; x < sizeof(alimb); x++) { *p2++ = *p++; if (!--buf_len) { sg_miter_next(&miter); buf_len = miter.length; p2 = miter.addr; } } } sg_miter_stop(&miter); return 0; } EXPORT_SYMBOL_GPL(mpi_write_to_sgl); /* * mpi_read_raw_from_sgl() - Function allocates an MPI and populates it with * data from the sgl * * This function works in the same way as the mpi_read_raw_data, but it * takes an sgl instead of void * buffer. i.e. it allocates * a new MPI and reads the content of the sgl to the MPI. * * @sgl: scatterlist to read from * @nbytes: number of bytes to read * * Return: Pointer to a new MPI or NULL on error */ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) { struct sg_mapping_iter miter; unsigned int nbits, nlimbs; int x, j, z, lzeros, ents; unsigned int len; const u8 *buff; mpi_limb_t a; MPI val = NULL; ents = sg_nents_for_len(sgl, nbytes); if (ents < 0) return NULL; sg_miter_start(&miter, sgl, ents, SG_MITER_ATOMIC | SG_MITER_FROM_SG); lzeros = 0; len = 0; while (nbytes > 0) { while (len && !*buff) { lzeros++; len--; buff++; } if (len && *buff) break; sg_miter_next(&miter); buff = miter.addr; len = miter.length; nbytes -= lzeros; lzeros = 0; } miter.consumed = lzeros; nbytes -= lzeros; nbits = nbytes * 8; if (nbits > MAX_EXTERN_MPI_BITS) { sg_miter_stop(&miter); pr_info("MPI: mpi too large (%u bits)\n", nbits); return NULL; } if (nbytes > 0) nbits -= count_leading_zeros(*buff) - (BITS_PER_LONG - 8); sg_miter_stop(&miter); nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); if (!val) return NULL; val->nbits = nbits; val->sign = 0; val->nlimbs = nlimbs; if (nbytes == 0) return val; j = nlimbs - 1; a = 0; z = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; z %= BYTES_PER_MPI_LIMB; while (sg_miter_next(&miter)) { buff = miter.addr; len = min_t(unsigned, miter.length, nbytes); nbytes -= len; for (x = 0; x < len; x++) { a <<= 8; a |= *buff++; if (((z + x + 1) % BYTES_PER_MPI_LIMB) == 0) { 
val->d[j--] = a; a = 0; } } z += x; } return val; } EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl); /* Perform a two's complement operation on buffer P of size N bytes. */ static void twocompl(unsigned char *p, unsigned int n) { int i; for (i = n-1; i >= 0 && !p[i]; i--) ; if (i >= 0) { if ((p[i] & 0x01)) p[i] = (((p[i] ^ 0xfe) | 0x01) & 0xff); else if ((p[i] & 0x02)) p[i] = (((p[i] ^ 0xfc) | 0x02) & 0xfe); else if ((p[i] & 0x04)) p[i] = (((p[i] ^ 0xf8) | 0x04) & 0xfc); else if ((p[i] & 0x08)) p[i] = (((p[i] ^ 0xf0) | 0x08) & 0xf8); else if ((p[i] & 0x10)) p[i] = (((p[i] ^ 0xe0) | 0x10) & 0xf0); else if ((p[i] & 0x20)) p[i] = (((p[i] ^ 0xc0) | 0x20) & 0xe0); else if ((p[i] & 0x40)) p[i] = (((p[i] ^ 0x80) | 0x40) & 0xc0); else p[i] = 0x80; for (i--; i >= 0; i--) p[i] ^= 0xff; } } int mpi_print(enum gcry_mpi_format format, unsigned char *buffer, size_t buflen, size_t *nwritten, MPI a) { unsigned int nbits = mpi_get_nbits(a); size_t len; size_t dummy_nwritten; int negative; if (!nwritten) nwritten = &dummy_nwritten; /* Libgcrypt does no always care to set clear the sign if the value * is 0. For printing this is a bit of a surprise, in particular * because if some of the formats don't support negative numbers but * should be able to print a zero. Thus we need this extra test * for a negative number. */ if (a->sign && mpi_cmp_ui(a, 0)) negative = 1; else negative = 0; len = buflen; *nwritten = 0; if (format == GCRYMPI_FMT_STD) { unsigned char *tmp; int extra = 0; unsigned int n; tmp = mpi_get_buffer(a, &n, NULL); if (!tmp) return -EINVAL; if (negative) { twocompl(tmp, n); if (!(*tmp & 0x80)) { /* Need to extend the sign. */ n++; extra = 2; } } else if (n && (*tmp & 0x80)) { /* Positive but the high bit of the returned buffer is set. * Thus we need to print an extra leading 0x00 so that the * output is interpreted as a positive number. */ n++; extra = 1; } if (buffer && n > len) { /* The provided buffer is too short. 
*/ kfree(tmp); return -E2BIG; } if (buffer) { unsigned char *s = buffer; if (extra == 1) *s++ = 0; else if (extra) *s++ = 0xff; memcpy(s, tmp, n-!!extra); } kfree(tmp); *nwritten = n; return 0; } else if (format == GCRYMPI_FMT_USG) { unsigned int n = (nbits + 7)/8; /* Note: We ignore the sign for this format. */ /* FIXME: for performance reasons we should put this into * mpi_aprint because we can then use the buffer directly. */ if (buffer && n > len) return -E2BIG; if (buffer) { unsigned char *tmp; tmp = mpi_get_buffer(a, &n, NULL); if (!tmp) return -EINVAL; memcpy(buffer, tmp, n); kfree(tmp); } *nwritten = n; return 0; } else if (format == GCRYMPI_FMT_PGP) { unsigned int n = (nbits + 7)/8; /* The PGP format can only handle unsigned integers. */ if (negative) return -EINVAL; if (buffer && n+2 > len) return -E2BIG; if (buffer) { unsigned char *tmp; unsigned char *s = buffer; s[0] = nbits >> 8; s[1] = nbits; tmp = mpi_get_buffer(a, &n, NULL); if (!tmp) return -EINVAL; memcpy(s+2, tmp, n); kfree(tmp); } *nwritten = n+2; return 0; } else if (format == GCRYMPI_FMT_SSH) { unsigned char *tmp; int extra = 0; unsigned int n; tmp = mpi_get_buffer(a, &n, NULL); if (!tmp) return -EINVAL; if (negative) { twocompl(tmp, n); if (!(*tmp & 0x80)) { /* Need to extend the sign. 
*/ n++; extra = 2; } } else if (n && (*tmp & 0x80)) { n++; extra = 1; } if (buffer && n+4 > len) { kfree(tmp); return -E2BIG; } if (buffer) { unsigned char *s = buffer; *s++ = n >> 24; *s++ = n >> 16; *s++ = n >> 8; *s++ = n; if (extra == 1) *s++ = 0; else if (extra) *s++ = 0xff; memcpy(s, tmp, n-!!extra); } kfree(tmp); *nwritten = 4+n; return 0; } else if (format == GCRYMPI_FMT_HEX) { unsigned char *tmp; int i; int extra = 0; unsigned int n = 0; tmp = mpi_get_buffer(a, &n, NULL); if (!tmp) return -EINVAL; if (!n || (*tmp & 0x80)) extra = 2; if (buffer && 2*n + extra + negative + 1 > len) { kfree(tmp); return -E2BIG; } if (buffer) { unsigned char *s = buffer; if (negative) *s++ = '-'; if (extra) { *s++ = '0'; *s++ = '0'; } for (i = 0; i < n; i++) { unsigned int c = tmp[i]; *s++ = (c >> 4) < 10 ? '0'+(c>>4) : 'A'+(c>>4)-10; c &= 15; *s++ = c < 10 ? '0'+c : 'A'+c-10; } *s++ = 0; *nwritten = s - buffer; } else { *nwritten = 2*n + extra + negative + 1; } kfree(tmp); return 0; } else return -EINVAL; } EXPORT_SYMBOL_GPL(mpi_print);
32102 32102 8258 32102 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _X86_IRQFLAGS_H_ #define _X86_IRQFLAGS_H_ #include <asm/processor-flags.h> #ifndef __ASSEMBLY__ #include <asm/nospec-branch.h> /* * Interrupt control: */ /* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */ extern inline unsigned long native_save_fl(void); extern __always_inline unsigned long native_save_fl(void) { unsigned long flags; /* * "=rm" is safe here, because "pop" adjusts the stack before * it evaluates its effective address -- this is part of the * documented behavior of the "pop" instruction. 
*/ asm volatile("# __raw_save_flags\n\t" "pushf ; pop %0" : "=rm" (flags) : /* no input */ : "memory"); return flags; } static __always_inline void native_irq_disable(void) { asm volatile("cli": : :"memory"); } static __always_inline void native_irq_enable(void) { asm volatile("sti": : :"memory"); } static __always_inline void native_safe_halt(void) { mds_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static __always_inline void native_halt(void) { mds_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } #endif #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #ifndef __ASSEMBLY__ #include <linux/types.h> static __always_inline unsigned long arch_local_save_flags(void) { return native_save_fl(); } static __always_inline void arch_local_irq_disable(void) { native_irq_disable(); } static __always_inline void arch_local_irq_enable(void) { native_irq_enable(); } /* * Used in the idle loop; sti takes one instruction cycle * to complete: */ static __always_inline void arch_safe_halt(void) { native_safe_halt(); } /* * Used when interrupts are already enabled or to * shutdown the processor: */ static __always_inline void halt(void) { native_halt(); } /* * For spinlocks, etc: */ static __always_inline unsigned long arch_local_irq_save(void) { unsigned long flags = arch_local_save_flags(); arch_local_irq_disable(); return flags; } #else #ifdef CONFIG_X86_64 #ifdef CONFIG_DEBUG_ENTRY #define SAVE_FLAGS pushfq; popq %rax #endif #endif #endif /* __ASSEMBLY__ */ #endif /* CONFIG_PARAVIRT_XXL */ #ifndef __ASSEMBLY__ static __always_inline int arch_irqs_disabled_flags(unsigned long flags) { return !(flags & X86_EFLAGS_IF); } static __always_inline int arch_irqs_disabled(void) { unsigned long flags = arch_local_save_flags(); return arch_irqs_disabled_flags(flags); } static __always_inline void arch_local_irq_restore(unsigned long flags) { if (!arch_irqs_disabled_flags(flags)) arch_local_irq_enable(); } #endif /* !__ASSEMBLY__ */ #endif
1 1 3 14 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 // SPDX-License-Identifier: GPL-2.0-or-later /* * ldm - Support for Windows Logical Disk Manager (Dynamic Disks) * * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org> * Copyright (c) 2001-2012 Anton Altaparmakov * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> * * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads */ #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/stringify.h> #include <linux/kernel.h> #include <linux/uuid.h> #include <linux/msdos_partition.h> #include "ldm.h" #include "check.h" /* * ldm_debug/info/error/crit - Output an error message * @f: A printf format string containing the message * @...: Variables to substitute into @f * * ldm_debug() writes a DEBUG level message to the syslog but only if the * driver was compiled with debug enabled. Otherwise, the call turns into a NOP. */ #ifndef CONFIG_LDM_DEBUG #define ldm_debug(...) do {} while (0) #else #define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __func__, f, ##a) #endif #define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __func__, f, ##a) #define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a) #define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a) static __printf(3, 4) void _ldm_printk(const char *level, const char *function, const char *fmt, ...) 
{ struct va_format vaf; va_list args; va_start (args, fmt); vaf.fmt = fmt; vaf.va = &args; printk("%s%s(): %pV\n", level, function, &vaf); va_end(args); } /** * ldm_parse_privhead - Read the LDM Database PRIVHEAD structure * @data: Raw database PRIVHEAD structure loaded from the device * @ph: In-memory privhead structure in which to return parsed information * * This parses the LDM database PRIVHEAD structure supplied in @data and * sets up the in-memory privhead structure @ph with the obtained information. * * Return: 'true' @ph contains the PRIVHEAD data * 'false' @ph contents are undefined */ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph) { bool is_vista = false; BUG_ON(!data || !ph); if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) { ldm_error("Cannot find PRIVHEAD structure. LDM database is" " corrupt. Aborting."); return false; } ph->ver_major = get_unaligned_be16(data + 0x000C); ph->ver_minor = get_unaligned_be16(data + 0x000E); ph->logical_disk_start = get_unaligned_be64(data + 0x011B); ph->logical_disk_size = get_unaligned_be64(data + 0x0123); ph->config_start = get_unaligned_be64(data + 0x012B); ph->config_size = get_unaligned_be64(data + 0x0133); /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */ if (ph->ver_major == 2 && ph->ver_minor == 12) is_vista = true; if (!is_vista && (ph->ver_major != 2 || ph->ver_minor != 11)) { ldm_error("Expected PRIVHEAD version 2.11 or 2.12, got %d.%d." " Aborting.", ph->ver_major, ph->ver_minor); return false; } ldm_debug("PRIVHEAD version %d.%d (Windows %s).", ph->ver_major, ph->ver_minor, is_vista ? "Vista" : "2000/XP"); if (ph->config_size != LDM_DB_SIZE) { /* 1 MiB in sectors. */ /* Warn the user and continue, carefully. 
*/ ldm_info("Database is normally %u bytes, it claims to " "be %llu bytes.", LDM_DB_SIZE, (unsigned long long)ph->config_size); } if ((ph->logical_disk_size == 0) || (ph->logical_disk_start + ph->logical_disk_size > ph->config_start)) { ldm_error("PRIVHEAD disk size doesn't match real disk size"); return false; } if (uuid_parse(data + 0x0030, &ph->disk_id)) { ldm_error("PRIVHEAD contains an invalid GUID."); return false; } ldm_debug("Parsed PRIVHEAD successfully."); return true; } /** * ldm_parse_tocblock - Read the LDM Database TOCBLOCK structure * @data: Raw database TOCBLOCK structure loaded from the device * @toc: In-memory toc structure in which to return parsed information * * This parses the LDM Database TOCBLOCK (table of contents) structure supplied * in @data and sets up the in-memory tocblock structure @toc with the obtained * information. * * N.B. The *_start and *_size values returned in @toc are not range-checked. * * Return: 'true' @toc contains the TOCBLOCK data * 'false' @toc contents are undefined */ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc) { BUG_ON (!data || !toc); if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) { ldm_crit ("Cannot find TOCBLOCK, database may be corrupt."); return false; } strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name)); toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0; toc->bitmap1_start = get_unaligned_be64(data + 0x2E); toc->bitmap1_size = get_unaligned_be64(data + 0x36); if (strncmp (toc->bitmap1_name, TOC_BITMAP1, sizeof (toc->bitmap1_name)) != 0) { ldm_crit ("TOCBLOCK's first bitmap is '%s', should be '%s'.", TOC_BITMAP1, toc->bitmap1_name); return false; } strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name)); toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0; toc->bitmap2_start = get_unaligned_be64(data + 0x50); toc->bitmap2_size = get_unaligned_be64(data + 0x58); if (strncmp (toc->bitmap2_name, TOC_BITMAP2, sizeof (toc->bitmap2_name)) != 0) { 
ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.", TOC_BITMAP2, toc->bitmap2_name); return false; } ldm_debug ("Parsed TOCBLOCK successfully."); return true; } /** * ldm_parse_vmdb - Read the LDM Database VMDB structure * @data: Raw database VMDB structure loaded from the device * @vm: In-memory vmdb structure in which to return parsed information * * This parses the LDM Database VMDB structure supplied in @data and sets up * the in-memory vmdb structure @vm with the obtained information. * * N.B. The *_start, *_size and *_seq values will be range-checked later. * * Return: 'true' @vm contains VMDB info * 'false' @vm contents are undefined */ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm) { BUG_ON (!data || !vm); if (MAGIC_VMDB != get_unaligned_be32(data)) { ldm_crit ("Cannot find the VMDB, database may be corrupt."); return false; } vm->ver_major = get_unaligned_be16(data + 0x12); vm->ver_minor = get_unaligned_be16(data + 0x14); if ((vm->ver_major != 4) || (vm->ver_minor != 10)) { ldm_error ("Expected VMDB version %d.%d, got %d.%d. " "Aborting.", 4, 10, vm->ver_major, vm->ver_minor); return false; } vm->vblk_size = get_unaligned_be32(data + 0x08); if (vm->vblk_size == 0) { ldm_error ("Illegal VBLK size"); return false; } vm->vblk_offset = get_unaligned_be32(data + 0x0C); vm->last_vblk_seq = get_unaligned_be32(data + 0x04); ldm_debug ("Parsed VMDB successfully."); return true; } /** * ldm_compare_privheads - Compare two privhead objects * @ph1: First privhead * @ph2: Second privhead * * This compares the two privhead structures @ph1 and @ph2. 
* * Return: 'true' Identical * 'false' Different */ static bool ldm_compare_privheads (const struct privhead *ph1, const struct privhead *ph2) { BUG_ON (!ph1 || !ph2); return ((ph1->ver_major == ph2->ver_major) && (ph1->ver_minor == ph2->ver_minor) && (ph1->logical_disk_start == ph2->logical_disk_start) && (ph1->logical_disk_size == ph2->logical_disk_size) && (ph1->config_start == ph2->config_start) && (ph1->config_size == ph2->config_size) && uuid_equal(&ph1->disk_id, &ph2->disk_id)); } /** * ldm_compare_tocblocks - Compare two tocblock objects * @toc1: First toc * @toc2: Second toc * * This compares the two tocblock structures @toc1 and @toc2. * * Return: 'true' Identical * 'false' Different */ static bool ldm_compare_tocblocks (const struct tocblock *toc1, const struct tocblock *toc2) { BUG_ON (!toc1 || !toc2); return ((toc1->bitmap1_start == toc2->bitmap1_start) && (toc1->bitmap1_size == toc2->bitmap1_size) && (toc1->bitmap2_start == toc2->bitmap2_start) && (toc1->bitmap2_size == toc2->bitmap2_size) && !strncmp (toc1->bitmap1_name, toc2->bitmap1_name, sizeof (toc1->bitmap1_name)) && !strncmp (toc1->bitmap2_name, toc2->bitmap2_name, sizeof (toc1->bitmap2_name))); } /** * ldm_validate_privheads - Compare the primary privhead with its backups * @state: Partition check state including device holding the LDM Database * @ph1: Memory struct to fill with ph contents * * Read and compare all three privheads from disk. * * The privheads on disk show the size and location of the main disk area and * the configuration area (the database). The values are range-checked against * @hd, which contains the real size of the disk. 
* * Return: 'true' Success * 'false' Error */ static bool ldm_validate_privheads(struct parsed_partitions *state, struct privhead *ph1) { static const int off[3] = { OFF_PRIV1, OFF_PRIV2, OFF_PRIV3 }; struct privhead *ph[3] = { ph1 }; Sector sect; u8 *data; bool result = false; long num_sects; int i; BUG_ON (!state || !ph1); ph[1] = kmalloc (sizeof (*ph[1]), GFP_KERNEL); ph[2] = kmalloc (sizeof (*ph[2]), GFP_KERNEL); if (!ph[1] || !ph[2]) { ldm_crit ("Out of memory."); goto out; } /* off[1 & 2] are relative to ph[0]->config_start */ ph[0]->config_start = 0; /* Read and parse privheads */ for (i = 0; i < 3; i++) { data = read_part_sector(state, ph[0]->config_start + off[i], &sect); if (!data) { ldm_crit ("Disk read failed."); goto out; } result = ldm_parse_privhead (data, ph[i]); put_dev_sector (sect); if (!result) { ldm_error ("Cannot find PRIVHEAD %d.", i+1); /* Log again */ if (i < 2) goto out; /* Already logged */ else break; /* FIXME ignore for now, 3rd PH can fail on odd-sized disks */ } } num_sects = get_capacity(state->disk); if ((ph[0]->config_start > num_sects) || ((ph[0]->config_start + ph[0]->config_size) > num_sects)) { ldm_crit ("Database extends beyond the end of the disk."); goto out; } if ((ph[0]->logical_disk_start > ph[0]->config_start) || ((ph[0]->logical_disk_start + ph[0]->logical_disk_size) > ph[0]->config_start)) { ldm_crit ("Disk and database overlap."); goto out; } if (!ldm_compare_privheads (ph[0], ph[1])) { ldm_crit ("Primary and backup PRIVHEADs don't match."); goto out; } /* FIXME ignore this for now if (!ldm_compare_privheads (ph[0], ph[2])) { ldm_crit ("Primary and backup PRIVHEADs don't match."); goto out; }*/ ldm_debug ("Validated PRIVHEADs successfully."); result = true; out: kfree (ph[1]); kfree (ph[2]); return result; } /** * ldm_validate_tocblocks - Validate the table of contents and its backups * @state: Partition check state including device holding the LDM Database * @base: Offset, into @state->disk, of the database * @ldb: 
Cache of the database structures * * Find and compare the four tables of contents of the LDM Database stored on * @state->disk and return the parsed information into @toc1. * * The offsets and sizes of the configs are range-checked against a privhead. * * Return: 'true' @toc1 contains validated TOCBLOCK info * 'false' @toc1 contents are undefined */ static bool ldm_validate_tocblocks(struct parsed_partitions *state, unsigned long base, struct ldmdb *ldb) { static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4}; struct tocblock *tb[4]; struct privhead *ph; Sector sect; u8 *data; int i, nr_tbs; bool result = false; BUG_ON(!state || !ldb); ph = &ldb->ph; tb[0] = &ldb->toc; tb[1] = kmalloc_array(3, sizeof(*tb[1]), GFP_KERNEL); if (!tb[1]) { ldm_crit("Out of memory."); goto err; } tb[2] = (struct tocblock*)((u8*)tb[1] + sizeof(*tb[1])); tb[3] = (struct tocblock*)((u8*)tb[2] + sizeof(*tb[2])); /* * Try to read and parse all four TOCBLOCKs. * * Windows Vista LDM v2.12 does not always have all four TOCBLOCKs so * skip any that fail as long as we get at least one valid TOCBLOCK. */ for (nr_tbs = i = 0; i < 4; i++) { data = read_part_sector(state, base + off[i], &sect); if (!data) { ldm_error("Disk read failed for TOCBLOCK %d.", i); continue; } if (ldm_parse_tocblock(data, tb[nr_tbs])) nr_tbs++; put_dev_sector(sect); } if (!nr_tbs) { ldm_crit("Failed to find a valid TOCBLOCK."); goto err; } /* Range check the TOCBLOCK against a privhead. */ if (((tb[0]->bitmap1_start + tb[0]->bitmap1_size) > ph->config_size) || ((tb[0]->bitmap2_start + tb[0]->bitmap2_size) > ph->config_size)) { ldm_crit("The bitmaps are out of range. Giving up."); goto err; } /* Compare all loaded TOCBLOCKs. 
*/ for (i = 1; i < nr_tbs; i++) { if (!ldm_compare_tocblocks(tb[0], tb[i])) { ldm_crit("TOCBLOCKs 0 and %d do not match.", i); goto err; } } ldm_debug("Validated %d TOCBLOCKs successfully.", nr_tbs); result = true; err: kfree(tb[1]); return result; } /** * ldm_validate_vmdb - Read the VMDB and validate it * @state: Partition check state including device holding the LDM Database * @base: Offset, into @bdev, of the database * @ldb: Cache of the database structures * * Find the vmdb of the LDM Database stored on @bdev and return the parsed * information in @ldb. * * Return: 'true' @ldb contains validated VBDB info * 'false' @ldb contents are undefined */ static bool ldm_validate_vmdb(struct parsed_partitions *state, unsigned long base, struct ldmdb *ldb) { Sector sect; u8 *data; bool result = false; struct vmdb *vm; struct tocblock *toc; BUG_ON (!state || !ldb); vm = &ldb->vm; toc = &ldb->toc; data = read_part_sector(state, base + OFF_VMDB, &sect); if (!data) { ldm_crit ("Disk read failed."); return false; } if (!ldm_parse_vmdb (data, vm)) goto out; /* Already logged */ /* Are there uncommitted transactions? */ if (get_unaligned_be16(data + 0x10) != 0x01) { ldm_crit ("Database is not in a consistent state. Aborting."); goto out; } if (vm->vblk_offset != 512) ldm_info ("VBLKs start at offset 0x%04x.", vm->vblk_offset); /* * The last_vblkd_seq can be before the end of the vmdb, just make sure * it is not out of bounds. */ if ((vm->vblk_size * vm->last_vblk_seq) > (toc->bitmap1_size << 9)) { ldm_crit ("VMDB exceeds allowed size specified by TOCBLOCK. " "Database is corrupt. Aborting."); goto out; } result = true; out: put_dev_sector (sect); return result; } /** * ldm_validate_partition_table - Determine whether bdev might be a dynamic disk * @state: Partition check state including device holding the LDM Database * * This function provides a weak test to decide whether the device is a dynamic * disk or not. 
It looks for an MS-DOS-style partition table containing at * least one partition of type 0x42 (formerly SFS, now used by Windows for * dynamic disks). * * N.B. The only possible error can come from the read_part_sector and that is * only likely to happen if the underlying device is strange. If that IS * the case we should return zero to let someone else try. * * Return: 'true' @state->disk is a dynamic disk * 'false' @state->disk is not a dynamic disk, or an error occurred */ static bool ldm_validate_partition_table(struct parsed_partitions *state) { Sector sect; u8 *data; struct msdos_partition *p; int i; bool result = false; BUG_ON(!state); data = read_part_sector(state, 0, &sect); if (!data) { ldm_info ("Disk read failed."); return false; } if (*(__le16*) (data + 0x01FE) != cpu_to_le16 (MSDOS_LABEL_MAGIC)) goto out; p = (struct msdos_partition *)(data + 0x01BE); for (i = 0; i < 4; i++, p++) if (p->sys_ind == LDM_PARTITION) { result = true; break; } if (result) ldm_debug ("Found W2K dynamic disk partition type."); out: put_dev_sector (sect); return result; } /** * ldm_get_disk_objid - Search a linked list of vblk's for a given Disk Id * @ldb: Cache of the database structures * * The LDM Database contains a list of all partitions on all dynamic disks. * The primary PRIVHEAD, at the beginning of the physical disk, tells us * the GUID of this disk. This function searches for the GUID in a linked * list of vblk's. 
* * Return: Pointer, A matching vblk was found * NULL, No match, or an error */ static struct vblk * ldm_get_disk_objid (const struct ldmdb *ldb) { struct list_head *item; BUG_ON (!ldb); list_for_each (item, &ldb->v_disk) { struct vblk *v = list_entry (item, struct vblk, list); if (uuid_equal(&v->vblk.disk.disk_id, &ldb->ph.disk_id)) return v; } return NULL; } /** * ldm_create_data_partitions - Create data partitions for this device * @pp: List of the partitions parsed so far * @ldb: Cache of the database structures * * The database contains ALL the partitions for ALL disk groups, so we need to * filter out this specific disk. Using the disk's object id, we can find all * the partitions in the database that belong to this disk. * * Add each partition in our database, to the parsed_partitions structure. * * N.B. This function creates the partitions in the order it finds partition * objects in the linked list. * * Return: 'true' Partition created * 'false' Error, probably a range checking problem */ static bool ldm_create_data_partitions (struct parsed_partitions *pp, const struct ldmdb *ldb) { struct list_head *item; struct vblk *vb; struct vblk *disk; struct vblk_part *part; int part_num = 1; BUG_ON (!pp || !ldb); disk = ldm_get_disk_objid (ldb); if (!disk) { ldm_crit ("Can't find the ID of this disk in the database."); return false; } strlcat(pp->pp_buf, " [LDM]", PAGE_SIZE); /* Create the data partitions */ list_for_each (item, &ldb->v_part) { vb = list_entry (item, struct vblk, list); part = &vb->vblk.part; if (part->disk_id != disk->obj_id) continue; put_partition (pp, part_num, ldb->ph.logical_disk_start + part->start, part->size); part_num++; } strlcat(pp->pp_buf, "\n", PAGE_SIZE); return true; } /** * ldm_relative - Calculate the next relative offset * @buffer: Block of data being worked on * @buflen: Size of the block of data * @base: Size of the previous fixed width fields * @offset: Cumulative size of the previous variable-width fields * * Because many of 
the VBLK fields are variable-width, it's necessary * to calculate each offset based on the previous one and the length * of the field it pointed to. * * Return: -1 Error, the calculated offset exceeded the size of the buffer * n OK, a range-checked offset into buffer */ static int ldm_relative(const u8 *buffer, int buflen, int base, int offset) { base += offset; if (!buffer || offset < 0 || base > buflen) { if (!buffer) ldm_error("!buffer"); if (offset < 0) ldm_error("offset (%d) < 0", offset); if (base > buflen) ldm_error("base (%d) > buflen (%d)", base, buflen); return -1; } if (base + buffer[base] >= buflen) { ldm_error("base (%d) + buffer[base] (%d) >= buflen (%d)", base, buffer[base], buflen); return -1; } return buffer[base] + offset + 1; } /** * ldm_get_vnum - Convert a variable-width, big endian number, into cpu order * @block: Pointer to the variable-width number to convert * * Large numbers in the LDM Database are often stored in a packed format. Each * number is prefixed by a one byte width marker. All numbers in the database * are stored in big-endian byte order. This function reads one of these * numbers and returns the result * * N.B. This function DOES NOT perform any range checking, though the most * it will read is eight bytes. * * Return: n A number * 0 Zero, or an error occurred */ static u64 ldm_get_vnum (const u8 *block) { u64 tmp = 0; u8 length; BUG_ON (!block); length = *block++; if (length && length <= 8) while (length--) tmp = (tmp << 8) | *block++; else ldm_error ("Illegal length %d.", length); return tmp; } /** * ldm_get_vstr - Read a length-prefixed string into a buffer * @block: Pointer to the length marker * @buffer: Location to copy string to * @buflen: Size of the output buffer * * Many of the strings in the LDM Database are not NULL terminated. Instead * they are prefixed by a one byte length marker. This function copies one of * these strings into a buffer. * * N.B. This function DOES NOT perform any range checking on the input. 
* If the buffer is too small, the output will be truncated. * * Return: 0, Error and @buffer contents are undefined * n, String length in characters (excluding NULL) * buflen-1, String was truncated. */ static int ldm_get_vstr (const u8 *block, u8 *buffer, int buflen) { int length; BUG_ON (!block || !buffer); length = block[0]; if (length >= buflen) { ldm_error ("Truncating string %d -> %d.", length, buflen); length = buflen - 1; } memcpy (buffer, block + 1, length); buffer[length] = 0; return length; } /** * ldm_parse_cmp3 - Read a raw VBLK Component object into a vblk structure * @buffer: Block of data being worked on * @buflen: Size of the block of data * @vb: In-memory vblk in which to return information * * Read a raw VBLK Component object (version 3) into a vblk structure. * * Return: 'true' @vb contains a Component VBLK * 'false' @vb contents are not defined */ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb) { int r_objid, r_name, r_vstate, r_child, r_parent, r_stripe, r_cols, len; struct vblk_comp *comp; BUG_ON (!buffer || !vb); r_objid = ldm_relative (buffer, buflen, 0x18, 0); r_name = ldm_relative (buffer, buflen, 0x18, r_objid); r_vstate = ldm_relative (buffer, buflen, 0x18, r_name); r_child = ldm_relative (buffer, buflen, 0x1D, r_vstate); r_parent = ldm_relative (buffer, buflen, 0x2D, r_child); if (buffer[0x12] & VBLK_FLAG_COMP_STRIPE) { r_stripe = ldm_relative (buffer, buflen, 0x2E, r_parent); r_cols = ldm_relative (buffer, buflen, 0x2E, r_stripe); len = r_cols; } else { r_stripe = 0; len = r_parent; } if (len < 0) return false; len += VBLK_SIZE_CMP3; if (len != get_unaligned_be32(buffer + 0x14)) return false; comp = &vb->vblk.comp; ldm_get_vstr (buffer + 0x18 + r_name, comp->state, sizeof (comp->state)); comp->type = buffer[0x18 + r_vstate]; comp->children = ldm_get_vnum (buffer + 0x1D + r_vstate); comp->parent_id = ldm_get_vnum (buffer + 0x2D + r_child); comp->chunksize = r_stripe ? 
ldm_get_vnum (buffer+r_parent+0x2E) : 0; return true; } /** * ldm_parse_dgr3 - Read a raw VBLK Disk Group object into a vblk structure * @buffer: Block of data being worked on * @buflen: Size of the block of data * @vb: In-memory vblk in which to return information * * Read a raw VBLK Disk Group object (version 3) into a vblk structure. * * Return: 'true' @vb contains a Disk Group VBLK * 'false' @vb contents are not defined */ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb) { int r_objid, r_name, r_diskid, r_id1, r_id2, len; struct vblk_dgrp *dgrp; BUG_ON (!buffer || !vb); r_objid = ldm_relative (buffer, buflen, 0x18, 0); r_name = ldm_relative (buffer, buflen, 0x18, r_objid); r_diskid = ldm_relative (buffer, buflen, 0x18, r_name); if (buffer[0x12] & VBLK_FLAG_DGR3_IDS) { r_id1 = ldm_relative (buffer, buflen, 0x24, r_diskid); r_id2 = ldm_relative (buffer, buflen, 0x24, r_id1); len = r_id2; } else len = r_diskid; if (len < 0) return false; len += VBLK_SIZE_DGR3; if (len != get_unaligned_be32(buffer + 0x14)) return false; dgrp = &vb->vblk.dgrp; ldm_get_vstr (buffer + 0x18 + r_name, dgrp->disk_id, sizeof (dgrp->disk_id)); return true; } /** * ldm_parse_dgr4 - Read a raw VBLK Disk Group object into a vblk structure * @buffer: Block of data being worked on * @buflen: Size of the block of data * @vb: In-memory vblk in which to return information * * Read a raw VBLK Disk Group object (version 4) into a vblk structure. 
* * Return: 'true' @vb contains a Disk Group VBLK * 'false' @vb contents are not defined */ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb) { char buf[64]; int r_objid, r_name, r_id1, r_id2, len; BUG_ON (!buffer || !vb); r_objid = ldm_relative (buffer, buflen, 0x18, 0); r_name = ldm_relative (buffer, buflen, 0x18, r_objid); if (buffer[0x12] & VBLK_FLAG_DGR4_IDS) { r_id1 = ldm_relative (buffer, buflen, 0x44, r_name); r_id2 = ldm_relative (buffer, buflen, 0x44, r_id1); len = r_id2; } else len = r_name; if (len < 0) return false; len += VBLK_SIZE_DGR4; if (len != get_unaligned_be32(buffer + 0x14)) return false; ldm_get_vstr (buffer + 0x18 + r_objid, buf, sizeof (buf)); return true; } /** * ldm_parse_dsk3 - Read a raw VBLK Disk object into a vblk structure * @buffer: Block of data being worked on * @buflen: Size of the block of data * @vb: In-memory vblk in which to return information * * Read a raw VBLK Disk object (version 3) into a vblk structure. * * Return: 'true' @vb contains a Disk VBLK * 'false' @vb contents are not defined */ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb) { int r_objid, r_name, r_diskid, r_altname, len; struct vblk_disk *disk; BUG_ON (!buffer || !vb); r_objid = ldm_relative (buffer, buflen, 0x18, 0); r_name = ldm_relative (buffer, buflen, 0x18, r_objid); r_diskid = ldm_relative (buffer, buflen, 0x18, r_name); r_altname = ldm_relative (buffer, buflen, 0x18, r_diskid); len = r_altname; if (len < 0) return false; len += VBLK_SIZE_DSK3; if (len != get_unaligned_be32(buffer + 0x14)) return false; disk = &vb->vblk.disk; ldm_get_vstr (buffer + 0x18 + r_diskid, disk->alt_name, sizeof (disk->alt_name)); if (uuid_parse(buffer + 0x19 + r_name, &disk->disk_id)) return false; return true; } /** * ldm_parse_dsk4 - Read a raw VBLK Disk object into a vblk structure * @buffer: Block of data being worked on * @buflen: Size of the block of data * @vb: In-memory vblk in which to return information * * Read a 
raw VBLK Disk object (version 4) into a vblk structure. * * Return: 'true' @vb contains a Disk VBLK * 'false' @vb contents are not defined */ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb) { int r_objid, r_name, len; struct vblk_disk *disk; BUG_ON (!buffer || !vb); r_objid = ldm_relative (buffer, buflen, 0x18, 0); r_name = ldm_relative (buffer, buflen, 0x18, r_objid); len = r_name; if (len < 0) return false; len += VBLK_SIZE_DSK4; if (len != get_unaligned_be32(buffer + 0x14)) return false; disk = &vb->vblk.disk; import_uuid(&disk->disk_id, buffer + 0x18 + r_name); return true; } /** * ldm_parse_prt3 - Read a raw VBLK Partition object into a vblk structure * @buffer: Block of data being worked on * @buflen: Size of the block of data * @vb: In-memory vblk in which to return information * * Read a raw VBLK Partition object (version 3) into a vblk structure. * * Return: 'true' @vb contains a Partition VBLK * 'false' @vb contents are not defined */ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb) { int r_objid, r_name, r_size, r_parent, r_diskid, r_index, len; struct vblk_part *part; BUG_ON(!buffer || !vb); r_objid = ldm_relative(buffer, buflen, 0x18, 0); if (r_objid < 0) { ldm_error("r_objid %d < 0", r_objid); return false; } r_name = ldm_relative(buffer, buflen, 0x18, r_objid); if (r_name < 0) { ldm_error("r_name %d < 0", r_name); return false; } r_size = ldm_relative(buffer, buflen, 0x34, r_name); if (r_size < 0) { ldm_error("r_size %d < 0", r_size); return false; } r_parent = ldm_relative(buffer, buflen, 0x34, r_size); if (r_parent < 0) { ldm_error("r_parent %d < 0", r_parent); return false; } r_diskid = ldm_relative(buffer, buflen, 0x34, r_parent); if (r_diskid < 0) { ldm_error("r_diskid %d < 0", r_diskid); return false; } if (buffer[0x12] & VBLK_FLAG_PART_INDEX) { r_index = ldm_relative(buffer, buflen, 0x34, r_diskid); if (r_index < 0) { ldm_error("r_index %d < 0", r_index); return false; } len = r_index; } else 
len = r_diskid; if (len < 0) { ldm_error("len %d < 0", len); return false; } len += VBLK_SIZE_PRT3; if (len > get_unaligned_be32(buffer + 0x14)) { ldm_error("len %d > BE32(buffer + 0x14) %d", len, get_unaligned_be32(buffer + 0x14)); return false; } part = &vb->vblk.part; part->start = get_unaligned_be64(buffer + 0x24 + r_name); part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name); part->size = ldm_get_vnum(buffer + 0x34 + r_name); part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size); part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent); if (vb->flags & VBLK_FLAG_PART_INDEX) part->partnum = buffer[0x35 + r_diskid]; else part->partnum = 0; return true; } /** * ldm_parse_vol5 - Read a raw VBLK Volume object into a vblk structure * @buffer: Block of data being worked on * @buflen: Size of the block of data * @vb: In-memory vblk in which to return information * * Read a raw VBLK Volume object (version 5) into a vblk structure. * * Return: 'true' @vb contains a Volume VBLK * 'false' @vb contents are not defined */ static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb) { int r_objid, r_name, r_vtype, r_disable_drive_letter, r_child, r_size; int r_id1, r_id2, r_size2, r_drive, len; struct vblk_volu *volu; BUG_ON(!buffer || !vb); r_objid = ldm_relative(buffer, buflen, 0x18, 0); if (r_objid < 0) { ldm_error("r_objid %d < 0", r_objid); return false; } r_name = ldm_relative(buffer, buflen, 0x18, r_objid); if (r_name < 0) { ldm_error("r_name %d < 0", r_name); return false; } r_vtype = ldm_relative(buffer, buflen, 0x18, r_name); if (r_vtype < 0) { ldm_error("r_vtype %d < 0", r_vtype); return false; } r_disable_drive_letter = ldm_relative(buffer, buflen, 0x18, r_vtype); if (r_disable_drive_letter < 0) { ldm_error("r_disable_drive_letter %d < 0", r_disable_drive_letter); return false; } r_child = ldm_relative(buffer, buflen, 0x2D, r_disable_drive_letter); if (r_child < 0) { ldm_error("r_child %d < 0", r_child); return false; } r_size = 
ldm_relative(buffer, buflen, 0x3D, r_child); if (r_size < 0) { ldm_error("r_size %d < 0", r_size); return false; } if (buffer[0x12] & VBLK_FLAG_VOLU_ID1) { r_id1 = ldm_relative(buffer, buflen, 0x52, r_size); if (r_id1 < 0) { ldm_error("r_id1 %d < 0", r_id1); return false; } } else r_id1 = r_size; if (buffer[0x12] & VBLK_FLAG_VOLU_ID2) { r_id2 = ldm_relative(buffer, buflen, 0x52, r_id1); if (r_id2 < 0) { ldm_error("r_id2 %d < 0", r_id2); return false; } } else r_id2 = r_id1; if (buffer[0x12] & VBLK_FLAG_VOLU_SIZE) { r_size2 = ldm_relative(buffer, buflen, 0x52, r_id2); if (r_size2 < 0) { ldm_error("r_size2 %d < 0", r_size2); return false; } } else r_size2 = r_id2; if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) { r_drive = ldm_relative(buffer, buflen, 0x52, r_size2); if (r_drive < 0) { ldm_error("r_drive %d < 0", r_drive); return false; } } else r_drive = r_size2; len = r_drive; if (len < 0) { ldm_error("len %d < 0", len); return false; } len += VBLK_SIZE_VOL5; if (len > get_unaligned_be32(buffer + 0x14)) { ldm_error("len %d > BE32(buffer + 0x14) %d", len, get_unaligned_be32(buffer + 0x14)); return false; } volu = &vb->vblk.volu; ldm_get_vstr(buffer + 0x18 + r_name, volu->volume_type, sizeof(volu->volume_type)); memcpy(volu->volume_state, buffer + 0x18 + r_disable_drive_letter, sizeof(volu->volume_state)); volu->size = ldm_get_vnum(buffer + 0x3D + r_child); volu->partition_type = buffer[0x41 + r_size]; memcpy(volu->guid, buffer + 0x42 + r_size, sizeof(volu->guid)); if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) { ldm_get_vstr(buffer + 0x52 + r_size, volu->drive_hint, sizeof(volu->drive_hint)); } return true; } /** * ldm_parse_vblk - Read a raw VBLK object into a vblk structure * @buf: Block of data being worked on * @len: Size of the block of data * @vb: In-memory vblk in which to return information * * Read a raw VBLK object into a vblk structure. 
This function just reads the * information common to all VBLK types, then delegates the rest of the work to * helper functions: ldm_parse_*. * * Return: 'true' @vb contains a VBLK * 'false' @vb contents are not defined */ static bool ldm_parse_vblk (const u8 *buf, int len, struct vblk *vb) { bool result = false; int r_objid; BUG_ON (!buf || !vb); r_objid = ldm_relative (buf, len, 0x18, 0); if (r_objid < 0) { ldm_error ("VBLK header is corrupt."); return false; } vb->flags = buf[0x12]; vb->type = buf[0x13]; vb->obj_id = ldm_get_vnum (buf + 0x18); ldm_get_vstr (buf+0x18+r_objid, vb->name, sizeof (vb->name)); switch (vb->type) { case VBLK_CMP3: result = ldm_parse_cmp3 (buf, len, vb); break; case VBLK_DSK3: result = ldm_parse_dsk3 (buf, len, vb); break; case VBLK_DSK4: result = ldm_parse_dsk4 (buf, len, vb); break; case VBLK_DGR3: result = ldm_parse_dgr3 (buf, len, vb); break; case VBLK_DGR4: result = ldm_parse_dgr4 (buf, len, vb); break; case VBLK_PRT3: result = ldm_parse_prt3 (buf, len, vb); break; case VBLK_VOL5: result = ldm_parse_vol5 (buf, len, vb); break; } if (result) ldm_debug ("Parsed VBLK 0x%llx (type: 0x%02x) ok.", (unsigned long long) vb->obj_id, vb->type); else ldm_error ("Failed to parse VBLK 0x%llx (type: 0x%02x).", (unsigned long long) vb->obj_id, vb->type); return result; } /** * ldm_ldmdb_add - Adds a raw VBLK entry to the ldmdb database * @data: Raw VBLK to add to the database * @len: Size of the raw VBLK * @ldb: Cache of the database structures * * The VBLKs are sorted into categories. Partitions are also sorted by offset. * * N.B. This function does not check the validity of the VBLKs. 
* * Return: 'true' The VBLK was added * 'false' An error occurred */ static bool ldm_ldmdb_add (u8 *data, int len, struct ldmdb *ldb) { struct vblk *vb; struct list_head *item; BUG_ON (!data || !ldb); vb = kmalloc (sizeof (*vb), GFP_KERNEL); if (!vb) { ldm_crit ("Out of memory."); return false; } if (!ldm_parse_vblk (data, len, vb)) { kfree(vb); return false; /* Already logged */ } /* Put vblk into the correct list. */ switch (vb->type) { case VBLK_DGR3: case VBLK_DGR4: list_add (&vb->list, &ldb->v_dgrp); break; case VBLK_DSK3: case VBLK_DSK4: list_add (&vb->list, &ldb->v_disk); break; case VBLK_VOL5: list_add (&vb->list, &ldb->v_volu); break; case VBLK_CMP3: list_add (&vb->list, &ldb->v_comp); break; case VBLK_PRT3: /* Sort by the partition's start sector. */ list_for_each (item, &ldb->v_part) { struct vblk *v = list_entry (item, struct vblk, list); if ((v->vblk.part.disk_id == vb->vblk.part.disk_id) && (v->vblk.part.start > vb->vblk.part.start)) { list_add_tail (&vb->list, &v->list); return true; } } list_add_tail (&vb->list, &ldb->v_part); break; } return true; } /** * ldm_frag_add - Add a VBLK fragment to a list * @data: Raw fragment to be added to the list * @size: Size of the raw fragment * @frags: Linked list of VBLK fragments * * Fragmented VBLKs may not be consecutive in the database, so they are placed * in a list so they can be pieced together later. 
* * Return: 'true' Success, the VBLK was added to the list * 'false' Error, a problem occurred */ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags) { struct frag *f; struct list_head *item; int rec, num, group; BUG_ON (!data || !frags); if (size < 2 * VBLK_SIZE_HEAD) { ldm_error("Value of size is too small."); return false; } group = get_unaligned_be32(data + 0x08); rec = get_unaligned_be16(data + 0x0C); num = get_unaligned_be16(data + 0x0E); if ((num < 1) || (num > 4)) { ldm_error ("A VBLK claims to have %d parts.", num); return false; } if (rec >= num) { ldm_error("REC value (%d) exceeds NUM value (%d)", rec, num); return false; } list_for_each (item, frags) { f = list_entry (item, struct frag, list); if (f->group == group) goto found; } f = kmalloc (sizeof (*f) + size*num, GFP_KERNEL); if (!f) { ldm_crit ("Out of memory."); return false; } f->group = group; f->num = num; f->rec = rec; f->map = 0xFF << num; list_add_tail (&f->list, frags); found: if (rec >= f->num) { ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num); return false; } if (f->map & (1 << rec)) { ldm_error ("Duplicate VBLK, part %d.", rec); f->map &= 0x7F; /* Mark the group as broken */ return false; } f->map |= (1 << rec); if (!rec) memcpy(f->data, data, VBLK_SIZE_HEAD); data += VBLK_SIZE_HEAD; size -= VBLK_SIZE_HEAD; memcpy(f->data + VBLK_SIZE_HEAD + rec * size, data, size); return true; } /** * ldm_frag_free - Free a linked list of VBLK fragments * @list: Linked list of fragments * * Free a linked list of VBLK fragments * * Return: none */ static void ldm_frag_free (struct list_head *list) { struct list_head *item, *tmp; BUG_ON (!list); list_for_each_safe (item, tmp, list) kfree (list_entry (item, struct frag, list)); } /** * ldm_frag_commit - Validate fragmented VBLKs and add them to the database * @frags: Linked list of VBLK fragments * @ldb: Cache of the database structures * * Now that all the fragmented VBLKs have been collected, they must be added to 
* the database for later use. * * Return: 'true' All the fragments we added successfully * 'false' One or more of the fragments we invalid */ static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb) { struct frag *f; struct list_head *item; BUG_ON (!frags || !ldb); list_for_each (item, frags) { f = list_entry (item, struct frag, list); if (f->map != 0xFF) { ldm_error ("VBLK group %d is incomplete (0x%02x).", f->group, f->map); return false; } if (!ldm_ldmdb_add (f->data, f->num*ldb->vm.vblk_size, ldb)) return false; /* Already logged */ } return true; } /** * ldm_get_vblks - Read the on-disk database of VBLKs into memory * @state: Partition check state including device holding the LDM Database * @base: Offset, into @state->disk, of the database * @ldb: Cache of the database structures * * To use the information from the VBLKs, they need to be read from the disk, * unpacked and validated. We cache them in @ldb according to their type. * * Return: 'true' All the VBLKs were read successfully * 'false' An error occurred */ static bool ldm_get_vblks(struct parsed_partitions *state, unsigned long base, struct ldmdb *ldb) { int size, perbuf, skip, finish, s, v, recs; u8 *data = NULL; Sector sect; bool result = false; LIST_HEAD (frags); BUG_ON(!state || !ldb); size = ldb->vm.vblk_size; perbuf = 512 / size; skip = ldb->vm.vblk_offset >> 9; /* Bytes to sectors */ finish = (size * ldb->vm.last_vblk_seq) >> 9; for (s = skip; s < finish; s++) { /* For each sector */ data = read_part_sector(state, base + OFF_VMDB + s, &sect); if (!data) { ldm_crit ("Disk read failed."); goto out; } for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */ if (MAGIC_VBLK != get_unaligned_be32(data)) { ldm_error ("Expected to find a VBLK."); goto out; } recs = get_unaligned_be16(data + 0x0E); /* Number of records */ if (recs == 1) { if (!ldm_ldmdb_add (data, size, ldb)) goto out; /* Already logged */ } else if (recs > 1) { if (!ldm_frag_add (data, size, &frags)) goto out; /* 
Already logged */ } /* else Record is not in use, ignore it. */ } put_dev_sector (sect); data = NULL; } result = ldm_frag_commit (&frags, ldb); /* Failures, already logged */ out: if (data) put_dev_sector (sect); ldm_frag_free (&frags); return result; } /** * ldm_free_vblks - Free a linked list of vblk's * @lh: Head of a linked list of struct vblk * * Free a list of vblk's and free the memory used to maintain the list. * * Return: none */ static void ldm_free_vblks (struct list_head *lh) { struct list_head *item, *tmp; BUG_ON (!lh); list_for_each_safe (item, tmp, lh) kfree (list_entry (item, struct vblk, list)); } /** * ldm_partition - Find out whether a device is a dynamic disk and handle it * @state: Partition check state including device holding the LDM Database * * This determines whether the device @bdev is a dynamic disk and if so creates * the partitions necessary in the gendisk structure pointed to by @hd. * * We create a dummy device 1, which contains the LDM database, and then create * each partition described by the LDM database in sequence as devices 2+. For * example, if the device is hda, we would have: hda1: LDM database, hda2, hda3, * and so on: the actual data containing partitions. * * Return: 1 Success, @state->disk is a dynamic disk and we handled it * 0 Success, @state->disk is not a dynamic disk * -1 An error occurred before enough information had been read * Or @state->disk is a dynamic disk, but it may be corrupted */ int ldm_partition(struct parsed_partitions *state) { struct ldmdb *ldb; unsigned long base; int result = -1; BUG_ON(!state); /* Look for signs of a Dynamic Disk */ if (!ldm_validate_partition_table(state)) return 0; ldb = kmalloc (sizeof (*ldb), GFP_KERNEL); if (!ldb) { ldm_crit ("Out of memory."); goto out; } /* Parse and check privheads. */ if (!ldm_validate_privheads(state, &ldb->ph)) goto out; /* Already logged */ /* All further references are relative to base (database start). 
*/ base = ldb->ph.config_start; /* Parse and check tocs and vmdb. */ if (!ldm_validate_tocblocks(state, base, ldb) || !ldm_validate_vmdb(state, base, ldb)) goto out; /* Already logged */ /* Initialize vblk lists in ldmdb struct */ INIT_LIST_HEAD (&ldb->v_dgrp); INIT_LIST_HEAD (&ldb->v_disk); INIT_LIST_HEAD (&ldb->v_volu); INIT_LIST_HEAD (&ldb->v_comp); INIT_LIST_HEAD (&ldb->v_part); if (!ldm_get_vblks(state, base, ldb)) { ldm_crit ("Failed to read the VBLKs from the database."); goto cleanup; } /* Finally, create the data partition devices. */ if (ldm_create_data_partitions(state, ldb)) { ldm_debug ("Parsed LDM database successfully."); result = 1; } /* else Already logged */ cleanup: ldm_free_vblks (&ldb->v_dgrp); ldm_free_vblks (&ldb->v_disk); ldm_free_vblks (&ldb->v_volu); ldm_free_vblks (&ldb->v_comp); ldm_free_vblks (&ldb->v_part); out: kfree (ldb); return result; }
65 157 6 6 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 // SPDX-License-Identifier: GPL-2.0-only /* dummy.c: a dummy net driver The purpose of this driver is to provide a device to point a route through, but not to actually transmit packets. Why? If you have a machine whose only connection is an occasional PPP/SLIP/PLIP link, you can only connect to your own hostname when the link is up. Otherwise you have to use localhost. This isn't very consistent. One solution is to set up a dummy link using PPP/SLIP/PLIP, but this seems (to me) too much overhead for too little gain. This driver provides a small alternative. Thus you can do [when not running slip] ifconfig dummy slip.addr.ess.here up [to go to slip] ifconfig dummy down dip whatever This was written by looking at Donald Becker's skeleton driver and the loopback driver. I then threw away anything that didn't apply! Thanks to Alan Cox for the key clue on what to do with misguided packets. 
Nick Holloway, 27th May 1994 [I tweaked this explanation a little but that's all] Alan Cox, 30th May 1994 */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/init.h> #include <linux/moduleparam.h> #include <linux/rtnetlink.h> #include <linux/net_tstamp.h> #include <net/rtnetlink.h> #include <linux/u64_stats_sync.h> #define DRV_NAME "dummy" static int numdummies = 1; /* fake multicast ability */ static void set_multicast_list(struct net_device *dev) { } static void dummy_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { dev_lstats_read(dev, &stats->tx_packets, &stats->tx_bytes); } static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev) { dev_lstats_add(dev, skb->len); skb_tx_timestamp(skb); dev_kfree_skb(skb); return NETDEV_TX_OK; } static int dummy_dev_init(struct net_device *dev) { dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); if (!dev->lstats) return -ENOMEM; return 0; } static void dummy_dev_uninit(struct net_device *dev) { free_percpu(dev->lstats); } static int dummy_change_carrier(struct net_device *dev, bool new_carrier) { if (new_carrier) netif_carrier_on(dev); else netif_carrier_off(dev); return 0; } static const struct net_device_ops dummy_netdev_ops = { .ndo_init = dummy_dev_init, .ndo_uninit = dummy_dev_uninit, .ndo_start_xmit = dummy_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = set_multicast_list, .ndo_set_mac_address = eth_mac_addr, .ndo_get_stats64 = dummy_get_stats64, .ndo_change_carrier = dummy_change_carrier, }; static const struct ethtool_ops dummy_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, }; static void dummy_setup(struct net_device *dev) { ether_setup(dev); /* Initialize the device structure. */ dev->netdev_ops = &dummy_netdev_ops; dev->ethtool_ops = &dummy_ethtool_ops; dev->needs_free_netdev = true; /* Fill in device structure with ethernet-generic values. 
*/ dev->flags |= IFF_NOARP; dev->flags &= ~IFF_MULTICAST; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; dev->features |= NETIF_F_SG | NETIF_F_FRAGLIST; dev->features |= NETIF_F_GSO_SOFTWARE; dev->features |= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX; dev->features |= NETIF_F_GSO_ENCAP_ALL; dev->hw_features |= dev->features; dev->hw_enc_features |= dev->features; eth_hw_addr_random(dev); dev->min_mtu = 0; dev->max_mtu = 0; } static int dummy_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) return -EINVAL; if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) return -EADDRNOTAVAIL; } return 0; } static struct rtnl_link_ops dummy_link_ops __read_mostly = { .kind = DRV_NAME, .setup = dummy_setup, .validate = dummy_validate, }; /* Number of dummy devices to be set up by this module. */ module_param(numdummies, int, 0); MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices"); static int __init dummy_init_one(void) { struct net_device *dev_dummy; int err; dev_dummy = alloc_netdev(0, "dummy%d", NET_NAME_ENUM, dummy_setup); if (!dev_dummy) return -ENOMEM; dev_dummy->rtnl_link_ops = &dummy_link_ops; err = register_netdevice(dev_dummy); if (err < 0) goto err; return 0; err: free_netdev(dev_dummy); return err; } static int __init dummy_init_module(void) { int i, err = 0; down_write(&pernet_ops_rwsem); rtnl_lock(); err = __rtnl_link_register(&dummy_link_ops); if (err < 0) goto out; for (i = 0; i < numdummies && !err; i++) { err = dummy_init_one(); cond_resched(); } if (err < 0) __rtnl_link_unregister(&dummy_link_ops); out: rtnl_unlock(); up_write(&pernet_ops_rwsem); return err; } static void __exit dummy_cleanup_module(void) { rtnl_link_unregister(&dummy_link_ops); } module_init(dummy_init_module); module_exit(dummy_cleanup_module); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Dummy netdevice driver which discards all packets sent to it"); 
MODULE_ALIAS_RTNL_LINK(DRV_NAME);
8 1 1 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 // SPDX-License-Identifier: GPL-2.0-only /* * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/jhash.h> #include <linux/ip.h> #include <net/ipv6.h> #include <linux/netfilter/x_tables.h> #include <net/netfilter/nf_conntrack.h> #include <linux/netfilter/xt_cluster.h> static inline u32 nf_ct_orig_ipv4_src(const struct nf_conn *ct) { return (__force u32)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; } static inline const u32 *nf_ct_orig_ipv6_src(const struct nf_conn *ct) { return (__force u32 *)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6; } static inline u_int32_t xt_cluster_hash_ipv4(u_int32_t ip, const struct xt_cluster_match_info *info) { return jhash_1word(ip, info->hash_seed); } static inline u_int32_t xt_cluster_hash_ipv6(const void *ip, const struct xt_cluster_match_info *info) { return jhash2(ip, NF_CT_TUPLE_L3SIZE / sizeof(__u32), info->hash_seed); } static inline u_int32_t xt_cluster_hash(const struct nf_conn *ct, const struct xt_cluster_match_info *info) { u_int32_t hash = 0; switch(nf_ct_l3num(ct)) { case AF_INET: hash = xt_cluster_hash_ipv4(nf_ct_orig_ipv4_src(ct), info); break; case AF_INET6: hash = xt_cluster_hash_ipv6(nf_ct_orig_ipv6_src(ct), info); break; default: WARN_ON(1); break; } return reciprocal_scale(hash, info->total_nodes); 
} static inline bool xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family) { bool is_multicast = false; switch(family) { case NFPROTO_IPV4: is_multicast = ipv4_is_multicast(ip_hdr(skb)->daddr); break; case NFPROTO_IPV6: is_multicast = ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr); break; default: WARN_ON(1); break; } return is_multicast; } static bool xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct sk_buff *pskb = (struct sk_buff *)skb; const struct xt_cluster_match_info *info = par->matchinfo; const struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned long hash; /* This match assumes that all nodes see the same packets. This can be * achieved if the switch that connects the cluster nodes support some * sort of 'port mirroring'. However, if your switch does not support * this, your cluster nodes can reply ARP request using a multicast MAC * address. Thus, your switch will flood the same packets to the * cluster nodes with the same multicast MAC address. Using a multicast * link address is a RFC 1812 (section 3.3.2) violation, but this works * fine in practise. * * Unfortunately, if you use the multicast MAC address, the link layer * sets skbuff's pkt_type to PACKET_MULTICAST, which is not accepted * by TCP and others for packets coming to this node. For that reason, * this match mangles skbuff's pkt_type if it detects a packet * addressed to a unicast address but using PACKET_MULTICAST. Yes, I * know, matches should not alter packets, but we are doing this here * because we would need to add a PKTTYPE target for this sole purpose. 
*/ if (!xt_cluster_is_multicast_addr(skb, xt_family(par)) && skb->pkt_type == PACKET_MULTICAST) { pskb->pkt_type = PACKET_HOST; } ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) return false; if (ct->master) hash = xt_cluster_hash(ct->master, info); else hash = xt_cluster_hash(ct, info); return !!((1 << hash) & info->node_mask) ^ !!(info->flags & XT_CLUSTER_F_INV); } static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_cluster_match_info *info = par->matchinfo; int ret; if (info->total_nodes > XT_CLUSTER_NODES_MAX) { pr_info_ratelimited("you have exceeded the maximum number of cluster nodes (%u > %u)\n", info->total_nodes, XT_CLUSTER_NODES_MAX); return -EINVAL; } if (info->node_mask >= (1ULL << info->total_nodes)) { pr_info_ratelimited("node mask cannot exceed total number of nodes\n"); return -EDOM; } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info_ratelimited("cannot load conntrack support for proto=%u\n", par->family); return ret; } static void xt_cluster_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_netns_put(par->net, par->family); } static struct xt_match xt_cluster_match __read_mostly = { .name = "cluster", .family = NFPROTO_UNSPEC, .match = xt_cluster_mt, .checkentry = xt_cluster_mt_checkentry, .matchsize = sizeof(struct xt_cluster_match_info), .destroy = xt_cluster_mt_destroy, .me = THIS_MODULE, }; static int __init xt_cluster_mt_init(void) { return xt_register_match(&xt_cluster_match); } static void __exit xt_cluster_mt_fini(void) { xt_unregister_match(&xt_cluster_match); } MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: hash-based cluster match"); MODULE_ALIAS("ipt_cluster"); MODULE_ALIAS("ip6t_cluster"); module_init(xt_cluster_mt_init); module_exit(xt_cluster_mt_fini);
15 14 15 13 13 1 1 1 1 2 2 1 2 1 1 3 1 1 1 3 1 2 1 1 6 3 1 2 1 3 1 3 4 3 1 1 1 5 2 1 1 1 3 2 1 5 2 1 2 1 2 3 1 1 1 1 3 1 1 1 1 10 3 1 6 3 3 4 1 2 3 3 2 2 3 4 2 1 1 2 1 1 3 2 1 3 1 1 1 1 1 1 1 1 1 4 2 1 1 4 2 1 1 274 55 227 13 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 
471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 
971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 
1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 
1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2011 Instituto Nokia de Tecnologia
 *
 * Authors:
 *    Lauro Ramos Venancio <lauro.venancio@openbossa.org>
 *    Aloisio Almeida Jr <aloisio.almeida@openbossa.org>
 *
 * Vendor commands implementation based on net/wireless/nl80211.c
 * which is:
 *
 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
 * Copyright 2013-2014 Intel Mobile Communications GmbH
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__

#include <net/genetlink.h>
#include <linux/nfc.h>
#include <linux/slab.h>

#include "nfc.h"
#include "llcp.h"

/* Multicast groups of the NFC generic netlink family. */
static const struct genl_multicast_group nfc_genl_mcgrps[] = {
	{ .name = NFC_GENL_MCAST_EVENT_NAME, },
};

static struct genl_family nfc_genl_family;

/* Attribute policy for the top-level NFC_ATTR_* attributes. */
static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
	[NFC_ATTR_DEVICE_INDEX] = { .type = NLA_U32 },
	[NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING,
				.len = NFC_DEVICE_NAME_MAXSIZE },
	[NFC_ATTR_PROTOCOLS] = { .type = NLA_U32 },
	[NFC_ATTR_TARGET_INDEX] = { .type = NLA_U32 },
	[NFC_ATTR_COMM_MODE] = { .type = NLA_U8 },
	[NFC_ATTR_RF_MODE] = { .type = NLA_U8 },
	[NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 },
	[NFC_ATTR_IM_PROTOCOLS] = { .type = NLA_U32 },
	[NFC_ATTR_TM_PROTOCOLS] = { .type = NLA_U32 },
	[NFC_ATTR_LLC_PARAM_LTO] = { .type = NLA_U8 },
	[NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 },
	[NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 },
	[NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED },
	[NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING,
				     .len = NFC_FIRMWARE_NAME_MAXSIZE },
	[NFC_ATTR_SE_INDEX] = { .type = NLA_U32 },
	[NFC_ATTR_SE_APDU] = { .type = NLA_BINARY },
	[NFC_ATTR_VENDOR_ID] = { .type = NLA_U32 },
	[NFC_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 },
	[NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY },
};

/* Attribute policy for the NFC_SDP_ATTR_* attributes. */
static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = {
	[NFC_SDP_ATTR_URI] = { .type = NLA_STRING,
			       .len = U8_MAX - 4 },
	[NFC_SDP_ATTR_SAP] = { .type = NLA_U8 },
};

/*
 * Append one NFC_CMD_GET_TARGET message describing @target to the dump
 * buffer @msg.  Optional attributes (NFCID1, SENSB_RES, SENSF_RES, the
 * ISO15693 fields) are only added when the target carries them.
 *
 * Returns 0 on success or -EMSGSIZE when the header or an attribute does
 * not fit; in the attribute case the partial message is cancelled.
 */
static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
				struct netlink_callback *cb, int flags)
{
	void *hdr;

	hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &nfc_genl_family, flags, NFC_CMD_GET_TARGET);
	if (!hdr)
		return -EMSGSIZE;

	genl_dump_check_consistent(cb, hdr);

	if (nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target->idx) ||
	    nla_put_u32(msg, NFC_ATTR_PROTOCOLS, target->supported_protocols) ||
	    nla_put_u16(msg, NFC_ATTR_TARGET_SENS_RES, target->sens_res) ||
	    nla_put_u8(msg, NFC_ATTR_TARGET_SEL_RES, target->sel_res))
		goto nla_put_failure;
	if (target->nfcid1_len > 0 &&
	    nla_put(msg, NFC_ATTR_TARGET_NFCID1, target->nfcid1_len,
		    target->nfcid1))
		goto nla_put_failure;
	if (target->sensb_res_len > 0 &&
	    nla_put(msg, NFC_ATTR_TARGET_SENSB_RES, target->sensb_res_len,
		    target->sensb_res))
		goto nla_put_failure;
	if (target->sensf_res_len > 0 &&
	    nla_put(msg, NFC_ATTR_TARGET_SENSF_RES, target->sensf_res_len,
		    target->sensf_res))
		goto nla_put_failure;

	if (target->is_iso15693) {
		if (nla_put_u8(msg, NFC_ATTR_TARGET_ISO15693_DSFID,
			       target->iso15693_dsfid) ||
		    nla_put(msg, NFC_ATTR_TARGET_ISO15693_UID,
			    sizeof(target->iso15693_uid), target->iso15693_uid))
			goto nla_put_failure;
	}

	genlmsg_end(msg, hdr);
	return 0;

nla_put_failure:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

/*
 * Look up the NFC device named by NFC_ATTR_DEVICE_INDEX in a dump request.
 *
 * Returns the device obtained from nfc_get_device(), ERR_PTR(-EINVAL) when
 * the attribute is missing, or ERR_PTR(-ENODEV) when no device is found.
 */
static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb)
{
	const struct genl_dumpit_info *info = genl_dumpit_info(cb);
	struct nfc_dev *dev;
	u32 idx;

	if (!info->info.attrs[NFC_ATTR_DEVICE_INDEX])
		return ERR_PTR(-EINVAL);

	idx = nla_get_u32(info->info.attrs[NFC_ATTR_DEVICE_INDEX]);

	dev = nfc_get_device(idx);
	if (!dev)
		return ERR_PTR(-ENODEV);

	return dev;
}

/*
 * Dump callback for the targets of one device.  cb->args[0] holds the index
 * of the next target to send and cb->args[1] caches the device pointer
 * between invocations.  Targets are emitted under the device lock until one
 * no longer fits into @skb.
 *
 * Returns skb->len, or a negative errno if the device lookup fails.
 */
static int nfc_genl_dump_targets(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	int i = cb->args[0];
	struct nfc_dev *dev = (struct nfc_dev *) cb->args[1];
	int rc;

	if (!dev) {
		/* First invocation: resolve the device and remember it. */
		dev = __get_device_from_cb(cb);
		if (IS_ERR(dev))
			return PTR_ERR(dev);

		cb->args[1] = (long) dev;
	}

	device_lock(&dev->dev);

	cb->seq = dev->targets_generation;

	while (i < dev->n_targets) {
		rc = nfc_genl_send_target(skb, &dev->targets[i], cb,
					  NLM_F_MULTI);
		if (rc < 0)
			break;

		i++;
	}

	device_unlock(&dev->dev);

	cb->args[0] = i;

	return skb->len;
}

/* End of a targets dump: put the device cached in cb->args[1], if any. */
static int nfc_genl_dump_targets_done(struct netlink_callback *cb)
{
	struct nfc_dev *dev = (struct nfc_dev *) cb->args[1];

	if (dev)
		nfc_put_device(dev);

	return 0;
}

/*
 * Multicast an NFC_EVENT_TARGETS_FOUND event carrying the device index.
 * Also resets dev->genl_data.poll_req_portid to 0.  Allocation and
 * multicast both use GFP_ATOMIC.
 *
 * Returns the result of genlmsg_multicast(), -ENOMEM if the message cannot
 * be allocated, or -EMSGSIZE if it cannot be built.
 */
int nfc_genl_targets_found(struct nfc_dev *dev)
{
	struct sk_buff *msg;
	void *hdr;

	dev->genl_data.poll_req_portid = 0;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
			  NFC_EVENT_TARGETS_FOUND);
	if (!hdr)
		goto free_msg;

	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	return genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);

nla_put_failure:
free_msg:
	nlmsg_free(msg);
	return -EMSGSIZE;
}

/*
 * Multicast an NFC_EVENT_TARGET_LOST event carrying the device name and
 * @target_idx.  The result of genlmsg_multicast() is not checked.
 *
 * Returns 0 once the message has been handed to genlmsg_multicast(),
 * -ENOMEM if it cannot be allocated, or -EMSGSIZE if it cannot be built.
 */
int nfc_genl_target_lost(struct nfc_dev *dev, u32 target_idx)
{
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
			  NFC_EVENT_TARGET_LOST);
	if (!hdr)
		goto free_msg;

	if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) ||
	    nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target_idx))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);

	return 0;

nla_put_failure:
free_msg:
	nlmsg_free(msg);
	return -EMSGSIZE;
}

/*
 * Multicast an NFC_EVENT_TM_ACTIVATED event carrying the device index and
 * @protocol (as NFC_ATTR_TM_PROTOCOLS).  The multicast result is not checked.
 *
 * Returns 0, -ENOMEM if the message cannot be allocated, or -EMSGSIZE if it
 * cannot be built.
 */
int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol)
{
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
			  NFC_EVENT_TM_ACTIVATED);
	if (!hdr)
		goto free_msg;

	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
		goto nla_put_failure;
	if (nla_put_u32(msg, NFC_ATTR_TM_PROTOCOLS, protocol))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);

	return 0;

nla_put_failure:
free_msg:
	nlmsg_free(msg);
	return -EMSGSIZE;
}

/*
 * Multicast an NFC_EVENT_TM_DEACTIVATED event carrying the device index.
 * The multicast result is not checked.
 *
 * Returns 0, -ENOMEM if the message cannot be allocated, or -EMSGSIZE if it
 * cannot be built.
 */
int nfc_genl_tm_deactivated(struct nfc_dev *dev)
{
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
			  NFC_EVENT_TM_DEACTIVATED);
	if (!hdr)
		goto free_msg;

	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);

	return 0;

nla_put_failure:
free_msg:
	nlmsg_free(msg);
	return -EMSGSIZE;
}

/*
 * Add the attributes describing @dev (name, index, supported protocols,
 * powered state, RF mode) to @msg.
 *
 * Returns 0 on success, -1 if any attribute could not be added.
 */
static int nfc_genl_setup_device_added(struct nfc_dev *dev, struct sk_buff *msg)
{
	if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) ||
	    nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
	    nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) ||
	    nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) ||
	    nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode))
		return -1;
	return 0;
}

/*
 * Multicast an NFC_EVENT_DEVICE_ADDED event with the attributes filled in by
 * nfc_genl_setup_device_added().  The multicast result is not checked.
 *
 * Returns 0, -ENOMEM if the message cannot be allocated, or -EMSGSIZE if it
 * cannot be built.
 */
int nfc_genl_device_added(struct nfc_dev *dev)
{
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
			  NFC_EVENT_DEVICE_ADDED);
	if (!hdr)
		goto free_msg;

	if (nfc_genl_setup_device_added(dev, msg))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);

	return 0;

nla_put_failure:
free_msg:
	nlmsg_free(msg);
	return -EMSGSIZE;
}

/*
 * Multicast an NFC_EVENT_DEVICE_REMOVED event carrying the device index.
 * The multicast result is not checked.
 *
 * Returns 0, -ENOMEM if the message cannot be allocated, or -EMSGSIZE if it
 * cannot be built.
 */
int nfc_genl_device_removed(struct nfc_dev *dev)
{
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
			  NFC_EVENT_DEVICE_REMOVED);
	if (!hdr)
		goto free_msg;

	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);

	return 0;

nla_put_failure:
free_msg:
	nlmsg_free(msg);
	return -EMSGSIZE;
}

/*
 * Multicast an NFC_EVENT_LLC_SDRES event.  Every entry of @sdres_list is
 * emitted as a nested attribute (numbered from 1) holding its SAP and URI,
 * then unlinked from the list and freed.
 *
 * On failure the message is freed and whatever is still on @sdres_list is
 * released with nfc_llcp_free_sdp_tlv_list().
 *
 * Returns the result of genlmsg_multicast(), -ENOMEM if the message or a
 * nest cannot be started, or -EMSGSIZE if an attribute does not fit.
 */
int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list)
{
	struct sk_buff *msg;
	struct nlattr *sdp_attr, *uri_attr;
	struct nfc_llcp_sdp_tlv *sdres;
	struct hlist_node *n;
	void *hdr;
	int rc = -EMSGSIZE;
	int i;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
			  NFC_EVENT_LLC_SDRES);
	if (!hdr)
		goto free_msg;

	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
		goto nla_put_failure;

	sdp_attr = nla_nest_start_noflag(msg, NFC_ATTR_LLC_SDP);
	if (sdp_attr == NULL) {
		rc = -ENOMEM;
		goto nla_put_failure;
	}

	i = 1;
	hlist_for_each_entry_safe(sdres, n, sdres_list, node) {
		pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap);

		uri_attr = nla_nest_start_noflag(msg, i++);
		if (uri_attr == NULL) {
			rc = -ENOMEM;
			goto nla_put_failure;
		}

		if (nla_put_u8(msg, NFC_SDP_ATTR_SAP, sdres->sap))
			goto nla_put_failure;

		if (nla_put_string(msg, NFC_SDP_ATTR_URI, sdres->uri))
			goto nla_put_failure;

		nla_nest_end(msg, uri_attr);

		/* The entry is in the message now; drop it from the list. */
		hlist_del(&sdres->node);

		nfc_llcp_free_sdp_tlv(sdres);
	}

	nla_nest_end(msg, sdp_attr);

	genlmsg_end(msg, hdr);

	return genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);

nla_put_failure:
free_msg:
	nlmsg_free(msg);

	nfc_llcp_free_sdp_tlv_list(sdres_list);

	return rc;
}

/*
 * Multicast an NFC_EVENT_SE_ADDED event carrying the device index, @se_idx
 * and @type (written as a u8 attribute).  The multicast result is not
 * checked.
 *
 * Returns 0, -ENOMEM if the message cannot be allocated, or -EMSGSIZE if it
 * cannot be built.
 */
int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type)
{
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
			  NFC_EVENT_SE_ADDED);
	if (!hdr)
		goto free_msg;

	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
	    nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) ||
	    nla_put_u8(msg, NFC_ATTR_SE_TYPE, type))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);

	return 0;

nla_put_failure:
free_msg:
	nlmsg_free(msg);
	return -EMSGSIZE;
}

/*
 * Multicast an NFC_EVENT_SE_REMOVED event carrying the device index and
 * @se_idx.  The multicast result is not checked.
 *
 * Returns 0, -ENOMEM if the message cannot be allocated, or -EMSGSIZE if it
 * cannot be built.
 */
int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx)
{
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
			  NFC_EVENT_SE_REMOVED);
	if (!hdr)
		goto free_msg;

	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
	    nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);

	return 0;

nla_put_failure:
free_msg:
	nlmsg_free(msg);
	return -EMSGSIZE;
}

/*
 * Multicast an NFC_EVENT_SE_TRANSACTION event carrying the device index,
 * @se_idx, the type of the secure element found via nfc_find_se(), and the
 * AID and parameters taken from @evt_transaction.
 *
 * @evt_transaction is released with devm_kfree() on every path that gets
 * past the message allocation.  The multicast result is not checked.
 *
 * Returns 0, -ENOMEM if the message cannot be allocated, or -EMSGSIZE if
 * the message cannot be built or the secure element is not found.
 */
int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx,
			    struct nfc_evt_transaction *evt_transaction)
{
	struct nfc_se *se;
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
			  NFC_EVENT_SE_TRANSACTION);
	if (!hdr)
		goto free_msg;

	se = nfc_find_se(dev, se_idx);
	if (!se)
		goto free_msg;

	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
	    nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) ||
	    nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type) ||
	    nla_put(msg, NFC_ATTR_SE_AID, evt_transaction->aid_len,
		    evt_transaction->aid) ||
	    nla_put(msg, NFC_ATTR_SE_PARAMS, evt_transaction->params_len,
		    evt_transaction->params))
		goto nla_put_failure;

	/* evt_transaction is no longer used */
	devm_kfree(&dev->dev, evt_transaction);

	genlmsg_end(msg, hdr);

	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);

	return 0;

nla_put_failure:
free_msg:
	/* evt_transaction is no longer used */
	devm_kfree(&dev->dev, evt_transaction);
	nlmsg_free(msg);
	return -EMSGSIZE;
}

/*
 * Multicast an NFC_EVENT_SE_CONNECTIVITY event carrying the device index,
 * @se_idx and the type of the secure element found via nfc_find_se().
 * The multicast result is not checked.
 *
 * Returns 0, -ENOMEM if the message cannot be allocated, or -EMSGSIZE if
 * the message cannot be built or the secure element is not found.
 */
int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx)
{
	const struct nfc_se *se;
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
			  NFC_EVENT_SE_CONNECTIVITY);
	if (!hdr)
		goto free_msg;

	se = nfc_find_se(dev, se_idx);
	if (!se)
		goto free_msg;

	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
	    nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) ||
	    nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);

	return 0;

nla_put_failure:
free_msg:
	nlmsg_free(msg);
	return -EMSGSIZE;
}

/*
 * Append one NFC_CMD_GET_DEVICE message describing @dev to @msg.  @cb may be
 * NULL; when set, the dump consistency check is applied to the new header.
 *
 * Returns 0 on success or -EMSGSIZE when the header or an attribute does
 * not fit; in the attribute case the partial message is cancelled.
 */
static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
				u32 portid, u32 seq,
				struct netlink_callback *cb,
				int flags)
{
	void *hdr;

	hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags,
			  NFC_CMD_GET_DEVICE);
	if (!hdr)
		return -EMSGSIZE;

	if (cb)
		genl_dump_check_consistent(cb, hdr);

	if (nfc_genl_setup_device_added(dev, msg))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);
	return 0;

nla_put_failure:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

/*
 * Dump callback for all NFC devices.  cb->args[0] holds a heap-allocated
 * class_dev_iter and cb->args[1] the device at which the next invocation
 * resumes.  Devices are emitted under nfc_devlist_mutex until one no longer
 * fits into @skb or the iterator is exhausted.
 *
 * Returns skb->len, or -ENOMEM if the iterator cannot be allocated.
 */
static int nfc_genl_dump_devices(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
	struct nfc_dev *dev = (struct nfc_dev *) cb->args[1];
	bool first_call = false;

	if (!iter) {
		first_call = true;
		iter = kmalloc(sizeof(struct class_dev_iter), GFP_KERNEL);
		if (!iter)
			return -ENOMEM;
		cb->args[0] = (long) iter;
	}

	mutex_lock(&nfc_devlist_mutex);

	cb->seq = nfc_devlist_generation;

	if (first_call) {
		nfc_device_iter_init(iter);
		dev = nfc_device_iter_next(iter);
	}

	while (dev) {
		int rc;

		rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, cb, NLM_F_MULTI);
		if (rc < 0)
			break;

		dev = nfc_device_iter_next(iter);
	}

	mutex_unlock(&nfc_devlist_mutex);

	/* Remember where to resume (NULL once the iterator is exhausted). */
	cb->args[1] = (long) dev;

	return skb->len;
}

/* End of a devices dump: tear down and free the iterator, if allocated. */
static int nfc_genl_dump_devices_done(struct netlink_callback *cb)
{
	struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];

	if (iter) {
		nfc_device_iter_exit(iter);
		kfree(iter);
	}

	return 0;
}

/*
 * Multicast an NFC_CMD_DEP_LINK_UP message carrying the device index, the
 * communication and RF modes and, only when @rf_mode is NFC_RF_INITIATOR,
 * @target_idx.  Sets dev->dep_link_up once the message is complete.
 * Allocation and multicast use GFP_ATOMIC; the multicast result is not
 * checked.
 *
 * Returns 0, -ENOMEM if the message cannot be allocated, or -EMSGSIZE if it
 * cannot be built.
 */
int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx,
			       u8 comm_mode, u8 rf_mode)
{
	struct sk_buff *msg;
	void *hdr;

	pr_debug("DEP link is up\n");

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_CMD_DEP_LINK_UP);
	if (!hdr)
		goto free_msg;

	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
		goto nla_put_failure;
	if (rf_mode == NFC_RF_INITIATOR &&
	    nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target_idx))
		goto nla_put_failure;
	if (nla_put_u8(msg, NFC_ATTR_COMM_MODE, comm_mode) ||
	    nla_put_u8(msg, NFC_ATTR_RF_MODE, rf_mode))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	dev->dep_link_up = true;

	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);

	return 0;

nla_put_failure:
free_msg:
	nlmsg_free(msg);
	return -EMSGSIZE;
}

/*
 * Multicast an NFC_CMD_DEP_LINK_DOWN message carrying the device index.
 * Allocation and multicast use GFP_ATOMIC; the multicast result is not
 * checked.
 *
 * Returns 0, -ENOMEM if the message cannot be allocated, or -EMSGSIZE if it
 * cannot be built.
 */
int nfc_genl_dep_link_down_event(struct nfc_dev *dev)
{
	struct sk_buff *msg;
	void *hdr;

	pr_debug("DEP link is down\n");

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
			  NFC_CMD_DEP_LINK_DOWN);
	if (!hdr)
		goto free_msg;

	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);

	return 0;

nla_put_failure:
free_msg:
	nlmsg_free(msg);
	return -EMSGSIZE;
}

static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *msg;
	struct nfc_dev *dev;
	u32 idx;
	int rc = -ENOBUFS;

	if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
		return -EINVAL;

	idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);

	dev = nfc_get_device(idx);
	if (!dev)
return -ENODEV; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { rc = -ENOMEM; goto out_putdev; } rc = nfc_genl_send_device(msg, dev, info->snd_portid, info->snd_seq, NULL, 0); if (rc < 0) goto out_free; nfc_put_device(dev); return genlmsg_reply(msg, info); out_free: nlmsg_free(msg); out_putdev: nfc_put_device(dev); return rc; } static int nfc_genl_dev_up(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_dev_up(dev); nfc_put_device(dev); return rc; } static int nfc_genl_dev_down(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_dev_down(dev); nfc_put_device(dev); return rc; } static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; u32 im_protocols = 0, tm_protocols = 0; pr_debug("Poll start\n"); if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || ((!info->attrs[NFC_ATTR_IM_PROTOCOLS] && !info->attrs[NFC_ATTR_PROTOCOLS]) && !info->attrs[NFC_ATTR_TM_PROTOCOLS])) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); if (info->attrs[NFC_ATTR_TM_PROTOCOLS]) tm_protocols = nla_get_u32(info->attrs[NFC_ATTR_TM_PROTOCOLS]); if (info->attrs[NFC_ATTR_IM_PROTOCOLS]) im_protocols = nla_get_u32(info->attrs[NFC_ATTR_IM_PROTOCOLS]); else if (info->attrs[NFC_ATTR_PROTOCOLS]) im_protocols = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; mutex_lock(&dev->genl_data.genl_data_mutex); rc = nfc_start_poll(dev, im_protocols, tm_protocols); if (!rc) dev->genl_data.poll_req_portid = info->snd_portid; mutex_unlock(&dev->genl_data.genl_data_mutex); 
nfc_put_device(dev); return rc; } static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; device_lock(&dev->dev); if (!dev->polling) { device_unlock(&dev->dev); nfc_put_device(dev); return -EINVAL; } device_unlock(&dev->dev); mutex_lock(&dev->genl_data.genl_data_mutex); if (dev->genl_data.poll_req_portid != info->snd_portid) { rc = -EBUSY; goto out; } rc = nfc_stop_poll(dev); dev->genl_data.poll_req_portid = 0; out: mutex_unlock(&dev->genl_data.genl_data_mutex); nfc_put_device(dev); return rc; } static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; u32 device_idx, target_idx, protocol; int rc; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_TARGET_INDEX] || !info->attrs[NFC_ATTR_PROTOCOLS]) return -EINVAL; device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(device_idx); if (!dev) return -ENODEV; target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); protocol = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP); rc = nfc_activate_target(dev, target_idx, protocol); nfc_put_device(dev); return rc; } static int nfc_genl_deactivate_target(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; u32 device_idx, target_idx; int rc; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_TARGET_INDEX]) return -EINVAL; device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(device_idx); if (!dev) return -ENODEV; target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); rc = nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP); nfc_put_device(dev); return rc; } static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info) 
{ struct nfc_dev *dev; int rc, tgt_idx; u32 idx; u8 comm; pr_debug("DEP link up\n"); if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_COMM_MODE]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); if (!info->attrs[NFC_ATTR_TARGET_INDEX]) tgt_idx = NFC_TARGET_IDX_ANY; else tgt_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); comm = nla_get_u8(info->attrs[NFC_ATTR_COMM_MODE]); if (comm != NFC_COMM_ACTIVE && comm != NFC_COMM_PASSIVE) return -EINVAL; dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_dep_link_up(dev, tgt_idx, comm); nfc_put_device(dev); return rc; } static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_TARGET_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_dep_link_down(dev); nfc_put_device(dev); return rc; } static int nfc_genl_send_params(struct sk_buff *msg, struct nfc_llcp_local *local, u32 portid, u32 seq) { void *hdr; hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, 0, NFC_CMD_LLC_GET_PARAMS); if (!hdr) return -EMSGSIZE; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, local->dev->idx) || nla_put_u8(msg, NFC_ATTR_LLC_PARAM_LTO, local->lto) || nla_put_u8(msg, NFC_ATTR_LLC_PARAM_RW, local->rw) || nla_put_u16(msg, NFC_ATTR_LLC_PARAM_MIUX, be16_to_cpu(local->miux))) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; struct nfc_llcp_local *local; int rc = 0; struct sk_buff *msg = NULL; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_FIRMWARE_NAME]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; 
device_lock(&dev->dev); local = nfc_llcp_find_local(dev); if (!local) { rc = -ENODEV; goto exit; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { rc = -ENOMEM; goto put_local; } rc = nfc_genl_send_params(msg, local, info->snd_portid, info->snd_seq); put_local: nfc_llcp_local_put(local); exit: device_unlock(&dev->dev); nfc_put_device(dev); if (rc < 0) { if (msg) nlmsg_free(msg); return rc; } return genlmsg_reply(msg, info); } static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; struct nfc_llcp_local *local; u8 rw = 0; u16 miux = 0; u32 idx; int rc = 0; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || (!info->attrs[NFC_ATTR_LLC_PARAM_LTO] && !info->attrs[NFC_ATTR_LLC_PARAM_RW] && !info->attrs[NFC_ATTR_LLC_PARAM_MIUX])) return -EINVAL; if (info->attrs[NFC_ATTR_LLC_PARAM_RW]) { rw = nla_get_u8(info->attrs[NFC_ATTR_LLC_PARAM_RW]); if (rw > LLCP_MAX_RW) return -EINVAL; } if (info->attrs[NFC_ATTR_LLC_PARAM_MIUX]) { miux = nla_get_u16(info->attrs[NFC_ATTR_LLC_PARAM_MIUX]); if (miux > LLCP_MAX_MIUX) return -EINVAL; } idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; device_lock(&dev->dev); local = nfc_llcp_find_local(dev); if (!local) { rc = -ENODEV; goto exit; } if (info->attrs[NFC_ATTR_LLC_PARAM_LTO]) { if (dev->dep_link_up) { rc = -EINPROGRESS; goto put_local; } local->lto = nla_get_u8(info->attrs[NFC_ATTR_LLC_PARAM_LTO]); } if (info->attrs[NFC_ATTR_LLC_PARAM_RW]) local->rw = rw; if (info->attrs[NFC_ATTR_LLC_PARAM_MIUX]) local->miux = cpu_to_be16(miux); put_local: nfc_llcp_local_put(local); exit: device_unlock(&dev->dev); nfc_put_device(dev); return rc; } static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; struct nfc_llcp_local *local; struct nlattr *attr, *sdp_attrs[NFC_SDP_ATTR_MAX+1]; u32 idx; u8 tid; char *uri; int rc = 0, rem; size_t uri_len, tlvs_len; struct hlist_head sdreq_list; struct nfc_llcp_sdp_tlv 
*sdreq; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_LLC_SDP]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; device_lock(&dev->dev); if (dev->dep_link_up == false) { rc = -ENOLINK; goto exit; } local = nfc_llcp_find_local(dev); if (!local) { rc = -ENODEV; goto exit; } INIT_HLIST_HEAD(&sdreq_list); tlvs_len = 0; nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) { rc = nla_parse_nested_deprecated(sdp_attrs, NFC_SDP_ATTR_MAX, attr, nfc_sdp_genl_policy, info->extack); if (rc != 0) { rc = -EINVAL; goto put_local; } if (!sdp_attrs[NFC_SDP_ATTR_URI]) continue; uri_len = nla_len(sdp_attrs[NFC_SDP_ATTR_URI]); if (uri_len == 0) continue; uri = nla_data(sdp_attrs[NFC_SDP_ATTR_URI]); if (uri == NULL || *uri == 0) continue; tid = local->sdreq_next_tid++; sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len); if (sdreq == NULL) { rc = -ENOMEM; goto put_local; } tlvs_len += sdreq->tlv_len; hlist_add_head(&sdreq->node, &sdreq_list); } if (hlist_empty(&sdreq_list)) { rc = -EINVAL; goto put_local; } rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len); put_local: nfc_llcp_local_put(local); exit: device_unlock(&dev->dev); nfc_put_device(dev); return rc; } static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; char firmware_name[NFC_FIRMWARE_NAME_MAXSIZE + 1]; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_FIRMWARE_NAME]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; nla_strscpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME], sizeof(firmware_name)); rc = nfc_fw_download(dev, firmware_name); nfc_put_device(dev); return rc; } int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, u32 result) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return 
-ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_CMD_FW_DOWNLOAD); if (!hdr) goto free_msg; if (nla_put_string(msg, NFC_ATTR_FIRMWARE_NAME, firmware_name) || nla_put_u32(msg, NFC_ATTR_FIRMWARE_DOWNLOAD_STATUS, result) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } static int nfc_genl_enable_se(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx, se_idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_SE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_enable_se(dev, se_idx); nfc_put_device(dev); return rc; } static int nfc_genl_disable_se(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx, se_idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_SE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_disable_se(dev, se_idx); nfc_put_device(dev); return rc; } static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, int flags) { void *hdr; struct nfc_se *se, *n; list_for_each_entry_safe(se, n, &dev->secure_elements, list) { hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags, NFC_CMD_GET_SE); if (!hdr) goto nla_put_failure; if (cb) genl_dump_check_consistent(cb, hdr); if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) || nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type)) goto nla_put_failure; genlmsg_end(msg, hdr); } return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return 
-EMSGSIZE; } static int nfc_genl_dump_ses(struct sk_buff *skb, struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; struct nfc_dev *dev = (struct nfc_dev *) cb->args[1]; bool first_call = false; if (!iter) { first_call = true; iter = kmalloc(sizeof(struct class_dev_iter), GFP_KERNEL); if (!iter) return -ENOMEM; cb->args[0] = (long) iter; } mutex_lock(&nfc_devlist_mutex); cb->seq = nfc_devlist_generation; if (first_call) { nfc_device_iter_init(iter); dev = nfc_device_iter_next(iter); } while (dev) { int rc; rc = nfc_genl_send_se(skb, dev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb, NLM_F_MULTI); if (rc < 0) break; dev = nfc_device_iter_next(iter); } mutex_unlock(&nfc_devlist_mutex); cb->args[1] = (long) dev; return skb->len; } static int nfc_genl_dump_ses_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; if (iter) { nfc_device_iter_exit(iter); kfree(iter); } return 0; } static int nfc_se_io(struct nfc_dev *dev, u32 se_idx, u8 *apdu, size_t apdu_length, se_io_cb_t cb, void *cb_context) { struct nfc_se *se; int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); device_lock(&dev->dev); if (!device_is_registered(&dev->dev)) { rc = -ENODEV; goto error; } if (!dev->dev_up) { rc = -ENODEV; goto error; } if (!dev->ops->se_io) { rc = -EOPNOTSUPP; goto error; } se = nfc_find_se(dev, se_idx); if (!se) { rc = -EINVAL; goto error; } if (se->state != NFC_SE_ENABLED) { rc = -ENODEV; goto error; } rc = dev->ops->se_io(dev, se_idx, apdu, apdu_length, cb, cb_context); device_unlock(&dev->dev); return rc; error: device_unlock(&dev->dev); kfree(cb_context); return rc; } struct se_io_ctx { u32 dev_idx; u32 se_idx; }; static void se_io_cb(void *context, u8 *apdu, size_t apdu_len, int err) { struct se_io_ctx *ctx = context; struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { kfree(ctx); return; } hdr = genlmsg_put(msg, 0, 0, 
&nfc_genl_family, 0, NFC_CMD_SE_IO); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, ctx->dev_idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, ctx->se_idx) || nla_put(msg, NFC_ATTR_SE_APDU, apdu_len, apdu)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); kfree(ctx); return; nla_put_failure: free_msg: nlmsg_free(msg); kfree(ctx); return; } static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; struct se_io_ctx *ctx; u32 dev_idx, se_idx; u8 *apdu; size_t apdu_len; int rc; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_SE_INDEX] || !info->attrs[NFC_ATTR_SE_APDU]) return -EINVAL; dev_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); dev = nfc_get_device(dev_idx); if (!dev) return -ENODEV; if (!dev->ops || !dev->ops->se_io) { rc = -EOPNOTSUPP; goto put_dev; } apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]); if (apdu_len == 0) { rc = -EINVAL; goto put_dev; } apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]); if (!apdu) { rc = -EINVAL; goto put_dev; } ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL); if (!ctx) { rc = -ENOMEM; goto put_dev; } ctx->dev_idx = dev_idx; ctx->se_idx = se_idx; rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); put_dev: nfc_put_device(dev); return rc; } static int nfc_genl_vendor_cmd(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; const struct nfc_vendor_cmd *cmd; u32 dev_idx, vid, subcmd; u8 *data; size_t data_len; int i, err; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_VENDOR_ID] || !info->attrs[NFC_ATTR_VENDOR_SUBCMD]) return -EINVAL; dev_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); vid = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_ID]); subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]); dev = nfc_get_device(dev_idx); if (!dev) return -ENODEV; if (!dev->vendor_cmds || !dev->n_vendor_cmds) { 
err = -ENODEV; goto put_dev; } if (info->attrs[NFC_ATTR_VENDOR_DATA]) { data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]); data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]); if (data_len == 0) { err = -EINVAL; goto put_dev; } } else { data = NULL; data_len = 0; } for (i = 0; i < dev->n_vendor_cmds; i++) { cmd = &dev->vendor_cmds[i]; if (cmd->vendor_id != vid || cmd->subcmd != subcmd) continue; dev->cur_cmd_info = info; err = cmd->doit(dev, data, data_len); dev->cur_cmd_info = NULL; goto put_dev; } err = -EOPNOTSUPP; put_dev: nfc_put_device(dev); return err; } /* message building helper */ static inline void *nfc_hdr_put(struct sk_buff *skb, u32 portid, u32 seq, int flags, u8 cmd) { /* since there is no private header just add the generic one */ return genlmsg_put(skb, portid, seq, &nfc_genl_family, flags, cmd); } static struct sk_buff * __nfc_alloc_vendor_cmd_skb(struct nfc_dev *dev, int approxlen, u32 portid, u32 seq, enum nfc_attrs attr, u32 oui, u32 subcmd, gfp_t gfp) { struct sk_buff *skb; void *hdr; skb = nlmsg_new(approxlen + 100, gfp); if (!skb) return NULL; hdr = nfc_hdr_put(skb, portid, seq, 0, NFC_CMD_VENDOR); if (!hdr) { kfree_skb(skb); return NULL; } if (nla_put_u32(skb, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; if (nla_put_u32(skb, NFC_ATTR_VENDOR_ID, oui)) goto nla_put_failure; if (nla_put_u32(skb, NFC_ATTR_VENDOR_SUBCMD, subcmd)) goto nla_put_failure; ((void **)skb->cb)[0] = dev; ((void **)skb->cb)[1] = hdr; return skb; nla_put_failure: kfree_skb(skb); return NULL; } struct sk_buff *__nfc_alloc_vendor_cmd_reply_skb(struct nfc_dev *dev, enum nfc_attrs attr, u32 oui, u32 subcmd, int approxlen) { if (WARN_ON(!dev->cur_cmd_info)) return NULL; return __nfc_alloc_vendor_cmd_skb(dev, approxlen, dev->cur_cmd_info->snd_portid, dev->cur_cmd_info->snd_seq, attr, oui, subcmd, GFP_KERNEL); } EXPORT_SYMBOL(__nfc_alloc_vendor_cmd_reply_skb); int nfc_vendor_cmd_reply(struct sk_buff *skb) { struct nfc_dev *dev = ((void **)skb->cb)[0]; void *hdr = 
((void **)skb->cb)[1]; /* clear CB data for netlink core to own from now on */ memset(skb->cb, 0, sizeof(skb->cb)); if (WARN_ON(!dev->cur_cmd_info)) { kfree_skb(skb); return -EINVAL; } genlmsg_end(skb, hdr); return genlmsg_reply(skb, dev->cur_cmd_info); } EXPORT_SYMBOL(nfc_vendor_cmd_reply); static const struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_get_device, .dumpit = nfc_genl_dump_devices, .done = nfc_genl_dump_devices_done, }, { .cmd = NFC_CMD_DEV_UP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_up, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEV_DOWN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_down, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_START_POLL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_start_poll, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_STOP_POLL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_stop_poll, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEP_LINK_UP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_up, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEP_LINK_DOWN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_down, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_GET_TARGET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = nfc_genl_dump_targets, .done = nfc_genl_dump_targets_done, }, { .cmd = NFC_CMD_LLC_GET_PARAMS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_get_params, }, { .cmd = NFC_CMD_LLC_SET_PARAMS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_set_params, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_LLC_SDREQ, .validate = GENL_DONT_VALIDATE_STRICT | 
GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_sdreq, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_FW_DOWNLOAD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_fw_download, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_ENABLE_SE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_enable_se, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DISABLE_SE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_disable_se, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_GET_SE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = nfc_genl_dump_ses, .done = nfc_genl_dump_ses_done, }, { .cmd = NFC_CMD_SE_IO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_se_io, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_ACTIVATE_TARGET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_activate_target, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_VENDOR, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_vendor_cmd, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEACTIVATE_TARGET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_deactivate_target, .flags = GENL_ADMIN_PERM, }, }; static struct genl_family nfc_genl_family __ro_after_init = { .hdrsize = 0, .name = NFC_GENL_NAME, .version = NFC_GENL_VERSION, .maxattr = NFC_ATTR_MAX, .policy = nfc_genl_policy, .module = THIS_MODULE, .ops = nfc_genl_ops, .n_ops = ARRAY_SIZE(nfc_genl_ops), .resv_start_op = NFC_CMD_DEACTIVATE_TARGET + 1, .mcgrps = nfc_genl_mcgrps, .n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps), }; struct urelease_work { struct work_struct w; u32 portid; }; static void nfc_urelease_event_work(struct work_struct *work) { struct urelease_work *w = container_of(work, struct urelease_work, w); struct class_dev_iter iter; struct nfc_dev *dev; pr_debug("portid %d\n", w->portid); 
mutex_lock(&nfc_devlist_mutex); nfc_device_iter_init(&iter); dev = nfc_device_iter_next(&iter); while (dev) { mutex_lock(&dev->genl_data.genl_data_mutex); if (dev->genl_data.poll_req_portid == w->portid) { nfc_stop_poll(dev); dev->genl_data.poll_req_portid = 0; } mutex_unlock(&dev->genl_data.genl_data_mutex); dev = nfc_device_iter_next(&iter); } nfc_device_iter_exit(&iter); mutex_unlock(&nfc_devlist_mutex); kfree(w); } static int nfc_genl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netlink_notify *n = ptr; struct urelease_work *w; if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC) goto out; pr_debug("NETLINK_URELEASE event from id %d\n", n->portid); w = kmalloc(sizeof(*w), GFP_ATOMIC); if (w) { INIT_WORK(&w->w, nfc_urelease_event_work); w->portid = n->portid; schedule_work(&w->w); } out: return NOTIFY_DONE; } void nfc_genl_data_init(struct nfc_genl_data *genl_data) { genl_data->poll_req_portid = 0; mutex_init(&genl_data->genl_data_mutex); } void nfc_genl_data_exit(struct nfc_genl_data *genl_data) { mutex_destroy(&genl_data->genl_data_mutex); } static struct notifier_block nl_notifier = { .notifier_call = nfc_genl_rcv_nl_event, }; /** * nfc_genl_init() - Initialize netlink interface * * This initialization function registers the nfc netlink family. */ int __init nfc_genl_init(void) { int rc; rc = genl_register_family(&nfc_genl_family); if (rc) return rc; netlink_register_notifier(&nl_notifier); return 0; } /** * nfc_genl_exit() - Deinitialize netlink interface * * This exit function unregisters the nfc netlink family. */ void nfc_genl_exit(void) { netlink_unregister_notifier(&nl_notifier); genl_unregister_family(&nfc_genl_family); }
427 427 427 65 65 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 
523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) ST-Ericsson AB 2010 * Author: Sjur Brendeland */ #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ #include <linux/stddef.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/pkt_sched.h> #include <net/caif/caif_layer.h> #include <net/caif/cfpkt.h> #include <net/caif/cfctrl.h> #define container_obj(layr) container_of(layr, struct cfctrl, serv.layer) #define UTILITY_NAME_LENGTH 16 #define CFPKT_CTRL_PKT_LEN 20 #ifdef CAIF_NO_LOOP static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt){ return -1; } #else static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt); #endif static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt); static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, int phyid); struct cflayer *cfctrl_create(void) { struct dev_info dev_info; struct cfctrl *this = kzalloc(sizeof(struct cfctrl), GFP_ATOMIC); if (!this) return NULL; caif_assert(offsetof(struct cfctrl, serv.layer) == 0); memset(&dev_info, 0, sizeof(dev_info)); dev_info.id = 0xff; cfsrvl_init(&this->serv, 0, &dev_info, false); atomic_set(&this->req_seq_no, 1); atomic_set(&this->rsp_seq_no, 1); this->serv.layer.receive = cfctrl_recv; sprintf(this->serv.layer.name, "ctrl"); this->serv.layer.ctrlcmd = cfctrl_ctrlcmd; #ifndef CAIF_NO_LOOP spin_lock_init(&this->loop_linkid_lock); this->loop_linkid = 1; #endif spin_lock_init(&this->info_list_lock); INIT_LIST_HEAD(&this->list); return 
&this->serv.layer; } void cfctrl_remove(struct cflayer *layer) { struct cfctrl_request_info *p, *tmp; struct cfctrl *ctrl = container_obj(layer); spin_lock_bh(&ctrl->info_list_lock); list_for_each_entry_safe(p, tmp, &ctrl->list, list) { list_del(&p->list); kfree(p); } spin_unlock_bh(&ctrl->info_list_lock); kfree(layer); } static bool param_eq(const struct cfctrl_link_param *p1, const struct cfctrl_link_param *p2) { bool eq = p1->linktype == p2->linktype && p1->priority == p2->priority && p1->phyid == p2->phyid && p1->endpoint == p2->endpoint && p1->chtype == p2->chtype; if (!eq) return false; switch (p1->linktype) { case CFCTRL_SRV_VEI: return true; case CFCTRL_SRV_DATAGRAM: return p1->u.datagram.connid == p2->u.datagram.connid; case CFCTRL_SRV_RFM: return p1->u.rfm.connid == p2->u.rfm.connid && strcmp(p1->u.rfm.volume, p2->u.rfm.volume) == 0; case CFCTRL_SRV_UTIL: return p1->u.utility.fifosize_kb == p2->u.utility.fifosize_kb && p1->u.utility.fifosize_bufs == p2->u.utility.fifosize_bufs && strcmp(p1->u.utility.name, p2->u.utility.name) == 0 && p1->u.utility.paramlen == p2->u.utility.paramlen && memcmp(p1->u.utility.params, p2->u.utility.params, p1->u.utility.paramlen) == 0; case CFCTRL_SRV_VIDEO: return p1->u.video.connid == p2->u.video.connid; case CFCTRL_SRV_DBG: return true; case CFCTRL_SRV_DECM: return false; default: return false; } return false; } static bool cfctrl_req_eq(const struct cfctrl_request_info *r1, const struct cfctrl_request_info *r2) { if (r1->cmd != r2->cmd) return false; if (r1->cmd == CFCTRL_CMD_LINK_SETUP) return param_eq(&r1->param, &r2->param); else return r1->channel_id == r2->channel_id; } /* Insert request at the end */ static void cfctrl_insert_req(struct cfctrl *ctrl, struct cfctrl_request_info *req) { spin_lock_bh(&ctrl->info_list_lock); atomic_inc(&ctrl->req_seq_no); req->sequence_no = atomic_read(&ctrl->req_seq_no); list_add_tail(&req->list, &ctrl->list); spin_unlock_bh(&ctrl->info_list_lock); } /* Compare and remove request */ 
static struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl, struct cfctrl_request_info *req) { struct cfctrl_request_info *p, *tmp, *first; first = list_first_entry(&ctrl->list, struct cfctrl_request_info, list); list_for_each_entry_safe(p, tmp, &ctrl->list, list) { if (cfctrl_req_eq(req, p)) { if (p != first) pr_warn("Requests are not received in order\n"); atomic_set(&ctrl->rsp_seq_no, p->sequence_no); list_del(&p->list); goto out; } } p = NULL; out: return p; } struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer) { struct cfctrl *this = container_obj(layer); return &this->res; } static void init_info(struct caif_payload_info *info, struct cfctrl *cfctrl) { info->hdr_len = 0; info->channel_id = cfctrl->serv.layer.id; info->dev_info = &cfctrl->serv.dev_info; } void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid) { struct cfpkt *pkt; struct cfctrl *cfctrl = container_obj(layer); struct cflayer *dn = cfctrl->serv.layer.dn; if (!dn) { pr_debug("not able to send enum request\n"); return; } pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); if (!pkt) return; caif_assert(offsetof(struct cfctrl, serv.layer) == 0); init_info(cfpkt_info(pkt), cfctrl); cfpkt_info(pkt)->dev_info->id = physlinkid; cfctrl->serv.dev_info.id = physlinkid; cfpkt_addbdy(pkt, CFCTRL_CMD_ENUM); cfpkt_addbdy(pkt, physlinkid); cfpkt_set_prio(pkt, TC_PRIO_CONTROL); dn->transmit(dn, pkt); } int cfctrl_linkup_request(struct cflayer *layer, struct cfctrl_link_param *param, struct cflayer *user_layer) { struct cfctrl *cfctrl = container_obj(layer); u32 tmp32; u16 tmp16; u8 tmp8; struct cfctrl_request_info *req; int ret; char utility_name[16]; struct cfpkt *pkt; struct cflayer *dn = cfctrl->serv.layer.dn; if (!dn) { pr_debug("not able to send linkup request\n"); return -ENODEV; } if (cfctrl_cancel_req(layer, user_layer) > 0) { /* Slight Paranoia, check if already connecting */ pr_err("Duplicate connect request for same client\n"); WARN_ON(1); return -EALREADY; } pkt = 
cfpkt_create(CFPKT_CTRL_PKT_LEN); if (!pkt) return -ENOMEM; cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); cfpkt_addbdy(pkt, (param->chtype << 4) | param->linktype); cfpkt_addbdy(pkt, (param->priority << 3) | param->phyid); cfpkt_addbdy(pkt, param->endpoint & 0x03); switch (param->linktype) { case CFCTRL_SRV_VEI: break; case CFCTRL_SRV_VIDEO: cfpkt_addbdy(pkt, (u8) param->u.video.connid); break; case CFCTRL_SRV_DBG: break; case CFCTRL_SRV_DATAGRAM: tmp32 = cpu_to_le32(param->u.datagram.connid); cfpkt_add_body(pkt, &tmp32, 4); break; case CFCTRL_SRV_RFM: /* Construct a frame, convert DatagramConnectionID to network * format long and copy it out... */ tmp32 = cpu_to_le32(param->u.rfm.connid); cfpkt_add_body(pkt, &tmp32, 4); /* Add volume name, including zero termination... */ cfpkt_add_body(pkt, param->u.rfm.volume, strlen(param->u.rfm.volume) + 1); break; case CFCTRL_SRV_UTIL: tmp16 = cpu_to_le16(param->u.utility.fifosize_kb); cfpkt_add_body(pkt, &tmp16, 2); tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs); cfpkt_add_body(pkt, &tmp16, 2); memset(utility_name, 0, sizeof(utility_name)); strscpy(utility_name, param->u.utility.name, UTILITY_NAME_LENGTH); cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH); tmp8 = param->u.utility.paramlen; cfpkt_add_body(pkt, &tmp8, 1); cfpkt_add_body(pkt, param->u.utility.params, param->u.utility.paramlen); break; default: pr_warn("Request setup of bad link type = %d\n", param->linktype); cfpkt_destroy(pkt); return -EINVAL; } req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) { cfpkt_destroy(pkt); return -ENOMEM; } req->client_layer = user_layer; req->cmd = CFCTRL_CMD_LINK_SETUP; req->param = *param; cfctrl_insert_req(cfctrl, req); init_info(cfpkt_info(pkt), cfctrl); /* * NOTE:Always send linkup and linkdown request on the same * device as the payload. Otherwise old queued up payload * might arrive with the newly allocated channel ID. 
*/ cfpkt_info(pkt)->dev_info->id = param->phyid; cfpkt_set_prio(pkt, TC_PRIO_CONTROL); ret = dn->transmit(dn, pkt); if (ret < 0) { int count; count = cfctrl_cancel_req(&cfctrl->serv.layer, user_layer); if (count != 1) { pr_err("Could not remove request (%d)", count); return -ENODEV; } } return 0; } int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, struct cflayer *client) { int ret; struct cfpkt *pkt; struct cfctrl *cfctrl = container_obj(layer); struct cflayer *dn = cfctrl->serv.layer.dn; if (!dn) { pr_debug("not able to send link-down request\n"); return -ENODEV; } pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); if (!pkt) return -ENOMEM; cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY); cfpkt_addbdy(pkt, channelid); init_info(cfpkt_info(pkt), cfctrl); cfpkt_set_prio(pkt, TC_PRIO_CONTROL); ret = dn->transmit(dn, pkt); #ifndef CAIF_NO_LOOP cfctrl->loop_linkused[channelid] = 0; #endif return ret; } int cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer) { struct cfctrl_request_info *p, *tmp; struct cfctrl *ctrl = container_obj(layr); int found = 0; spin_lock_bh(&ctrl->info_list_lock); list_for_each_entry_safe(p, tmp, &ctrl->list, list) { if (p->client_layer == adap_layer) { list_del(&p->list); kfree(p); found++; } } spin_unlock_bh(&ctrl->info_list_lock); return found; } static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) { u8 cmdrsp; u8 cmd; int ret = -1; u8 len; u8 param[255]; u8 linkid = 0; struct cfctrl *cfctrl = container_obj(layer); struct cfctrl_request_info rsp, *req; cmdrsp = cfpkt_extr_head_u8(pkt); cmd = cmdrsp & CFCTRL_CMD_MASK; if (cmd != CFCTRL_CMD_LINK_ERR && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp) && CFCTRL_ERR_BIT != (CFCTRL_ERR_BIT & cmdrsp)) { if (handle_loop(cfctrl, cmd, pkt) != 0) cmdrsp |= CFCTRL_ERR_BIT; } switch (cmd) { case CFCTRL_CMD_LINK_SETUP: { enum cfctrl_srv serv; enum cfctrl_srv servtype; u8 endpoint; u8 physlinkid; u8 prio; u8 tmp; u8 *cp; int i; struct cfctrl_link_param linkparam; memset(&linkparam, 0, 
sizeof(linkparam)); tmp = cfpkt_extr_head_u8(pkt); serv = tmp & CFCTRL_SRV_MASK; linkparam.linktype = serv; servtype = tmp >> 4; linkparam.chtype = servtype; tmp = cfpkt_extr_head_u8(pkt); physlinkid = tmp & 0x07; prio = tmp >> 3; linkparam.priority = prio; linkparam.phyid = physlinkid; endpoint = cfpkt_extr_head_u8(pkt); linkparam.endpoint = endpoint & 0x03; switch (serv) { case CFCTRL_SRV_VEI: case CFCTRL_SRV_DBG: if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_VIDEO: tmp = cfpkt_extr_head_u8(pkt); linkparam.u.video.connid = tmp; if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_DATAGRAM: linkparam.u.datagram.connid = cfpkt_extr_head_u32(pkt); if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_RFM: /* Construct a frame, convert * DatagramConnectionID * to network format long and copy it out... */ linkparam.u.rfm.connid = cfpkt_extr_head_u32(pkt); cp = (u8 *) linkparam.u.rfm.volume; for (tmp = cfpkt_extr_head_u8(pkt); cfpkt_more(pkt) && tmp != '\0'; tmp = cfpkt_extr_head_u8(pkt)) *cp++ = tmp; *cp = '\0'; if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_UTIL: /* Construct a frame, convert * DatagramConnectionID * to network format long and copy it out... 
*/ /* Fifosize KB */ linkparam.u.utility.fifosize_kb = cfpkt_extr_head_u16(pkt); /* Fifosize bufs */ linkparam.u.utility.fifosize_bufs = cfpkt_extr_head_u16(pkt); /* name */ cp = (u8 *) linkparam.u.utility.name; caif_assert(sizeof(linkparam.u.utility.name) >= UTILITY_NAME_LENGTH); for (i = 0; i < UTILITY_NAME_LENGTH && cfpkt_more(pkt); i++) { tmp = cfpkt_extr_head_u8(pkt); *cp++ = tmp; } /* Length */ len = cfpkt_extr_head_u8(pkt); linkparam.u.utility.paramlen = len; /* Param Data */ cp = linkparam.u.utility.params; while (cfpkt_more(pkt) && len--) { tmp = cfpkt_extr_head_u8(pkt); *cp++ = tmp; } if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ linkid = cfpkt_extr_head_u8(pkt); /* Length */ len = cfpkt_extr_head_u8(pkt); /* Param Data */ cfpkt_extr_head(pkt, &param, len); break; default: pr_warn("Request setup, invalid type (%d)\n", serv); goto error; } rsp.cmd = cmd; rsp.param = linkparam; spin_lock_bh(&cfctrl->info_list_lock); req = cfctrl_remove_req(cfctrl, &rsp); if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) || cfpkt_erroneous(pkt)) { pr_err("Invalid O/E bit or parse error " "on CAIF control channel\n"); cfctrl->res.reject_rsp(cfctrl->serv.layer.up, 0, req ? req->client_layer : NULL); } else { cfctrl->res.linksetup_rsp(cfctrl->serv. layer.up, linkid, serv, physlinkid, req ? 
req-> client_layer : NULL); } kfree(req); spin_unlock_bh(&cfctrl->info_list_lock); } break; case CFCTRL_CMD_LINK_DESTROY: linkid = cfpkt_extr_head_u8(pkt); cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid); break; case CFCTRL_CMD_LINK_ERR: pr_err("Frame Error Indication received\n"); cfctrl->res.linkerror_ind(); break; case CFCTRL_CMD_ENUM: cfctrl->res.enum_rsp(); break; case CFCTRL_CMD_SLEEP: cfctrl->res.sleep_rsp(); break; case CFCTRL_CMD_WAKE: cfctrl->res.wake_rsp(); break; case CFCTRL_CMD_LINK_RECONF: cfctrl->res.restart_rsp(); break; case CFCTRL_CMD_RADIO_SET: cfctrl->res.radioset_rsp(); break; default: pr_err("Unrecognized Control Frame\n"); goto error; } ret = 0; error: cfpkt_destroy(pkt); return ret; } static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, int phyid) { struct cfctrl *this = container_obj(layr); switch (ctrl) { case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND: case CAIF_CTRLCMD_FLOW_OFF_IND: spin_lock_bh(&this->info_list_lock); if (!list_empty(&this->list)) pr_debug("Received flow off in control layer\n"); spin_unlock_bh(&this->info_list_lock); break; case _CAIF_CTRLCMD_PHYIF_DOWN_IND: { struct cfctrl_request_info *p, *tmp; /* Find all connect request and report failure */ spin_lock_bh(&this->info_list_lock); list_for_each_entry_safe(p, tmp, &this->list, list) { if (p->param.phyid == phyid) { list_del(&p->list); p->client_layer->ctrlcmd(p->client_layer, CAIF_CTRLCMD_INIT_FAIL_RSP, phyid); kfree(p); } } spin_unlock_bh(&this->info_list_lock); break; } default: break; } } #ifndef CAIF_NO_LOOP static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt) { static int last_linkid; static int dec; u8 linkid, linktype, tmp; switch (cmd) { case CFCTRL_CMD_LINK_SETUP: spin_lock_bh(&ctrl->loop_linkid_lock); if (!dec) { for (linkid = last_linkid + 1; linkid < 254; linkid++) if (!ctrl->loop_linkused[linkid]) goto found; } dec = 1; for (linkid = last_linkid - 1; linkid > 1; linkid--) if (!ctrl->loop_linkused[linkid]) goto found; 
spin_unlock_bh(&ctrl->loop_linkid_lock); return -1; found: if (linkid < 10) dec = 0; if (!ctrl->loop_linkused[linkid]) ctrl->loop_linkused[linkid] = 1; last_linkid = linkid; cfpkt_add_trail(pkt, &linkid, 1); spin_unlock_bh(&ctrl->loop_linkid_lock); cfpkt_peek_head(pkt, &linktype, 1); if (linktype == CFCTRL_SRV_UTIL) { tmp = 0x01; cfpkt_add_trail(pkt, &tmp, 1); cfpkt_add_trail(pkt, &tmp, 1); } break; case CFCTRL_CMD_LINK_DESTROY: spin_lock_bh(&ctrl->loop_linkid_lock); cfpkt_peek_head(pkt, &linkid, 1); ctrl->loop_linkused[linkid] = 0; spin_unlock_bh(&ctrl->loop_linkid_lock); break; default: break; } return 0; } #endif
5 2 1 1 2 8 7 9 2 4 5 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 
522 /* SPDX-License-Identifier: GPL-2.0-only */ /* * An interface between IEEE802.15.4 device and rest of the kernel. * * Copyright (C) 2007-2012 Siemens AG * * Written by: * Pavel Smolenskiy <pavel.smolenskiy@gmail.com> * Maxim Gorbachyov <maxim.gorbachev@siemens.com> * Maxim Osipov <maxim.osipov@siemens.com> * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> * Alexander Smirnov <alex.bluesman.smirnov@gmail.com> */ #ifndef IEEE802154_NETDEVICE_H #define IEEE802154_NETDEVICE_H #define IEEE802154_REQUIRED_SIZE(struct_type, member) \ (offsetof(typeof(struct_type), member) + \ sizeof(((typeof(struct_type) *)(NULL))->member)) #define IEEE802154_ADDR_OFFSET \ offsetof(typeof(struct sockaddr_ieee802154), addr) #define IEEE802154_MIN_NAMELEN (IEEE802154_ADDR_OFFSET + \ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, addr_type)) #define IEEE802154_NAMELEN_SHORT (IEEE802154_ADDR_OFFSET + \ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, short_addr)) #define IEEE802154_NAMELEN_LONG (IEEE802154_ADDR_OFFSET + \ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, hwaddr)) #include <net/af_ieee802154.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/ieee802154.h> #include <net/cfg802154.h> struct ieee802154_beacon_hdr { #if defined(__LITTLE_ENDIAN_BITFIELD) u16 beacon_order:4, superframe_order:4, final_cap_slot:4, battery_life_ext:1, reserved0:1, pan_coordinator:1, assoc_permit:1; u8 gts_count:3, gts_reserved:4, gts_permit:1; u8 pend_short_addr_count:3, reserved1:1, pend_ext_addr_count:3, reserved2:1; #elif defined(__BIG_ENDIAN_BITFIELD) u16 assoc_permit:1, pan_coordinator:1, reserved0:1, battery_life_ext:1, final_cap_slot:4, superframe_order:4, beacon_order:4; u8 gts_permit:1, gts_reserved:4, gts_count:3; u8 reserved2:1, pend_ext_addr_count:3, reserved1:1, pend_short_addr_count:3; #else #error "Please fix <asm/byteorder.h>" #endif } __packed; struct ieee802154_mac_cmd_pl { u8 cmd_id; } __packed; struct ieee802154_sechdr { #if 
defined(__LITTLE_ENDIAN_BITFIELD) u8 level:3, key_id_mode:2, reserved:3; #elif defined(__BIG_ENDIAN_BITFIELD) u8 reserved:3, key_id_mode:2, level:3; #else #error "Please fix <asm/byteorder.h>" #endif u8 key_id; __le32 frame_counter; union { __le32 short_src; __le64 extended_src; }; }; struct ieee802154_hdr_fc { #if defined(__LITTLE_ENDIAN_BITFIELD) u16 type:3, security_enabled:1, frame_pending:1, ack_request:1, intra_pan:1, reserved:3, dest_addr_mode:2, version:2, source_addr_mode:2; #elif defined(__BIG_ENDIAN_BITFIELD) u16 reserved:1, intra_pan:1, ack_request:1, frame_pending:1, security_enabled:1, type:3, source_addr_mode:2, version:2, dest_addr_mode:2, reserved2:2; #else #error "Please fix <asm/byteorder.h>" #endif }; struct ieee802154_assoc_req_pl { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 reserved1:1, device_type:1, power_source:1, rx_on_when_idle:1, assoc_type:1, reserved2:1, security_cap:1, alloc_addr:1; #elif defined(__BIG_ENDIAN_BITFIELD) u8 alloc_addr:1, security_cap:1, reserved2:1, assoc_type:1, rx_on_when_idle:1, power_source:1, device_type:1, reserved1:1; #else #error "Please fix <asm/byteorder.h>" #endif } __packed; struct ieee802154_assoc_resp_pl { __le16 short_addr; u8 status; } __packed; enum ieee802154_frame_version { IEEE802154_2003_STD, IEEE802154_2006_STD, IEEE802154_STD, IEEE802154_RESERVED_STD, IEEE802154_MULTIPURPOSE_STD = IEEE802154_2003_STD, }; enum ieee802154_addressing_mode { IEEE802154_NO_ADDRESSING, IEEE802154_RESERVED, IEEE802154_SHORT_ADDRESSING, IEEE802154_EXTENDED_ADDRESSING, }; enum ieee802154_association_status { IEEE802154_ASSOCIATION_SUCCESSFUL = 0x00, IEEE802154_PAN_AT_CAPACITY = 0x01, IEEE802154_PAN_ACCESS_DENIED = 0x02, IEEE802154_HOPPING_SEQUENCE_OFFSET_DUP = 0x03, IEEE802154_FAST_ASSOCIATION_SUCCESSFUL = 0x80, }; enum ieee802154_disassociation_reason { IEEE802154_COORD_WISHES_DEVICE_TO_LEAVE = 0x1, IEEE802154_DEVICE_WISHES_TO_LEAVE = 0x2, }; struct ieee802154_hdr { struct ieee802154_hdr_fc fc; u8 seq; struct 
ieee802154_addr source; struct ieee802154_addr dest; struct ieee802154_sechdr sec; }; struct ieee802154_beacon_frame { struct ieee802154_hdr mhr; struct ieee802154_beacon_hdr mac_pl; }; struct ieee802154_mac_cmd_frame { struct ieee802154_hdr mhr; struct ieee802154_mac_cmd_pl mac_pl; }; struct ieee802154_beacon_req_frame { struct ieee802154_hdr mhr; struct ieee802154_mac_cmd_pl mac_pl; }; struct ieee802154_association_req_frame { struct ieee802154_hdr mhr; struct ieee802154_mac_cmd_pl mac_pl; struct ieee802154_assoc_req_pl assoc_req_pl; }; struct ieee802154_association_resp_frame { struct ieee802154_hdr mhr; struct ieee802154_mac_cmd_pl mac_pl; struct ieee802154_assoc_resp_pl assoc_resp_pl; }; struct ieee802154_disassociation_notif_frame { struct ieee802154_hdr mhr; struct ieee802154_mac_cmd_pl mac_pl; u8 disassoc_pl; }; /* pushes hdr onto the skb. fields of hdr->fc that can be calculated from * the contents of hdr will be, and the actual value of those bits in * hdr->fc will be ignored. this includes the INTRA_PAN bit and the frame * version, if SECEN is set. 
*/ int ieee802154_hdr_push(struct sk_buff *skb, struct ieee802154_hdr *hdr); /* pulls the entire 802.15.4 header off of the skb, including the security * header, and performs pan id decompression */ int ieee802154_hdr_pull(struct sk_buff *skb, struct ieee802154_hdr *hdr); /* parses the frame control, sequence number of address fields in a given skb * and stores them into hdr, performing pan id decompression and length checks * to be suitable for use in header_ops.parse */ int ieee802154_hdr_peek_addrs(const struct sk_buff *skb, struct ieee802154_hdr *hdr); /* parses the full 802.15.4 header a given skb and stores them into hdr, * performing pan id decompression and length checks to be suitable for use in * header_ops.parse */ int ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr); /* pushes/pulls various frame types into/from an skb */ int ieee802154_beacon_push(struct sk_buff *skb, struct ieee802154_beacon_frame *beacon); int ieee802154_mac_cmd_push(struct sk_buff *skb, void *frame, const void *pl, unsigned int pl_len); int ieee802154_mac_cmd_pl_pull(struct sk_buff *skb, struct ieee802154_mac_cmd_pl *mac_pl); int ieee802154_max_payload(const struct ieee802154_hdr *hdr); static inline int ieee802154_sechdr_authtag_len(const struct ieee802154_sechdr *sec) { switch (sec->level) { case IEEE802154_SCF_SECLEVEL_MIC32: case IEEE802154_SCF_SECLEVEL_ENC_MIC32: return 4; case IEEE802154_SCF_SECLEVEL_MIC64: case IEEE802154_SCF_SECLEVEL_ENC_MIC64: return 8; case IEEE802154_SCF_SECLEVEL_MIC128: case IEEE802154_SCF_SECLEVEL_ENC_MIC128: return 16; case IEEE802154_SCF_SECLEVEL_NONE: case IEEE802154_SCF_SECLEVEL_ENC: default: return 0; } } static inline int ieee802154_hdr_length(struct sk_buff *skb) { struct ieee802154_hdr hdr; int len = ieee802154_hdr_pull(skb, &hdr); if (len > 0) skb_push(skb, len); return len; } static inline bool ieee802154_addr_equal(const struct ieee802154_addr *a1, const struct ieee802154_addr *a2) { if (a1->pan_id != a2->pan_id || 
a1->mode != a2->mode) return false; if ((a1->mode == IEEE802154_ADDR_LONG && a1->extended_addr != a2->extended_addr) || (a1->mode == IEEE802154_ADDR_SHORT && a1->short_addr != a2->short_addr)) return false; return true; } static inline __le64 ieee802154_devaddr_from_raw(const void *raw) { u64 temp; memcpy(&temp, raw, IEEE802154_ADDR_LEN); return (__force __le64)swab64(temp); } static inline void ieee802154_devaddr_to_raw(void *raw, __le64 addr) { u64 temp = swab64((__force u64)addr); memcpy(raw, &temp, IEEE802154_ADDR_LEN); } static inline int ieee802154_sockaddr_check_size(struct sockaddr_ieee802154 *daddr, int len) { struct ieee802154_addr_sa *sa; int ret = 0; sa = &daddr->addr; if (len < IEEE802154_MIN_NAMELEN) return -EINVAL; switch (sa->addr_type) { case IEEE802154_ADDR_NONE: break; case IEEE802154_ADDR_SHORT: if (len < IEEE802154_NAMELEN_SHORT) ret = -EINVAL; break; case IEEE802154_ADDR_LONG: if (len < IEEE802154_NAMELEN_LONG) ret = -EINVAL; break; default: ret = -EINVAL; break; } return ret; } static inline void ieee802154_addr_from_sa(struct ieee802154_addr *a, const struct ieee802154_addr_sa *sa) { a->mode = sa->addr_type; a->pan_id = cpu_to_le16(sa->pan_id); switch (a->mode) { case IEEE802154_ADDR_SHORT: a->short_addr = cpu_to_le16(sa->short_addr); break; case IEEE802154_ADDR_LONG: a->extended_addr = ieee802154_devaddr_from_raw(sa->hwaddr); break; } } static inline void ieee802154_addr_to_sa(struct ieee802154_addr_sa *sa, const struct ieee802154_addr *a) { sa->addr_type = a->mode; sa->pan_id = le16_to_cpu(a->pan_id); switch (a->mode) { case IEEE802154_ADDR_SHORT: sa->short_addr = le16_to_cpu(a->short_addr); break; case IEEE802154_ADDR_LONG: ieee802154_devaddr_to_raw(sa->hwaddr, a->extended_addr); break; } } /* * A control block of skb passed between the ARPHRD_IEEE802154 device * and other stack parts. 
*/ struct ieee802154_mac_cb { u8 lqi; u8 type; bool ackreq; bool secen; bool secen_override; u8 seclevel; bool seclevel_override; struct ieee802154_addr source; struct ieee802154_addr dest; }; static inline struct ieee802154_mac_cb *mac_cb(struct sk_buff *skb) { return (struct ieee802154_mac_cb *)skb->cb; } static inline struct ieee802154_mac_cb *mac_cb_init(struct sk_buff *skb) { BUILD_BUG_ON(sizeof(struct ieee802154_mac_cb) > sizeof(skb->cb)); memset(skb->cb, 0, sizeof(struct ieee802154_mac_cb)); return mac_cb(skb); } enum { IEEE802154_LLSEC_DEVKEY_IGNORE, IEEE802154_LLSEC_DEVKEY_RESTRICT, IEEE802154_LLSEC_DEVKEY_RECORD, __IEEE802154_LLSEC_DEVKEY_MAX, }; #define IEEE802154_MAC_SCAN_ED 0 #define IEEE802154_MAC_SCAN_ACTIVE 1 #define IEEE802154_MAC_SCAN_PASSIVE 2 #define IEEE802154_MAC_SCAN_ORPHAN 3 struct ieee802154_mac_params { s8 transmit_power; u8 min_be; u8 max_be; u8 csma_retries; s8 frame_retries; bool lbt; struct wpan_phy_cca cca; s32 cca_ed_level; }; struct wpan_phy; enum { IEEE802154_LLSEC_PARAM_ENABLED = BIT(0), IEEE802154_LLSEC_PARAM_FRAME_COUNTER = BIT(1), IEEE802154_LLSEC_PARAM_OUT_LEVEL = BIT(2), IEEE802154_LLSEC_PARAM_OUT_KEY = BIT(3), IEEE802154_LLSEC_PARAM_KEY_SOURCE = BIT(4), IEEE802154_LLSEC_PARAM_PAN_ID = BIT(5), IEEE802154_LLSEC_PARAM_HWADDR = BIT(6), IEEE802154_LLSEC_PARAM_COORD_HWADDR = BIT(7), IEEE802154_LLSEC_PARAM_COORD_SHORTADDR = BIT(8), }; struct ieee802154_llsec_ops { int (*get_params)(struct net_device *dev, struct ieee802154_llsec_params *params); int (*set_params)(struct net_device *dev, const struct ieee802154_llsec_params *params, int changed); int (*add_key)(struct net_device *dev, const struct ieee802154_llsec_key_id *id, const struct ieee802154_llsec_key *key); int (*del_key)(struct net_device *dev, const struct ieee802154_llsec_key_id *id); int (*add_dev)(struct net_device *dev, const struct ieee802154_llsec_device *llsec_dev); int (*del_dev)(struct net_device *dev, __le64 dev_addr); int (*add_devkey)(struct net_device *dev, 
__le64 device_addr, const struct ieee802154_llsec_device_key *key); int (*del_devkey)(struct net_device *dev, __le64 device_addr, const struct ieee802154_llsec_device_key *key); int (*add_seclevel)(struct net_device *dev, const struct ieee802154_llsec_seclevel *sl); int (*del_seclevel)(struct net_device *dev, const struct ieee802154_llsec_seclevel *sl); void (*lock_table)(struct net_device *dev); void (*get_table)(struct net_device *dev, struct ieee802154_llsec_table **t); void (*unlock_table)(struct net_device *dev); }; /* * This should be located at net_device->ml_priv * * get_phy should increment the reference counting on returned phy. * Use wpan_wpy_put to put that reference. */ struct ieee802154_mlme_ops { /* The following fields are optional (can be NULL). */ int (*assoc_req)(struct net_device *dev, struct ieee802154_addr *addr, u8 channel, u8 page, u8 cap); int (*assoc_resp)(struct net_device *dev, struct ieee802154_addr *addr, __le16 short_addr, u8 status); int (*disassoc_req)(struct net_device *dev, struct ieee802154_addr *addr, u8 reason); int (*start_req)(struct net_device *dev, struct ieee802154_addr *addr, u8 channel, u8 page, u8 bcn_ord, u8 sf_ord, u8 pan_coord, u8 blx, u8 coord_realign); int (*scan_req)(struct net_device *dev, u8 type, u32 channels, u8 page, u8 duration); int (*set_mac_params)(struct net_device *dev, const struct ieee802154_mac_params *params); void (*get_mac_params)(struct net_device *dev, struct ieee802154_mac_params *params); const struct ieee802154_llsec_ops *llsec; }; static inline struct ieee802154_mlme_ops * ieee802154_mlme_ops(const struct net_device *dev) { return dev->ml_priv; } #endif
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 
1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * i2c.h - definitions for the Linux i2c bus interface * Copyright (C) 1995-2000 Simon G. Vogl * Copyright (C) 2013-2019 Wolfram Sang <wsa@kernel.org> * * With some changes from Kyösti Mälkki <kmalkki@cc.hut.fi> and * Frodo Looijaard <frodol@dds.nl> */ #ifndef _LINUX_I2C_H #define _LINUX_I2C_H #include <linux/acpi.h> /* for acpi_handle */ #include <linux/bits.h> #include <linux/mod_devicetable.h> #include <linux/device.h> /* for struct device */ #include <linux/sched.h> /* for completion */ #include <linux/mutex.h> #include <linux/regulator/consumer.h> #include <linux/rtmutex.h> #include <linux/irqdomain.h> /* for Host Notify IRQ */ #include <linux/of.h> /* for struct device_node */ #include <linux/swab.h> /* for swab16 */ #include <uapi/linux/i2c.h> extern const struct bus_type i2c_bus_type; extern struct device_type i2c_adapter_type; extern struct device_type i2c_client_type; /* --- General options ------------------------------------------------ */ struct i2c_msg; struct i2c_algorithm; struct i2c_adapter; struct i2c_client; struct i2c_driver; struct i2c_device_identity; union i2c_smbus_data; struct i2c_board_info; enum i2c_slave_event; typedef int (*i2c_slave_cb_t)(struct i2c_client *client, enum i2c_slave_event event, u8 *val); /* I2C Frequency Modes */ #define I2C_MAX_STANDARD_MODE_FREQ 100000 #define I2C_MAX_FAST_MODE_FREQ 400000 #define I2C_MAX_FAST_MODE_PLUS_FREQ 1000000 #define I2C_MAX_TURBO_MODE_FREQ 1400000 #define I2C_MAX_HIGH_SPEED_MODE_FREQ 3400000 #define I2C_MAX_ULTRA_FAST_MODE_FREQ 5000000 struct module; struct property_entry; #if IS_ENABLED(CONFIG_I2C) /* Return the Frequency mode 
string based on the bus frequency */ const char *i2c_freq_mode_string(u32 bus_freq_hz); /* * The master routines are the ones normally used to transmit data to devices * on a bus (or read from them). Apart from two basic transfer functions to * transmit one message at a time, a more complex version can be used to * transmit an arbitrary number of messages without interruption. * @count must be less than 64k since msg.len is u16. */ int i2c_transfer_buffer_flags(const struct i2c_client *client, char *buf, int count, u16 flags); /** * i2c_master_recv - issue a single I2C message in master receive mode * @client: Handle to slave device * @buf: Where to store data read from slave * @count: How many bytes to read, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes read. */ static inline int i2c_master_recv(const struct i2c_client *client, char *buf, int count) { return i2c_transfer_buffer_flags(client, buf, count, I2C_M_RD); }; /** * i2c_master_recv_dmasafe - issue a single I2C message in master receive mode * using a DMA safe buffer * @client: Handle to slave device * @buf: Where to store data read from slave, must be safe to use with DMA * @count: How many bytes to read, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes read. */ static inline int i2c_master_recv_dmasafe(const struct i2c_client *client, char *buf, int count) { return i2c_transfer_buffer_flags(client, buf, count, I2C_M_RD | I2C_M_DMA_SAFE); }; /** * i2c_master_send - issue a single I2C message in master transmit mode * @client: Handle to slave device * @buf: Data that will be written to the slave * @count: How many bytes to write, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes written. 
*/ static inline int i2c_master_send(const struct i2c_client *client, const char *buf, int count) { return i2c_transfer_buffer_flags(client, (char *)buf, count, 0); }; /** * i2c_master_send_dmasafe - issue a single I2C message in master transmit mode * using a DMA safe buffer * @client: Handle to slave device * @buf: Data that will be written to the slave, must be safe to use with DMA * @count: How many bytes to write, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes written. */ static inline int i2c_master_send_dmasafe(const struct i2c_client *client, const char *buf, int count) { return i2c_transfer_buffer_flags(client, (char *)buf, count, I2C_M_DMA_SAFE); }; /* Transfer num messages. */ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); /* Unlocked flavor */ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); /* This is the very generalized SMBus access routine. You probably do not want to use this, though; one of the functions below may be much easier, and probably just as fast. Note that we use i2c_adapter here, because you do not need a specific smbus adapter to call this function. */ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, union i2c_smbus_data *data); /* Unlocked flavor */ s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, union i2c_smbus_data *data); /* Now follow the 'nice' access routines. These also document the calling conventions of i2c_smbus_xfer. 
*/ u8 i2c_smbus_pec(u8 crc, u8 *p, size_t count); s32 i2c_smbus_read_byte(const struct i2c_client *client); s32 i2c_smbus_write_byte(const struct i2c_client *client, u8 value); s32 i2c_smbus_read_byte_data(const struct i2c_client *client, u8 command); s32 i2c_smbus_write_byte_data(const struct i2c_client *client, u8 command, u8 value); s32 i2c_smbus_read_word_data(const struct i2c_client *client, u8 command); s32 i2c_smbus_write_word_data(const struct i2c_client *client, u8 command, u16 value); static inline s32 i2c_smbus_read_word_swapped(const struct i2c_client *client, u8 command) { s32 value = i2c_smbus_read_word_data(client, command); return (value < 0) ? value : swab16(value); } static inline s32 i2c_smbus_write_word_swapped(const struct i2c_client *client, u8 command, u16 value) { return i2c_smbus_write_word_data(client, command, swab16(value)); } /* Returns the number of read bytes */ s32 i2c_smbus_read_block_data(const struct i2c_client *client, u8 command, u8 *values); s32 i2c_smbus_write_block_data(const struct i2c_client *client, u8 command, u8 length, const u8 *values); /* Returns the number of read bytes */ s32 i2c_smbus_read_i2c_block_data(const struct i2c_client *client, u8 command, u8 length, u8 *values); s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, u8 command, u8 length, const u8 *values); s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client, u8 command, u8 length, u8 *values); int i2c_get_device_id(const struct i2c_client *client, struct i2c_device_identity *id); const struct i2c_device_id *i2c_client_get_device_id(const struct i2c_client *client); #endif /* I2C */ /** * struct i2c_device_identity - i2c client device identification * @manufacturer_id: 0 - 4095, database maintained by NXP * @part_id: 0 - 511, according to manufacturer * @die_revision: 0 - 7, according to manufacturer */ struct i2c_device_identity { u16 manufacturer_id; #define I2C_DEVICE_ID_NXP_SEMICONDUCTORS 0 #define 
I2C_DEVICE_ID_NXP_SEMICONDUCTORS_1 1 #define I2C_DEVICE_ID_NXP_SEMICONDUCTORS_2 2 #define I2C_DEVICE_ID_NXP_SEMICONDUCTORS_3 3 #define I2C_DEVICE_ID_RAMTRON_INTERNATIONAL 4 #define I2C_DEVICE_ID_ANALOG_DEVICES 5 #define I2C_DEVICE_ID_STMICROELECTRONICS 6 #define I2C_DEVICE_ID_ON_SEMICONDUCTOR 7 #define I2C_DEVICE_ID_SPRINTEK_CORPORATION 8 #define I2C_DEVICE_ID_ESPROS_PHOTONICS_AG 9 #define I2C_DEVICE_ID_FUJITSU_SEMICONDUCTOR 10 #define I2C_DEVICE_ID_FLIR 11 #define I2C_DEVICE_ID_O2MICRO 12 #define I2C_DEVICE_ID_ATMEL 13 #define I2C_DEVICE_ID_NONE 0xffff u16 part_id; u8 die_revision; }; enum i2c_alert_protocol { I2C_PROTOCOL_SMBUS_ALERT, I2C_PROTOCOL_SMBUS_HOST_NOTIFY, }; /** * enum i2c_driver_flags - Flags for an I2C device driver * * @I2C_DRV_ACPI_WAIVE_D0_PROBE: Don't put the device in D0 state for probe */ enum i2c_driver_flags { I2C_DRV_ACPI_WAIVE_D0_PROBE = BIT(0), }; /** * struct i2c_driver - represent an I2C device driver * @class: What kind of i2c device we instantiate (for detect) * @probe: Callback for device binding * @remove: Callback for device unbinding * @shutdown: Callback for device shutdown * @alert: Alert callback, for example for the SMBus alert protocol * @command: Callback for bus-wide signaling (optional) * @driver: Device driver model driver * @id_table: List of I2C devices supported by this driver * @detect: Callback for device detection * @address_list: The I2C addresses to probe (for detect) * @clients: List of detected clients we created (for i2c-core use only) * @flags: A bitmask of flags defined in &enum i2c_driver_flags * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver. * * For automatic device detection, both @detect and @address_list must * be defined. @class should also be set, otherwise only devices forced * with module parameters will be created. 
The detect function must * fill at least the name field of the i2c_board_info structure it is * handed upon successful detection, and possibly also the flags field. * * If @detect is missing, the driver will still work fine for enumerated * devices. Detected devices simply won't be supported. This is expected * for the many I2C/SMBus devices which can't be detected reliably, and * the ones which can always be enumerated in practice. * * The i2c_client structure which is handed to the @detect callback is * not a real i2c_client. It is initialized just enough so that you can * call i2c_smbus_read_byte_data and friends on it. Don't do anything * else with it. In particular, calling dev_dbg and friends on it is * not allowed. */ struct i2c_driver { unsigned int class; /* Standard driver model interfaces */ int (*probe)(struct i2c_client *client); void (*remove)(struct i2c_client *client); /* driver model interfaces that don't relate to enumeration */ void (*shutdown)(struct i2c_client *client); /* Alert callback, for example for the SMBus alert protocol. * The format and meaning of the data value depends on the protocol. * For the SMBus alert protocol, there is a single bit of data passed * as the alert response's low bit ("event flag"). * For the SMBus Host Notify protocol, the data corresponds to the * 16-bit payload data reported by the slave device acting as master. */ void (*alert)(struct i2c_client *client, enum i2c_alert_protocol protocol, unsigned int data); /* a ioctl like command that can be used to perform specific functions * with the device. 
*/ int (*command)(struct i2c_client *client, unsigned int cmd, void *arg); struct device_driver driver; const struct i2c_device_id *id_table; /* Device detection callback for automatic device creation */ int (*detect)(struct i2c_client *client, struct i2c_board_info *info); const unsigned short *address_list; struct list_head clients; u32 flags; }; #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) /** * struct i2c_client - represent an I2C slave device * @flags: see I2C_CLIENT_* for possible flags * @addr: Address used on the I2C bus connected to the parent adapter. * @name: Indicates the type of the device, usually a chip name that's * generic enough to hide second-sourcing and compatible revisions. * @adapter: manages the bus segment hosting this I2C device * @dev: Driver model device node for the slave. * @init_irq: IRQ that was set at initialization * @irq: indicates the IRQ generated by this device (if any) * @detected: member of an i2c_driver.clients list or i2c-core's * userspace_devices list * @slave_cb: Callback when I2C slave mode of an adapter is used. The adapter * calls it to pass on slave events to the slave driver. * @devres_group_id: id of the devres group that will be created for resources * acquired when probing this device. * * An i2c_client identifies a single device (i.e. chip) connected to an * i2c bus. The behaviour exposed to Linux is defined by the driver * managing the device. 
*/ struct i2c_client { unsigned short flags; /* div., see below */ #define I2C_CLIENT_PEC 0x04 /* Use Packet Error Checking */ #define I2C_CLIENT_TEN 0x10 /* we have a ten bit chip address */ /* Must equal I2C_M_TEN below */ #define I2C_CLIENT_SLAVE 0x20 /* we are the slave */ #define I2C_CLIENT_HOST_NOTIFY 0x40 /* We want to use I2C host notify */ #define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */ #define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */ /* Must match I2C_M_STOP|IGNORE_NAK */ unsigned short addr; /* chip address - NOTE: 7bit */ /* addresses are stored in the */ /* _LOWER_ 7 bits */ char name[I2C_NAME_SIZE]; struct i2c_adapter *adapter; /* the adapter we sit on */ struct device dev; /* the device structure */ int init_irq; /* irq set at initialization */ int irq; /* irq issued by device */ struct list_head detected; #if IS_ENABLED(CONFIG_I2C_SLAVE) i2c_slave_cb_t slave_cb; /* callback for slave mode */ #endif void *devres_group_id; /* ID of probe devres group */ }; #define to_i2c_client(d) container_of(d, struct i2c_client, dev) struct i2c_adapter *i2c_verify_adapter(struct device *dev); const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id, const struct i2c_client *client); const void *i2c_get_match_data(const struct i2c_client *client); static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) { struct device * const dev = kobj_to_dev(kobj); return to_i2c_client(dev); } static inline void *i2c_get_clientdata(const struct i2c_client *client) { return dev_get_drvdata(&client->dev); } static inline void i2c_set_clientdata(struct i2c_client *client, void *data) { dev_set_drvdata(&client->dev, data); } /* I2C slave support */ enum i2c_slave_event { I2C_SLAVE_READ_REQUESTED, I2C_SLAVE_WRITE_REQUESTED, I2C_SLAVE_READ_PROCESSED, I2C_SLAVE_WRITE_RECEIVED, I2C_SLAVE_STOP, }; int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb); int i2c_slave_unregister(struct i2c_client 
*client); int i2c_slave_event(struct i2c_client *client, enum i2c_slave_event event, u8 *val); #if IS_ENABLED(CONFIG_I2C_SLAVE) bool i2c_detect_slave_mode(struct device *dev); #else static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } #endif /** * struct i2c_board_info - template for device creation * @type: chip type, to initialize i2c_client.name * @flags: to initialize i2c_client.flags * @addr: stored in i2c_client.addr * @dev_name: Overrides the default <busnr>-<addr> dev_name if set * @platform_data: stored in i2c_client.dev.platform_data * @of_node: pointer to OpenFirmware device node * @fwnode: device node supplied by the platform firmware * @swnode: software node for the device * @resources: resources associated with the device * @num_resources: number of resources in the @resources array * @irq: stored in i2c_client.irq * * I2C doesn't actually support hardware probing, although controllers and * devices may be able to use I2C_SMBUS_QUICK to tell whether or not there's * a device at a given address. Drivers commonly need more information than * that, such as chip type, configuration, associated IRQ, and so on. * * i2c_board_info is used to build tables of information listing I2C devices * that are present. This information is used to grow the driver model tree. * For mainboards this is done statically using i2c_register_board_info(); * bus numbers identify adapters that aren't yet available. For add-on boards, * i2c_new_client_device() does this dynamically with the adapter already known. 
*/ struct i2c_board_info { char type[I2C_NAME_SIZE]; unsigned short flags; unsigned short addr; const char *dev_name; void *platform_data; struct device_node *of_node; struct fwnode_handle *fwnode; const struct software_node *swnode; const struct resource *resources; unsigned int num_resources; int irq; }; /** * I2C_BOARD_INFO - macro used to list an i2c device and its address * @dev_type: identifies the device type * @dev_addr: the device's address on the bus. * * This macro initializes essential fields of a struct i2c_board_info, * declaring what has been provided on a particular board. Optional * fields (such as associated irq, or device-specific platform_data) * are provided using conventional syntax. */ #define I2C_BOARD_INFO(dev_type, dev_addr) \ .type = dev_type, .addr = (dev_addr) #if IS_ENABLED(CONFIG_I2C) /* * Add-on boards should register/unregister their devices; e.g. a board * with integrated I2C, a config eeprom, sensors, and a codec that's * used in conjunction with the primary hardware. */ struct i2c_client * i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *info); /* If you don't know the exact address of an I2C device, use this variant * instead, which can probe for device presence in a list of possible * addresses. The "probe" callback function is optional. If it is provided, * it must return 1 on successful probe, 0 otherwise. If it is not provided, * a default probing method is used. 
*/ struct i2c_client * i2c_new_scanned_device(struct i2c_adapter *adap, struct i2c_board_info *info, unsigned short const *addr_list, int (*probe)(struct i2c_adapter *adap, unsigned short addr)); /* Common custom probe functions */ int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr); struct i2c_client * i2c_new_dummy_device(struct i2c_adapter *adapter, u16 address); struct i2c_client * devm_i2c_new_dummy_device(struct device *dev, struct i2c_adapter *adap, u16 address); struct i2c_client * i2c_new_ancillary_device(struct i2c_client *client, const char *name, u16 default_addr); void i2c_unregister_device(struct i2c_client *client); struct i2c_client *i2c_verify_client(struct device *dev); #else static inline struct i2c_client *i2c_verify_client(struct device *dev) { return NULL; } #endif /* I2C */ /* Mainboard arch_initcall() code should register all its I2C devices. * This is done at arch_initcall time, before declaring any i2c adapters. * Modules for add-on boards must use other calls. */ #ifdef CONFIG_I2C_BOARDINFO int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned n); #else static inline int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned n) { return 0; } #endif /* I2C_BOARDINFO */ /** * struct i2c_algorithm - represent I2C transfer method * @master_xfer: Issue a set of i2c transactions to the given I2C adapter * defined by the msgs array, with num messages available to transfer via * the adapter specified by adap. * @master_xfer_atomic: same as @master_xfer. Yet, only using atomic context * so e.g. PMICs can be accessed very late before shutdown. Optional. * @smbus_xfer: Issue smbus transactions to the given I2C adapter. If this * is not present, then the bus layer will try and convert the SMBus calls * into I2C transfers instead. * @smbus_xfer_atomic: same as @smbus_xfer. Yet, only using atomic context * so e.g. PMICs can be accessed very late before shutdown. Optional. 
* @functionality: Return the flags that this algorithm/adapter pair supports * from the ``I2C_FUNC_*`` flags. * @reg_slave: Register given client to I2C slave mode of this adapter * @unreg_slave: Unregister given client from I2C slave mode of this adapter * * The following structs are for those who like to implement new bus drivers: * i2c_algorithm is the interface to a class of hardware solutions which can * be addressed using the same bus algorithms - i.e. bit-banging or the PCF8584 * to name two of the most common. * * The return codes from the ``master_xfer{_atomic}`` fields should indicate the * type of error code that occurred during the transfer, as documented in the * Kernel Documentation file Documentation/i2c/fault-codes.rst. Otherwise, the * number of messages executed should be returned. */ struct i2c_algorithm { /* * If an adapter algorithm can't do I2C-level access, set master_xfer * to NULL. If an adapter algorithm can do SMBus access, set * smbus_xfer. If set to NULL, the SMBus protocol is simulated * using common I2C messages. 
* * master_xfer should return the number of messages successfully * processed, or a negative value on error */ int (*master_xfer)(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); int (*master_xfer_atomic)(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); int (*smbus_xfer)(struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int size, union i2c_smbus_data *data); int (*smbus_xfer_atomic)(struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int size, union i2c_smbus_data *data); /* To determine what the adapter supports */ u32 (*functionality)(struct i2c_adapter *adap); #if IS_ENABLED(CONFIG_I2C_SLAVE) int (*reg_slave)(struct i2c_client *client); int (*unreg_slave)(struct i2c_client *client); #endif }; /** * struct i2c_lock_operations - represent I2C locking operations * @lock_bus: Get exclusive access to an I2C bus segment * @trylock_bus: Try to get exclusive access to an I2C bus segment * @unlock_bus: Release exclusive access to an I2C bus segment * * The main operations are wrapped by i2c_lock_bus and i2c_unlock_bus. 
*/ struct i2c_lock_operations { void (*lock_bus)(struct i2c_adapter *adapter, unsigned int flags); int (*trylock_bus)(struct i2c_adapter *adapter, unsigned int flags); void (*unlock_bus)(struct i2c_adapter *adapter, unsigned int flags); }; /** * struct i2c_timings - I2C timing information * @bus_freq_hz: the bus frequency in Hz * @scl_rise_ns: time SCL signal takes to rise in ns; t(r) in the I2C specification * @scl_fall_ns: time SCL signal takes to fall in ns; t(f) in the I2C specification * @scl_int_delay_ns: time IP core additionally needs to setup SCL in ns * @sda_fall_ns: time SDA signal takes to fall in ns; t(f) in the I2C specification * @sda_hold_ns: time IP core additionally needs to hold SDA in ns * @digital_filter_width_ns: width in ns of spikes on i2c lines that the IP core * digital filter can filter out * @analog_filter_cutoff_freq_hz: threshold frequency for the low pass IP core * analog filter */ struct i2c_timings { u32 bus_freq_hz; u32 scl_rise_ns; u32 scl_fall_ns; u32 scl_int_delay_ns; u32 sda_fall_ns; u32 sda_hold_ns; u32 digital_filter_width_ns; u32 analog_filter_cutoff_freq_hz; }; /** * struct i2c_bus_recovery_info - I2C bus recovery information * @recover_bus: Recover routine. Either pass driver's recover_bus() routine, or * i2c_generic_scl_recovery(). * @get_scl: This gets current value of SCL line. Mandatory for generic SCL * recovery. Populated internally for generic GPIO recovery. * @set_scl: This sets/clears the SCL line. Mandatory for generic SCL recovery. * Populated internally for generic GPIO recovery. * @get_sda: This gets current value of SDA line. This or set_sda() is mandatory * for generic SCL recovery. Populated internally, if sda_gpio is a valid * GPIO, for generic GPIO recovery. * @set_sda: This sets/clears the SDA line. This or get_sda() is mandatory for * generic SCL recovery. Populated internally, if sda_gpio is a valid GPIO, * for generic GPIO recovery. 
* @get_bus_free: Returns the bus free state as seen from the IP core in case it * has a more complex internal logic than just reading SDA. Optional. * @prepare_recovery: This will be called before starting recovery. Platform may * configure padmux here for SDA/SCL line or something else they want. * @unprepare_recovery: This will be called after completing recovery. Platform * may configure padmux here for SDA/SCL line or something else they want. * @scl_gpiod: gpiod of the SCL line. Only required for GPIO recovery. * @sda_gpiod: gpiod of the SDA line. Only required for GPIO recovery. * @pinctrl: pinctrl used by GPIO recovery to change the state of the I2C pins. * Optional. * @pins_default: default pinctrl state of SCL/SDA lines, when they are assigned * to the I2C bus. Optional. Populated internally for GPIO recovery, if * state with the name PINCTRL_STATE_DEFAULT is found and pinctrl is valid. * @pins_gpio: recovery pinctrl state of SCL/SDA lines, when they are used as * GPIOs. Optional. Populated internally for GPIO recovery, if this state * is called "gpio" or "recovery" and pinctrl is valid. 
*/
struct i2c_bus_recovery_info {
	/* recovery entry point */
	int (*recover_bus)(struct i2c_adapter *adap);

	/* SCL/SDA line accessors */
	int (*get_scl)(struct i2c_adapter *adap);
	void (*set_scl)(struct i2c_adapter *adap, int val);
	int (*get_sda)(struct i2c_adapter *adap);
	void (*set_sda)(struct i2c_adapter *adap, int val);
	int (*get_bus_free)(struct i2c_adapter *adap);

	/* hooks run before and after recovery */
	void (*prepare_recovery)(struct i2c_adapter *adap);
	void (*unprepare_recovery)(struct i2c_adapter *adap);

	/* gpio recovery */
	struct gpio_desc *scl_gpiod;
	struct gpio_desc *sda_gpiod;
	struct pinctrl *pinctrl;
	struct pinctrl_state *pins_default;
	struct pinctrl_state *pins_gpio;
};

int i2c_recover_bus(struct i2c_adapter *adap);

/* Generic recovery routines */
int i2c_generic_scl_recovery(struct i2c_adapter *adap);

/**
 * struct i2c_adapter_quirks - describe flaws of an i2c adapter
 * @flags: see I2C_AQ_* for possible flags and read below
 * @max_num_msgs: maximum number of messages per transfer
 * @max_write_len: maximum length of a write message
 * @max_read_len: maximum length of a read message
 * @max_comb_1st_msg_len: maximum length of the first msg in a combined message
 * @max_comb_2nd_msg_len: maximum length of the second msg in a combined message
 *
 * Note about combined messages: Some I2C controllers can only send one message
 * per transfer, plus something called combined message or write-then-read.
 * This is (usually) a small write message followed by a read message and
 * barely enough to access register based devices like EEPROMs. There is a flag
 * to support this mode. It implies max_num_msg = 2 and does the length checks
 * with max_comb_*_len because combined message mode usually has its own
 * limitations. Because of HW implementations, some controllers can actually do
 * write-then-anything or other variants. To support that, write-then-read has
 * been broken out into smaller bits like write-first and read-second which can
 * be combined as needed.
*/ struct i2c_adapter_quirks { u64 flags; int max_num_msgs; u16 max_write_len; u16 max_read_len; u16 max_comb_1st_msg_len; u16 max_comb_2nd_msg_len; }; /* enforce max_num_msgs = 2 and use max_comb_*_len for length checks */ #define I2C_AQ_COMB BIT(0) /* first combined message must be write */ #define I2C_AQ_COMB_WRITE_FIRST BIT(1) /* second combined message must be read */ #define I2C_AQ_COMB_READ_SECOND BIT(2) /* both combined messages must have the same target address */ #define I2C_AQ_COMB_SAME_ADDR BIT(3) /* convenience macro for typical write-then read case */ #define I2C_AQ_COMB_WRITE_THEN_READ (I2C_AQ_COMB | I2C_AQ_COMB_WRITE_FIRST | \ I2C_AQ_COMB_READ_SECOND | I2C_AQ_COMB_SAME_ADDR) /* clock stretching is not supported */ #define I2C_AQ_NO_CLK_STRETCH BIT(4) /* message cannot have length of 0 */ #define I2C_AQ_NO_ZERO_LEN_READ BIT(5) #define I2C_AQ_NO_ZERO_LEN_WRITE BIT(6) #define I2C_AQ_NO_ZERO_LEN (I2C_AQ_NO_ZERO_LEN_READ | I2C_AQ_NO_ZERO_LEN_WRITE) /* adapter cannot do repeated START */ #define I2C_AQ_NO_REP_START BIT(7) /* * i2c_adapter is the structure used to identify a physical i2c bus along * with the access algorithms necessary to access it. 
*/ struct i2c_adapter { struct module *owner; unsigned int class; /* classes to allow probing for */ const struct i2c_algorithm *algo; /* the algorithm to access the bus */ void *algo_data; /* data fields that are valid for all devices */ const struct i2c_lock_operations *lock_ops; struct rt_mutex bus_lock; struct rt_mutex mux_lock; int timeout; /* in jiffies */ int retries; struct device dev; /* the adapter device */ unsigned long locked_flags; /* owned by the I2C core */ #define I2C_ALF_IS_SUSPENDED 0 #define I2C_ALF_SUSPEND_REPORTED 1 int nr; char name[48]; struct completion dev_released; struct mutex userspace_clients_lock; struct list_head userspace_clients; struct i2c_bus_recovery_info *bus_recovery_info; const struct i2c_adapter_quirks *quirks; struct irq_domain *host_notify_domain; struct regulator *bus_regulator; struct dentry *debugfs; }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) static inline void *i2c_get_adapdata(const struct i2c_adapter *adap) { return dev_get_drvdata(&adap->dev); } static inline void i2c_set_adapdata(struct i2c_adapter *adap, void *data) { dev_set_drvdata(&adap->dev, data); } static inline struct i2c_adapter * i2c_parent_is_i2c_adapter(const struct i2c_adapter *adapter) { #if IS_ENABLED(CONFIG_I2C_MUX) struct device *parent = adapter->dev.parent; if (parent != NULL && parent->type == &i2c_adapter_type) return to_i2c_adapter(parent); else #endif return NULL; } int i2c_for_each_dev(void *data, int (*fn)(struct device *dev, void *data)); /* Adapter locking functions, exported for shared pin cases */ #define I2C_LOCK_ROOT_ADAPTER BIT(0) #define I2C_LOCK_SEGMENT BIT(1) /** * i2c_lock_bus - Get exclusive access to an I2C bus segment * @adapter: Target I2C bus segment * @flags: I2C_LOCK_ROOT_ADAPTER locks the root i2c adapter, I2C_LOCK_SEGMENT * locks only this branch in the adapter tree */ static inline void i2c_lock_bus(struct i2c_adapter *adapter, unsigned int flags) { adapter->lock_ops->lock_bus(adapter, flags); 
} /** * i2c_trylock_bus - Try to get exclusive access to an I2C bus segment * @adapter: Target I2C bus segment * @flags: I2C_LOCK_ROOT_ADAPTER tries to locks the root i2c adapter, * I2C_LOCK_SEGMENT tries to lock only this branch in the adapter tree * * Return: true if the I2C bus segment is locked, false otherwise */ static inline int i2c_trylock_bus(struct i2c_adapter *adapter, unsigned int flags) { return adapter->lock_ops->trylock_bus(adapter, flags); } /** * i2c_unlock_bus - Release exclusive access to an I2C bus segment * @adapter: Target I2C bus segment * @flags: I2C_LOCK_ROOT_ADAPTER unlocks the root i2c adapter, I2C_LOCK_SEGMENT * unlocks only this branch in the adapter tree */ static inline void i2c_unlock_bus(struct i2c_adapter *adapter, unsigned int flags) { adapter->lock_ops->unlock_bus(adapter, flags); } /** * i2c_mark_adapter_suspended - Report suspended state of the adapter to the core * @adap: Adapter to mark as suspended * * When using this helper to mark an adapter as suspended, the core will reject * further transfers to this adapter. The usage of this helper is optional but * recommended for devices having distinct handlers for system suspend and * runtime suspend. More complex devices are free to implement custom solutions * to reject transfers when suspended. */ static inline void i2c_mark_adapter_suspended(struct i2c_adapter *adap) { i2c_lock_bus(adap, I2C_LOCK_ROOT_ADAPTER); set_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags); i2c_unlock_bus(adap, I2C_LOCK_ROOT_ADAPTER); } /** * i2c_mark_adapter_resumed - Report resumed state of the adapter to the core * @adap: Adapter to mark as resumed * * When using this helper to mark an adapter as resumed, the core will allow * further transfers to this adapter. See also further notes to * @i2c_mark_adapter_suspended(). 
*/ static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap) { i2c_lock_bus(adap, I2C_LOCK_ROOT_ADAPTER); clear_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags); i2c_unlock_bus(adap, I2C_LOCK_ROOT_ADAPTER); } /* i2c adapter classes (bitmask) */ #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ #define I2C_CLASS_SPD (1<<7) /* Memory modules */ /* Warn users that the adapter doesn't support classes anymore */ #define I2C_CLASS_DEPRECATED (1<<8) /* Internal numbers to terminate lists */ #define I2C_CLIENT_END 0xfffeU /* Construct an I2C_CLIENT_END-terminated array of i2c addresses */ #define I2C_ADDRS(addr, addrs...) \ ((const unsigned short []){ addr, ## addrs, I2C_CLIENT_END }) /* ----- functions exported by i2c.o */ /* administration... */ #if IS_ENABLED(CONFIG_I2C) int i2c_add_adapter(struct i2c_adapter *adap); int devm_i2c_add_adapter(struct device *dev, struct i2c_adapter *adapter); void i2c_del_adapter(struct i2c_adapter *adap); int i2c_add_numbered_adapter(struct i2c_adapter *adap); int i2c_register_driver(struct module *owner, struct i2c_driver *driver); void i2c_del_driver(struct i2c_driver *driver); /* use a define to avoid include chaining to get THIS_MODULE */ #define i2c_add_driver(driver) \ i2c_register_driver(THIS_MODULE, driver) static inline bool i2c_client_has_driver(struct i2c_client *client) { return !IS_ERR_OR_NULL(client) && client->dev.driver; } /* call the i2c_client->command() of all attached clients with * the given arguments */ void i2c_clients_command(struct i2c_adapter *adap, unsigned int cmd, void *arg); struct i2c_adapter *i2c_get_adapter(int nr); void i2c_put_adapter(struct i2c_adapter *adap); unsigned int i2c_adapter_depth(struct i2c_adapter *adapter); void i2c_parse_fw_timings(struct device *dev, struct i2c_timings *t, bool use_defaults); /* Return the functionality mask */ static inline u32 i2c_get_functionality(struct i2c_adapter *adap) { return adap->algo->functionality(adap); } /* Return 1 if adapter supports 
everything we need, 0 if not. */ static inline int i2c_check_functionality(struct i2c_adapter *adap, u32 func) { return (func & i2c_get_functionality(adap)) == func; } /** * i2c_check_quirks() - Function for checking the quirk flags in an i2c adapter * @adap: i2c adapter * @quirks: quirk flags * * Return: true if the adapter has all the specified quirk flags, false if not */ static inline bool i2c_check_quirks(struct i2c_adapter *adap, u64 quirks) { if (!adap->quirks) return false; return (adap->quirks->flags & quirks) == quirks; } /* Return the adapter number for a specific adapter */ static inline int i2c_adapter_id(struct i2c_adapter *adap) { return adap->nr; } static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg) { return (msg->addr << 1) | (msg->flags & I2C_M_RD ? 1 : 0); } u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold); void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred); int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr); /** * module_i2c_driver() - Helper macro for registering a modular I2C driver * @__i2c_driver: i2c_driver struct * * Helper macro for I2C drivers which do not do anything special in module * init/exit. This eliminates a lot of boilerplate. Each module may only * use this macro once, and calling it replaces module_init() and module_exit() */ #define module_i2c_driver(__i2c_driver) \ module_driver(__i2c_driver, i2c_add_driver, \ i2c_del_driver) /** * builtin_i2c_driver() - Helper macro for registering a builtin I2C driver * @__i2c_driver: i2c_driver struct * * Helper macro for I2C drivers which do not do anything special in their * init. This eliminates a lot of boilerplate. Each driver may only * use this macro once, and calling it replaces device_initcall(). 
*/
#define builtin_i2c_driver(__i2c_driver) \
	builtin_driver(__i2c_driver, i2c_add_driver)

#endif /* I2C */

/* must call put_device() when done with returned i2c_client device */
struct i2c_client *i2c_find_device_by_fwnode(struct fwnode_handle *fwnode);

/* must call put_device() when done with returned i2c_adapter device */
struct i2c_adapter *i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode);

/* must call i2c_put_adapter() when done with returned i2c_adapter device */
struct i2c_adapter *i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode);

/*
 * Device-tree flavoured lookups. With CONFIG_OF enabled they convert the
 * device_node to its fwnode handle and forward to the fwnode-based lookups
 * declared above.
 */
#if IS_ENABLED(CONFIG_OF)
/* must call put_device() when done with returned i2c_client device */
static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
{
	return i2c_find_device_by_fwnode(of_fwnode_handle(node));
}

/* must call put_device() when done with returned i2c_adapter device */
static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node)
{
	return i2c_find_adapter_by_fwnode(of_fwnode_handle(node));
}

/* must call i2c_put_adapter() when done with returned i2c_adapter device */
static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node)
{
	return i2c_get_adapter_by_fwnode(of_fwnode_handle(node));
}

const struct of_device_id
*i2c_of_match_device(const struct of_device_id *matches,
		     struct i2c_client *client);

int of_i2c_get_board_info(struct device *dev, struct device_node *node,
			  struct i2c_board_info *info);

#else

/*
 * CONFIG_OF disabled: the lookup and match stubs return NULL and
 * of_i2c_get_board_info() returns -ENOTSUPP.
 */
static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
{
	return NULL;
}

static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node)
{
	return NULL;
}

static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node)
{
	return NULL;
}

static inline const struct of_device_id
*i2c_of_match_device(const struct of_device_id *matches,
		     struct i2c_client *client)
{
	return NULL;
}

static inline int of_i2c_get_board_info(struct device *dev,
					struct device_node *node,
					struct i2c_board_info *info)
{
	return -ENOTSUPP;
}

#endif /* CONFIG_OF */

/* Forward declarations so the ACPI prototypes and stubs below can name them */
struct acpi_resource;
struct acpi_resource_i2c_serialbus;

#if IS_ENABLED(CONFIG_ACPI)
bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
			       struct acpi_resource_i2c_serialbus **i2c);
int i2c_acpi_client_count(struct acpi_device *adev);
u32 i2c_acpi_find_bus_speed(struct device *dev);
struct i2c_client *i2c_acpi_new_device_by_fwnode(struct fwnode_handle *fwnode,
						 int index,
						 struct i2c_board_info *info);
struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle);
bool i2c_acpi_waive_d0_probe(struct device *dev);
#else
/*
 * CONFIG_ACPI disabled: the stubs report false / 0 / NULL, except
 * i2c_acpi_new_device_by_fwnode() which returns ERR_PTR(-ENODEV).
 */
static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
					     struct acpi_resource_i2c_serialbus **i2c)
{
	return false;
}
static inline int i2c_acpi_client_count(struct acpi_device *adev)
{
	return 0;
}
static inline u32 i2c_acpi_find_bus_speed(struct device *dev)
{
	return 0;
}
static inline struct i2c_client *i2c_acpi_new_device_by_fwnode(
					struct fwnode_handle *fwnode, int index,
					struct i2c_board_info *info)
{
	return ERR_PTR(-ENODEV);
}
static inline struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
{
	return NULL;
}
static inline bool i2c_acpi_waive_d0_probe(struct device *dev)
{
	return false;
}
#endif /* CONFIG_ACPI */

/*
 * Convenience wrapper: looks up @dev's fwnode with dev_fwnode() and forwards
 * to i2c_acpi_new_device_by_fwnode() with the same @index and @info.
 */
static inline struct i2c_client *i2c_acpi_new_device(struct device *dev,
						     int index,
						     struct i2c_board_info *info)
{
	return i2c_acpi_new_device_by_fwnode(dev_fwnode(dev), index, info);
}

#endif /* _LINUX_I2C_H */
69 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 // SPDX-License-Identifier: GPL-2.0-or-later /* * xpress_decompress.c - A decompressor for the XPRESS compression format * (Huffman variant), which can be used in "System Compressed" files. This is * based on the code from wimlib. * * Copyright (C) 2015 Eric Biggers */ #include "decompress_common.h" #include "lib.h" #define XPRESS_NUM_SYMBOLS 512 #define XPRESS_MAX_CODEWORD_LEN 15 #define XPRESS_MIN_MATCH_LEN 3 /* This value is chosen for fast decompression. */ #define XPRESS_TABLEBITS 12 /* Reusable heap-allocated memory for XPRESS decompression */ struct xpress_decompressor { /* The Huffman decoding table */ u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS]; /* An array that maps symbols to codeword lengths */ u8 lens[XPRESS_NUM_SYMBOLS]; /* Temporary space for make_huffman_decode_table() */ u16 working_space[2 * (1 + XPRESS_MAX_CODEWORD_LEN) + XPRESS_NUM_SYMBOLS]; }; /* * xpress_allocate_decompressor - Allocate an XPRESS decompressor * * Return the pointer to the decompressor on success, or return NULL and set * errno on failure. 
*/
struct xpress_decompressor *xpress_allocate_decompressor(void)
{
	struct xpress_decompressor *d;

	d = kmalloc(sizeof(*d), GFP_NOFS);
	return d;
}

/*
 * xpress_decompress - Expand one buffer of XPRESS (Huffman) compressed data
 *
 * @decompressor:	Scratch state obtained from
 *			xpress_allocate_decompressor()
 * @compressed_data:	Input buffer; begins with the packed codeword lengths
 * @compressed_size:	Size of the input buffer in bytes
 * @uncompressed_data:	Output buffer
 * @uncompressed_size:	Exact number of bytes to produce
 *
 * Return 0 once @uncompressed_size bytes have been written, or -1 if the
 * input is too short, the Huffman code is rejected, or a match refers
 * outside the output buffer.
 */
int xpress_decompress(struct xpress_decompressor *decompressor,
		      const void *compressed_data, size_t compressed_size,
		      void *uncompressed_data, size_t uncompressed_size)
{
	const u8 *const src = compressed_data;
	u8 *const dst_start = uncompressed_data;
	u8 *const dst_end = dst_start + uncompressed_size;
	u8 *dst = dst_start;
	struct input_bitstream is;
	u32 idx;

	/* The header packs two 4-bit codeword lengths into every byte. */
	if (compressed_size < XPRESS_NUM_SYMBOLS / 2)
		return -1;

	for (idx = 0; idx < XPRESS_NUM_SYMBOLS / 2; idx++) {
		const u8 packed = src[idx];

		decompressor->lens[2 * idx] = packed & 0xF;
		decompressor->lens[2 * idx + 1] = packed >> 4;
	}

	/* Turn the lengths into a decode table; a nonzero result is a failure. */
	if (make_huffman_decode_table(decompressor->decode_table,
				      XPRESS_NUM_SYMBOLS, XPRESS_TABLEBITS,
				      decompressor->lens,
				      XPRESS_MAX_CODEWORD_LEN,
				      decompressor->working_space))
		return -1;

	/* The literal/match stream follows the length header. */
	init_input_bitstream(&is, src + XPRESS_NUM_SYMBOLS / 2,
			     compressed_size - XPRESS_NUM_SYMBOLS / 2);

	while (dst != dst_end) {
		u32 match_len;
		u32 offset_bits;
		u32 match_off;
		u32 sym;

		sym = read_huffsym(&is, decompressor->decode_table,
				   XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);

		/* Symbols below 256 are literal bytes. */
		if (sym < 256) {
			*dst++ = sym;
			continue;
		}

		/* Match: low nibble is the length, next nibble log2(offset). */
		match_len = sym & 0xf;
		offset_bits = (sym >> 4) & 0xf;

		bitstream_ensure_bits(&is, 16);

		match_off = ((u32)1 << offset_bits) |
			    bitstream_pop_bits(&is, offset_bits);

		/* A saturated nibble extends to a byte, then to a u16. */
		if (match_len == 0xf) {
			match_len += bitstream_read_byte(&is);
			if (match_len == 0xf + 0xff)
				match_len = bitstream_read_u16(&is);
		}
		match_len += XPRESS_MIN_MATCH_LEN;

		/* Source must lie inside what has already been produced ... */
		if (match_off > (size_t)(dst - dst_start))
			return -1;

		/* ... and the copy must fit in what is left of the output. */
		if (match_len > (size_t)(dst_end - dst))
			return -1;

		dst = lz_copy(dst, match_len, match_off, dst_end,
			      XPRESS_MIN_MATCH_LEN);
	}

	return 0;
}

/*
 * xpress_free_decompressor - Release an XPRESS decompressor
 *
 * @decompressor:	Pointer returned by xpress_allocate_decompressor();
 *			it is handed straight to kfree().
 */
void xpress_free_decompressor(struct xpress_decompressor *decompressor)
{
	kfree(decompressor);
}
3 1 4 1 3 4 4 5 5 5 1 4 5 5 5 1 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 // SPDX-License-Identifier: GPL-2.0-only /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * Monitoring SMC transport protocol sockets * * Copyright IBM Corp. 
2016 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> #include <linux/init.h> #include <linux/sock_diag.h> #include <linux/inet_diag.h> #include <linux/smc_diag.h> #include <net/netlink.h> #include <net/smc.h> #include "smc.h" #include "smc_core.h" #include "smc_ism.h" struct smc_diag_dump_ctx { int pos[2]; }; static struct smc_diag_dump_ctx *smc_dump_context(struct netlink_callback *cb) { return (struct smc_diag_dump_ctx *)cb->ctx; } static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) { struct smc_sock *smc = smc_sk(sk); memset(r, 0, sizeof(*r)); r->diag_family = sk->sk_family; sock_diag_save_cookie(sk, r->id.idiag_cookie); if (!smc->clcsock) return; r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); r->id.idiag_dport = smc->clcsock->sk->sk_dport; r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; if (sk->sk_protocol == SMCPROTO_SMC) { r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; #if IS_ENABLED(CONFIG_IPV6) } else if (sk->sk_protocol == SMCPROTO_SMC6) { memcpy(&r->id.idiag_src, &smc->clcsock->sk->sk_v6_rcv_saddr, sizeof(smc->clcsock->sk->sk_v6_rcv_saddr)); memcpy(&r->id.idiag_dst, &smc->clcsock->sk->sk_v6_daddr, sizeof(smc->clcsock->sk->sk_v6_daddr)); #endif } } static int smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct smc_diag_msg *r, struct user_namespace *user_ns) { if (nla_put_u8(skb, SMC_DIAG_SHUTDOWN, sk->sk_shutdown)) return 1; r->diag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->diag_inode = sock_i_ino(sk); return 0; } static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, const struct smc_diag_req *req, struct nlattr *bc) { struct smc_sock *smc = smc_sk(sk); struct smc_diag_fallback fallback; struct user_namespace *user_ns; struct smc_diag_msg *r; struct nlmsghdr *nlh; nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 
cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*r), NLM_F_MULTI); if (!nlh) return -EMSGSIZE; r = nlmsg_data(nlh); smc_diag_msg_common_fill(r, sk); r->diag_state = sk->sk_state; if (smc->use_fallback) r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP; else if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd) r->diag_mode = SMC_DIAG_MODE_SMCD; else r->diag_mode = SMC_DIAG_MODE_SMCR; user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk); if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) goto errout; fallback.reason = smc->fallback_rsn; fallback.peer_diagnosis = smc->peer_diagnosis; if (nla_put(skb, SMC_DIAG_FALLBACK, sizeof(fallback), &fallback) < 0) goto errout; if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.alert_token_local) { struct smc_connection *conn = &smc->conn; struct smc_diag_conninfo cinfo = { .token = conn->alert_token_local, .sndbuf_size = conn->sndbuf_desc ? conn->sndbuf_desc->len : 0, .rmbe_size = conn->rmb_desc ? conn->rmb_desc->len : 0, .peer_rmbe_size = conn->peer_rmbe_size, .rx_prod.wrap = conn->local_rx_ctrl.prod.wrap, .rx_prod.count = conn->local_rx_ctrl.prod.count, .rx_cons.wrap = conn->local_rx_ctrl.cons.wrap, .rx_cons.count = conn->local_rx_ctrl.cons.count, .tx_prod.wrap = conn->local_tx_ctrl.prod.wrap, .tx_prod.count = conn->local_tx_ctrl.prod.count, .tx_cons.wrap = conn->local_tx_ctrl.cons.wrap, .tx_cons.count = conn->local_tx_ctrl.cons.count, .tx_prod_flags = *(u8 *)&conn->local_tx_ctrl.prod_flags, .tx_conn_state_flags = *(u8 *)&conn->local_tx_ctrl.conn_state_flags, .rx_prod_flags = *(u8 *)&conn->local_rx_ctrl.prod_flags, .rx_conn_state_flags = *(u8 *)&conn->local_rx_ctrl.conn_state_flags, .tx_prep.wrap = conn->tx_curs_prep.wrap, .tx_prep.count = conn->tx_curs_prep.count, .tx_sent.wrap = conn->tx_curs_sent.wrap, .tx_sent.count = conn->tx_curs_sent.count, .tx_fin.wrap = conn->tx_curs_fin.wrap, .tx_fin.count = conn->tx_curs_fin.count, }; if (nla_put(skb, SMC_DIAG_CONNINFO, sizeof(cinfo), &cinfo) < 0) goto errout; } if 
(smc_conn_lgr_valid(&smc->conn) && !smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_link *link = smc->conn.lnk; struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, .lnk[0].ibport = link->ibport, .lnk[0].link_id = link->link_id, }; memcpy(linfo.lnk[0].ibname, link->smcibdev->ibdev->name, sizeof(link->smcibdev->ibdev->name)); smc_gid_be16_convert(linfo.lnk[0].gid, link->gid); smc_gid_be16_convert(linfo.lnk[0].peer_gid, link->peer_gid); if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0) goto errout; } if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) { struct smc_connection *conn = &smc->conn; struct smcd_diag_dmbinfo dinfo; struct smcd_dev *smcd = conn->lgr->smcd; struct smcd_gid smcd_gid; memset(&dinfo, 0, sizeof(dinfo)); dinfo.linkid = *((u32 *)conn->lgr->id); dinfo.peer_gid = conn->lgr->peer_gid.gid; dinfo.peer_gid_ext = conn->lgr->peer_gid.gid_ext; smcd->ops->get_local_gid(smcd, &smcd_gid); dinfo.my_gid = smcd_gid.gid; dinfo.my_gid_ext = smcd_gid.gid_ext; dinfo.token = conn->rmb_desc->token; dinfo.peer_token = conn->peer_token; if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0) goto errout; } nlmsg_end(skb, nlh); return 0; errout: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int smc_diag_dump_proto(struct proto *prot, struct sk_buff *skb, struct netlink_callback *cb, int p_type) { struct smc_diag_dump_ctx *cb_ctx = smc_dump_context(cb); struct net *net = sock_net(skb->sk); int snum = cb_ctx->pos[p_type]; struct nlattr *bc = NULL; struct hlist_head *head; int rc = 0, num = 0; struct sock *sk; read_lock(&prot->h.smc_hash->lock); head = &prot->h.smc_hash->ht; if (hlist_empty(head)) goto out; sk_for_each(sk, head) { if (!net_eq(sock_net(sk), net)) continue; if (num < snum) goto next; rc = __smc_diag_dump(sk, skb, cb, 
nlmsg_data(cb->nlh), bc); if (rc < 0) goto out; next: num++; } out: read_unlock(&prot->h.smc_hash->lock); cb_ctx->pos[p_type] = num; return rc; } static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { int rc = 0; rc = smc_diag_dump_proto(&smc_proto, skb, cb, SMCPROTO_SMC); if (!rc) smc_diag_dump_proto(&smc_proto6, skb, cb, SMCPROTO_SMC6); return skb->len; } static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { struct net *net = sock_net(skb->sk); if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && h->nlmsg_flags & NLM_F_DUMP) { { struct netlink_dump_control c = { .dump = smc_diag_dump, .min_dump_alloc = SKB_WITH_OVERHEAD(32768), }; return netlink_dump_start(net->diag_nlsk, skb, h, &c); } } return 0; } static const struct sock_diag_handler smc_diag_handler = { .family = AF_SMC, .dump = smc_diag_handler_dump, }; static int __init smc_diag_init(void) { return sock_diag_register(&smc_diag_handler); } static void __exit smc_diag_exit(void) { sock_diag_unregister(&smc_diag_handler); } module_init(smc_diag_init); module_exit(smc_diag_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SMC socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 43 /* AF_SMC */); MODULE_ALIAS_GENL_FAMILY(SMCR_GENL_FAMILY_NAME);
95 20 4 2 3 2 5 2 4 3 3 3 2 7 3 4 2 6 4 3 1 2 3 2 2 4 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 
514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BPF_CGROUP_H
#define _BPF_CGROUP_H

#include <linux/bpf.h>
#include <linux/bpf-cgroup-defs.h>
#include <linux/errno.h>
#include <linux/jump_label.h>
#include <linux/percpu.h>
#include <linux/rbtree.h>
#include <net/sock.h>
#include <uapi/linux/bpf.h>

/* Forward declarations for types only used through pointers below */
struct sock;
struct sockaddr;
struct cgroup;
struct sk_buff;
struct bpf_map;
struct bpf_prog;
struct bpf_sock_ops_kern;
struct bpf_cgroup_storage;
struct ctl_table;
struct ctl_table_header;
struct task_struct;

unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
				       const struct bpf_insn *insn);
unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
					 const struct bpf_insn *insn);
unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
					  const struct bpf_insn *insn);

#ifdef CONFIG_CGROUP_BPF

/* Expands to "case BPF_<type>: return <type>" for the switch below */
#define CGROUP_ATYPE(type) \
	case BPF_##type: return type

/*
 * Translate an enum bpf_attach_type value (BPF_CGROUP_*) into the matching
 * enum cgroup_bpf_attach_type value (CGROUP_*). Attach types that are not
 * listed map to CGROUP_BPF_ATTACH_TYPE_INVALID.
 */
static inline enum cgroup_bpf_attach_type
to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
{
	switch (attach_type) {
	CGROUP_ATYPE(CGROUP_INET_INGRESS);
	CGROUP_ATYPE(CGROUP_INET_EGRESS);
	CGROUP_ATYPE(CGROUP_INET_SOCK_CREATE);
	CGROUP_ATYPE(CGROUP_SOCK_OPS);
	CGROUP_ATYPE(CGROUP_DEVICE);
	CGROUP_ATYPE(CGROUP_INET4_BIND);
	CGROUP_ATYPE(CGROUP_INET6_BIND);
	CGROUP_ATYPE(CGROUP_INET4_CONNECT);
	CGROUP_ATYPE(CGROUP_INET6_CONNECT);
	CGROUP_ATYPE(CGROUP_UNIX_CONNECT);
	CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
	CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
	CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
	CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
	CGROUP_ATYPE(CGROUP_UNIX_SENDMSG);
	CGROUP_ATYPE(CGROUP_SYSCTL);
	CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
	CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
	CGROUP_ATYPE(CGROUP_UNIX_RECVMSG);
	CGROUP_ATYPE(CGROUP_GETSOCKOPT);
	CGROUP_ATYPE(CGROUP_SETSOCKOPT);
	CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
	CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
	CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME);
	CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
	CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
	CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME);
	CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
	default:
		return CGROUP_BPF_ATTACH_TYPE_INVALID;
	}
}

#undef CGROUP_ATYPE

/* One static key per attach type; cgroup_bpf_enabled() tests the key */
extern struct static_key_false cgroup_bpf_enabled_key[MAX_CGROUP_BPF_ATTACH_TYPE];
#define cgroup_bpf_enabled(atype) static_branch_unlikely(&cgroup_bpf_enabled_key[atype])

/* Iterate stype over 0 .. MAX_BPF_CGROUP_STORAGE_TYPE - 1 */
#define for_each_cgroup_storage_type(stype) \
	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)

struct bpf_cgroup_storage_map;

/* RCU head followed by a flexible array of storage bytes */
struct bpf_storage_buffer {
	struct rcu_head rcu;
	char data[];
};

struct bpf_cgroup_storage {
	/* Either a plain buffer or a per-CPU buffer shares this slot */
	union {
		struct bpf_storage_buffer *buf;
		void __percpu *percpu_buf;
	};
	struct bpf_cgroup_storage_map *map;
	struct bpf_cgroup_storage_key key;
	struct list_head list_map;
	struct list_head list_cg;
	struct rb_node node;
	struct rcu_head rcu;
};

/* A bpf_link together with the cgroup and attach type it refers to */
struct bpf_cgroup_link {
	struct bpf_link link;
	struct cgroup *cgroup;
	enum bpf_attach_type type;
};

/* List node holding a program, its link and one storage slot per type */
struct bpf_prog_list {
	struct hlist_node node;
	struct bpf_prog *prog;
	struct bpf_cgroup_link *link;
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
};

int cgroup_bpf_inherit(struct cgroup *cgrp);
void cgroup_bpf_offline(struct cgroup *cgrp);

int __cgroup_bpf_run_filter_skb(struct sock *sk,
				struct sk_buff *skb,
				enum cgroup_bpf_attach_type atype);

int __cgroup_bpf_run_filter_sk(struct sock *sk,
			       enum cgroup_bpf_attach_type atype);

int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
				      struct sockaddr *uaddr,
				      int *uaddrlen,
				      enum cgroup_bpf_attach_type atype,
				      void *t_ctx,
				      u32 *flags);

int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
				     struct bpf_sock_ops_kern *sock_ops,
				     enum cgroup_bpf_attach_type atype);

int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum cgroup_bpf_attach_type atype);

int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
				   struct ctl_table *table, int write,
				   char **buf, size_t *pcount, loff_t *ppos,
				   enum cgroup_bpf_attach_type atype);

int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
				       int *optname, sockptr_t optval,
				       int *optlen, char **kernel_optval);

int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
				       int optname, sockptr_t optval,
				       sockptr_t optlen, int max_optlen,
				       int retval);

int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
					    int optname, void *optval,
					    int *optlen, int retval);

/*
 * Classify a storage map: BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE maps use
 * per-CPU storage, every other map type is treated as shared storage.
 */
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
	struct bpf_map *map)
{
	if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
		return BPF_CGROUP_STORAGE_PERCPU;

	return BPF_CGROUP_STORAGE_SHARED;
}

struct bpf_cgroup_storage *
cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
		      void *key, bool locked);
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
					enum bpf_cgroup_storage_type stype);
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type);
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map);

int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
				     void *value, u64 flags);

/*
 * Opportunistic check to see whether we have any BPF program attached:
 * true when the effective program array of the socket's cgroup for @type
 * is anything other than the shared empty array.
 */
static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
					   enum cgroup_bpf_attach_type type)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_prog_array *array;

	/* Only the pointer value is compared; the array is not dereferenced. */
	array = rcu_access_pointer(cgrp->bpf.effective[type]);
	return array != &bpf_empty_prog_array.hdr;
}

/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled.
*/
/*
 * Evaluates to 0 unless the ingress static key is on and the socket's cgroup
 * has a non-empty effective array, in which case it evaluates to the result
 * of __cgroup_bpf_run_filter_skb().
 */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
({									      \
	int __ret = 0;							      \
	if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) &&			      \
	    cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS))		      \
		__ret = __cgroup_bpf_run_filter_skb(sk, skb,		      \
						    CGROUP_INET_INGRESS); \
									      \
	__ret;								      \
})

/*
 * Egress variant: requires a non-NULL sk, resolves it with sk_to_full_sk()
 * and runs the filter only when the result is a full socket, is the same
 * socket skb_to_full_sk(skb) yields, and has programs attached.
 */
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb)				\
({										\
	int __ret = 0;								\
	if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) {			\
		typeof(sk) __sk = sk_to_full_sk(sk);				\
		if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) &&	\
		    cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS))		\
			__ret = __cgroup_bpf_run_filter_skb(__sk, skb,		\
						      CGROUP_INET_EGRESS); \
	}									\
	__ret;									\
})

/* Generic socket hook: 0 unless the static key for @atype is enabled */
#define BPF_CGROUP_RUN_SK_PROG(sk, atype)				       \
({									       \
	int __ret = 0;							       \
	if (cgroup_bpf_enabled(atype)) {					       \
		__ret = __cgroup_bpf_run_filter_sk(sk, atype);		       \
	}								       \
	__ret;								       \
})

#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)				       \
	BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET_SOCK_CREATE)

#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk)			       \
	BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET_SOCK_RELEASE)

#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk)				       \
	BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET4_POST_BIND)

#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk)				       \
	BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND)

/*
 * Socket-address hook without locking: passes NULL for both the t_ctx and
 * flags arguments of __cgroup_bpf_run_filter_sock_addr().
 */
#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype)		       \
({									       \
	int __ret = 0;							       \
	if (cgroup_bpf_enabled(atype))					       \
		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
							  atype, NULL, NULL);  \
	__ret;								       \
})

/*
 * Same hook, but the call is bracketed by lock_sock()/release_sock() and
 * @t_ctx is forwarded; the flags argument is still NULL.
 */
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx)	       \
({									       \
	int __ret = 0;							       \
	if (cgroup_bpf_enabled(atype))	{				       \
		lock_sock(sk);						       \
		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
							  atype, t_ctx, NULL); \
		release_sock(sk);					       \
	}								       \
	__ret;								       \
})

/* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags
 * via upper bits of return code. The only flag that is supported
 * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
 * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
*/ #define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \ ({ \ u32 __flags = 0; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ atype, NULL, &__flags); \ release_sock(sk); \ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ } \ __ret; \ }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \ ((cgroup_bpf_enabled(CGROUP_INET4_CONNECT) || \ cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \ (sk)->sk_prot->pre_connect) #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT) #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) #define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL) #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) #define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx) #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL) #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, 
CGROUP_UDP6_RECVMSG, NULL) #define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL) /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot be traced by * sk_to_full_sk(). * * e.g. sock_ops->sk is a request_sock and it is under syncookie mode. * Its listener-sk is not attached to the rsk_listener. * In this case, the caller holds the listener-sk (unlocked), * set its sock_ops->sk to req_sk, and call this SOCK_OPS"_SK" with * the listener-sk such that the cgroup-bpf-progs of the * listener-sk will be run. * * Regardless of syncookie mode or not, * calling bpf_setsockopt on listener-sk will not make sense anyway, * so passing 'sock_ops->sk == req_sk' to the bpf prog is appropriate here. */ #define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_SOCK_OPS)) \ __ret = __cgroup_bpf_run_filter_sock_ops(sk, \ sock_ops, \ CGROUP_SOCK_OPS); \ __ret; \ }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) && (sock_ops)->sk) { \ typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \ if (__sk && sk_fullsock(__sk)) \ __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \ sock_ops, \ CGROUP_SOCK_OPS); \ } \ __ret; \ }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_DEVICE)) \ __ret = __cgroup_bpf_check_dev_permission(atype, major, minor, \ access, \ CGROUP_DEVICE); \ \ __ret; \ }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_SYSCTL)) \ __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ buf, count, pos, \ CGROUP_SYSCTL); \ __ret; \ }) #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) \ ({ \ int __ret = 0; \ if 
(cgroup_bpf_enabled(CGROUP_SETSOCKOPT) && \ cgroup_bpf_sock_enabled(sock, CGROUP_SETSOCKOPT)) \ __ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \ optname, optval, \ optlen, \ kernel_optval); \ __ret; \ }) #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ copy_from_sockptr(&__ret, optlen, sizeof(int)); \ __ret; \ }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \ max_optlen, retval) \ ({ \ int __ret = retval; \ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT) && \ cgroup_bpf_sock_enabled(sock, CGROUP_GETSOCKOPT)) \ if (!(sock)->sk_prot->bpf_bypass_getsockopt || \ !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \ tcp_bpf_bypass_getsockopt, \ level, optname)) \ __ret = __cgroup_bpf_run_filter_getsockopt( \ sock, level, optname, optval, optlen, \ max_optlen, retval); \ __ret; \ }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ optlen, retval) \ ({ \ int __ret = retval; \ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ __ret = __cgroup_bpf_run_filter_getsockopt_kern( \ sock, level, optname, optval, optlen, retval); \ __ret; \ }) int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); const struct bpf_func_proto * cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); const struct bpf_func_proto * cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); #else static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct 
bpf_prog *prog) { return -EINVAL; } static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) { return -EINVAL; } static inline int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EINVAL; } static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { return -EINVAL; } static inline const struct bpf_func_proto * cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { return NULL; } static inline const struct bpf_func_proto * cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { return NULL; } static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map) { return 0; } static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; } static inline void bpf_cgroup_storage_free( struct bpf_cgroup_storage *storage) {} static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value) { return 0; } static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, void *value, u64 flags) { return 0; } #define cgroup_bpf_enabled(atype) (0) #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) #define 
BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ optlen, max_optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) #define for_each_cgroup_storage_type(stype) for (; false; ) #endif /* CONFIG_CGROUP_BPF */ #endif /* _BPF_CGROUP_H */
1 1 1 1 1 6 6 6 5 1 1 13 1 13 18 17 7 14 13 35 1 34 34 2 27 27 20 6 26 25 1 18 8 9 17 13 13 32 25 7 16 16 16 16 9 23 1 1 1 2 2 52 44 9 29 22 19 9 9 19 29 4 4 4 2 2 4 52 34 18 52 7 45 1 51 38 15 52 52 7 53 53 7 46 46 46 57 57 57 19 1 4 138 138 136 30 29 83 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 
464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 
964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 // SPDX-License-Identifier: GPL-2.0-or-later /* -*- linux-c -*- * INET 802.1Q VLAN * Ethernet-type device handling. * * Authors: Ben Greear <greearb@candelatech.com> * Please send support related email to: netdev@vger.kernel.org * VLAN Home Page: http://www.candelatech.com/~greear/vlan.html * * Fixes: Mar 22 2001: Martin Bokaemper <mbokaemper@unispherenetworks.com> * - reset skb->pkt_type on incoming packets when MAC was changed * - see that changed MAC is saddr for outgoing packets * Oct 20, 2001: Ard van Breeman: * - Fix MC-list, finally. * - Flush MC-list on VLAN destroy. 
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/net_tstamp.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/phy.h>
#include <net/arp.h>
#include <net/macsec.h>

#include "vlan.h"
#include "vlanproc.h"
#include <linux/if_vlan.h>
#include <linux/netpoll.h>

/*
 *	Create the VLAN header for an arbitrary protocol layer
 *
 *	saddr=NULL	means use device source address
 *	daddr=NULL	means leave destination address (eg unresolved arp)
 *
 *  This is called when the SKB is moving down the stack towards the
 *  physical devices.
 *
 *  The VLAN header is only pushed here when VLAN_FLAG_REORDER_HDR is not
 *  set; in that case the lower-layer header is built with the VLAN protocol
 *  as its type and VLAN_HLEN is added to the length passed down. A positive
 *  result from dev_hard_header() is extended by the bytes pushed here.
 */
static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
				unsigned short type,
				const void *daddr, const void *saddr,
				unsigned int len)
{
	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
	struct vlan_hdr *vhdr;
	unsigned int vhdrlen = 0;
	u16 vlan_tci = 0;
	int rc;

	if (!(vlan->flags & VLAN_FLAG_REORDER_HDR)) {
		vhdr = skb_push(skb, VLAN_HLEN);

		/* TCI = VLAN id combined with the egress QoS bits for skb->priority */
		vlan_tci = vlan->vlan_id;
		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority);
		vhdr->h_vlan_TCI = htons(vlan_tci);

		/*
		 *  Set the protocol type. For a packet of type ETH_P_802_3/2 we
		 *  put the length in here instead.
		 */
		if (type != ETH_P_802_3 && type != ETH_P_802_2)
			vhdr->h_vlan_encapsulated_proto = htons(type);
		else
			vhdr->h_vlan_encapsulated_proto = htons(len);

		skb->protocol = vlan->vlan_proto;
		type = ntohs(vlan->vlan_proto);
		vhdrlen = VLAN_HLEN;
	}

	/* Before delegating work to the lower layer, enter our MAC-address */
	if (saddr == NULL)
		saddr = dev->dev_addr;

	/* Now make the underlying real hard header */
	dev = vlan->real_dev;
	rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen);
	if (rc > 0)
		rc += vhdrlen;
	return rc;
}

/*
 * Send @skb through the VLAN's netpoll instance. Without
 * CONFIG_NET_POLL_CONTROLLER this path is not expected to be reached and
 * hits BUG().
 */
static inline netdev_tx_t vlan_netpoll_send_skb(struct vlan_dev_priv *vlan, struct sk_buff *skb)
{
#ifdef CONFIG_NET_POLL_CONTROLLER
	return netpoll_send_skb(vlan->netpoll, skb);
#else
	BUG();
	return NETDEV_TX_OK;
#endif
}

/*
 * Transmit handler: tag the frame if needed, redirect it to the real device
 * and queue it there. Per-CPU tx_packets/tx_bytes are bumped when
 * dev_queue_xmit() reports NET_XMIT_SUCCESS or NET_XMIT_CN, tx_dropped
 * otherwise. Returns the result of dev_queue_xmit() (or of the netpoll path).
 */
static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
					    struct net_device *dev)
{
	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
	unsigned int len;
	int ret;

	/* Handle non-VLAN frames if they are sent to us, for example by DHCP.
	 *
	 * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
	 */
	if (vlan->flags & VLAN_FLAG_REORDER_HDR ||
	    veth->h_vlan_proto != vlan->vlan_proto) {
		u16 vlan_tci;
		vlan_tci = vlan->vlan_id;
		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority);
		__vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci);
	}

	skb->dev = vlan->real_dev;
	/* Sample the length now; presumably the skb must not be touched once
	 * it has been handed to dev_queue_xmit() - confirm.
	 */
	len = skb->len;
	if (unlikely(netpoll_tx_running(dev)))
		return vlan_netpoll_send_skb(vlan, skb);

	ret = dev_queue_xmit(skb);

	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
		struct vlan_pcpu_stats *stats;

		stats = this_cpu_ptr(vlan->vlan_pcpu_stats);
		u64_stats_update_begin(&stats->syncp);
		u64_stats_inc(&stats->tx_packets);
		u64_stats_add(&stats->tx_bytes, len);
		u64_stats_update_end(&stats->syncp);
	} else {
		this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped);
	}

	return ret;
}

/*
 * Set a new MTU. The upper bound is the real device's MTU, reduced by
 * VLAN_HLEN when netif_reduces_vlan_mtu(real_dev) is true; anything larger
 * is rejected with -ERANGE.
 */
static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
	unsigned int max_mtu = real_dev->mtu;

	if (netif_reduces_vlan_mtu(real_dev))
		max_mtu -= VLAN_HLEN;
	if (max_mtu < new_mtu)
		return -ERANGE;

	dev->mtu = new_mtu;

	return 0;
}

/*
 * Map VLAN priority @vlan_prio (only the low 3 bits are used) to @skb_prio
 * for ingress. nr_ingress_mappings counts the slots holding a non-zero
 * skb priority and is adjusted when a slot changes between zero and non-zero.
 */
void vlan_dev_set_ingress_priority(const struct net_device *dev,
				   u32 skb_prio, u16 vlan_prio)
{
	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);

	if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio)
		vlan->nr_ingress_mappings--;
	else if (!vlan->ingress_priority_map[vlan_prio & 0x7] && skb_prio)
		vlan->nr_ingress_mappings++;

	vlan->ingress_priority_map[vlan_prio & 0x7] = skb_prio;
}

/*
 * Map @skb_prio to VLAN priority @vlan_prio for egress. Mappings live in
 * chains selected by the low 4 bits of @skb_prio. An existing entry for
 * @skb_prio is updated in place; otherwise a new entry is allocated and put
 * at the head of its chain. nr_egress_mappings counts entries with non-zero
 * QoS bits.
 *
 * Returns 0 on success or -ENOBUFS when the allocation fails.
 */
int vlan_dev_set_egress_priority(const struct net_device *dev,
				 u32 skb_prio, u16 vlan_prio)
{
	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
	struct vlan_priority_tci_mapping *mp = NULL;
	struct vlan_priority_tci_mapping *np;
	u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;

	/* See if a priority mapping exists.. */
	mp = vlan->egress_priority_map[skb_prio & 0xF];
	while (mp) {
		if (mp->priority == skb_prio) {
			if (mp->vlan_qos && !vlan_qos)
				vlan->nr_egress_mappings--;
			else if (!mp->vlan_qos && vlan_qos)
				vlan->nr_egress_mappings++;
			mp->vlan_qos = vlan_qos;
			return 0;
		}
		mp = mp->next;
	}

	/* Create a new mapping then. */
	mp = vlan->egress_priority_map[skb_prio & 0xF];
	np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL);
	if (!np)
		return -ENOBUFS;

	np->next = mp;
	np->priority = skb_prio;
	np->vlan_qos = vlan_qos;
	/* Before inserting this element in hash table, make sure all its fields
	 * are committed to memory.
	 * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask()
	 */
	smp_wmb();
	vlan->egress_priority_map[skb_prio & 0xF] = np;
	if (vlan_qos)
		vlan->nr_egress_mappings++;
	return 0;
}

/* Flags are defined in the vlan_flags enum in
 * include/uapi/linux/if_vlan.h file.
 *
 * Only the bits selected by @mask are replaced with the matching bits of
 * @flags. A @mask containing any bit outside the five flags checked below is
 * rejected with -EINVAL. If the device is running and the GVRP or MVRP flag
 * changed, the corresponding join/leave request is issued.
 */
int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
{
	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
	u32 old_flags = vlan->flags;

	if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
		     VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP |
		     VLAN_FLAG_BRIDGE_BINDING))
		return -EINVAL;

	vlan->flags = (old_flags & ~mask) | (flags & mask);

	if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_GVRP) {
		if (vlan->flags & VLAN_FLAG_GVRP)
			vlan_gvrp_request_join(dev);
		else
			vlan_gvrp_request_leave(dev);
	}

	if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_MVRP) {
		if (vlan->flags & VLAN_FLAG_MVRP)
			vlan_mvrp_request_join(dev);
		else
			vlan_mvrp_request_leave(dev);
	}
	return 0;
}

/* Copy the real device's name into @result (a buffer of @size bytes). */
void vlan_dev_get_realdev_name(const struct net_device *dev, char *result, size_t size)
{
	strscpy_pad(result, vlan_dev_priv(dev)->real_dev->name, size);
}

/*
 * If @dev's address was taken from the real device (NET_ADDR_STOLEN), copy
 * @real_dev's current address again and raise NETDEV_CHANGEADDR.
 * Returns true when the address was copied, false otherwise.
 */
bool vlan_dev_inherit_address(struct net_device *dev,
			      struct net_device *real_dev)
{
	if (dev->addr_assign_type != NET_ADDR_STOLEN)
		return false;

	eth_hw_addr_set(dev, real_dev->dev_addr);
	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
	return true;
}

/*
 * Bring the VLAN device up. Fails with -ENETDOWN when the real device is
 * down and VLAN_FLAG_LOOSE_BINDING is not set. Otherwise the unicast
 * address, allmulti and promiscuity state are applied to the real device;
 * on failure the steps already taken are undone in reverse order.
 */
static int vlan_dev_open(struct net_device *dev)
{
	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
	struct net_device *real_dev = vlan->real_dev;
	int err;

	if (!(real_dev->flags & IFF_UP) &&
	    !(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
		return -ENETDOWN;

	/* Own address differs and could not be inherited: add it to the
	 * real device's unicast list.
	 */
	if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr) &&
	    !vlan_dev_inherit_address(dev, real_dev)) {
		err = dev_uc_add(real_dev, dev->dev_addr);
		if (err < 0)
			goto out;
	}

	if (dev->flags & IFF_ALLMULTI) {
		err = dev_set_allmulti(real_dev, 1);
		if (err < 0)
			goto del_unicast;
	}
	if (dev->flags & IFF_PROMISC) {
		err = dev_set_promiscuity(real_dev, 1);
		if (err < 0)
			goto clear_allmulti;
	}

	/* Record the real device's address as seen at open time */
	ether_addr_copy(vlan->real_dev_addr, real_dev->dev_addr);

	if (vlan->flags & VLAN_FLAG_GVRP)
		vlan_gvrp_request_join(dev);

	if (vlan->flags & VLAN_FLAG_MVRP)
		vlan_mvrp_request_join(dev);

	if (netif_carrier_ok(real_dev) &&
	    !(vlan->flags & VLAN_FLAG_BRIDGE_BINDING))
		netif_carrier_on(dev);
	return 0;

clear_allmulti:
	if (dev->flags & IFF_ALLMULTI)
		dev_set_allmulti(real_dev, -1);
del_unicast:
	if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr))
		dev_uc_del(real_dev, dev->dev_addr);
out:
	netif_carrier_off(dev);
	return err;
}

/*
 * Take the VLAN device down: unsync the address lists from the real device,
 * drop the allmulti/promiscuity counts and the unicast address taken at
 * open, and turn the carrier off unless VLAN_FLAG_BRIDGE_BINDING is set.
 * Always returns 0.
 */
static int vlan_dev_stop(struct net_device *dev)
{
	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
	struct net_device *real_dev = vlan->real_dev;

	dev_mc_unsync(real_dev, dev);
	dev_uc_unsync(real_dev, dev);
	if (dev->flags & IFF_ALLMULTI)
		dev_set_allmulti(real_dev, -1);
	if (dev->flags & IFF_PROMISC)
		dev_set_promiscuity(real_dev, -1);

	if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr))
		dev_uc_del(real_dev, dev->dev_addr);

	if (!(vlan->flags & VLAN_FLAG_BRIDGE_BINDING))
		netif_carrier_off(dev);
	return 0;
}

/*
 * Change the device's MAC address; @p points to a struct sockaddr.
 * Returns -EADDRNOTAVAIL for an invalid Ethernet address. While the device
 * is down the address is only stored. While it is up, the new address is
 * first added to the real device's unicast list (unless it equals the real
 * device's own address), then the old one is removed.
 */
static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
	struct sockaddr *addr = p;
	int err;

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	if (!(dev->flags & IFF_UP))
		goto out;

	if (!ether_addr_equal(addr->sa_data, real_dev->dev_addr)) {
		err = dev_uc_add(real_dev, addr->sa_data);
		if (err < 0)
			return err;
	}

	if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr))
		dev_uc_del(real_dev, dev->dev_addr);

out:
	eth_hw_addr_set(dev, addr->sa_data);
	return 0;
}

/* Read the hardware timestamping configuration from the real device. */
static int vlan_hwtstamp_get(struct net_device *dev,
			     struct kernel_hwtstamp_config *cfg)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;

	return generic_hwtstamp_get_lower(real_dev, cfg);
}

/*
 * Apply a hardware timestamping configuration to the real device.
 * Refused with -EOPNOTSUPP when the VLAN device and the real device are in
 * different network namespaces.
 */
static int vlan_hwtstamp_set(struct net_device *dev,
			     struct kernel_hwtstamp_config *cfg,
			     struct netlink_ext_ack *extack)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;

	if (!net_eq(dev_net(dev), dev_net(real_dev)))
		return -EOPNOTSUPP;

	return generic_hwtstamp_set_lower(real_dev, cfg, extack);
}

/*
 * Forward the MII ioctls (SIOCGMIIPHY, SIOCGMIIREG, SIOCSMIIREG) to the real
 * device's ndo_eth_ioctl, using a copy of the request that carries the real
 * device's name. The payload is copied back to @ifr only on success. Any
 * other command, or a real device that is absent or lacks the op, yields
 * -EOPNOTSUPP.
 */
static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
	const struct net_device_ops *ops = real_dev->netdev_ops;
	struct ifreq ifrr;
	int err = -EOPNOTSUPP;

	strscpy_pad(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
	ifrr.ifr_ifru = ifr->ifr_ifru;

	switch (cmd) {
	case SIOCGMIIPHY:
	case SIOCGMIIREG:
	case SIOCSMIIREG:
		if (netif_device_present(real_dev) && ops->ndo_eth_ioctl)
			err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd);
		break;
	}

	if (!err)
		ifr->ifr_ifru = ifrr.ifr_ifru;

	return err;
}

/*
 * Delegate neighbour setup to the real device's ndo_neigh_setup when the
 * device is present and implements it; otherwise return 0.
 */
static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
	const struct net_device_ops *ops = real_dev->netdev_ops;
	int err = 0;

	if (netif_device_present(real_dev) && ops->ndo_neigh_setup)
		err = ops->ndo_neigh_setup(real_dev, pa);

	return err;
}

#if IS_ENABLED(CONFIG_FCOE)
/*
 * FCoE callbacks: each forwards to the real device's matching ndo_fcoe_* op
 * when that op is set. When it is not set the DDP helpers return 0 and
 * enable/disable return -EINVAL.
 */
static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid,
				   struct scatterlist *sgl, unsigned int sgc)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
	const struct net_device_ops *ops = real_dev->netdev_ops;
	int rc = 0;

	if (ops->ndo_fcoe_ddp_setup)
		rc = ops->ndo_fcoe_ddp_setup(real_dev, xid, sgl, sgc);

	return rc;
}

static int vlan_dev_fcoe_ddp_done(struct net_device *dev, u16 xid)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
	const struct net_device_ops *ops = real_dev->netdev_ops;
	int len = 0;

	if (ops->ndo_fcoe_ddp_done)
		len = ops->ndo_fcoe_ddp_done(real_dev, xid);

	return len;
}

static int vlan_dev_fcoe_enable(struct net_device *dev)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
	const struct net_device_ops *ops = real_dev->netdev_ops;
	int rc = -EINVAL;

	if (ops->ndo_fcoe_enable)
		rc = ops->ndo_fcoe_enable(real_dev);
	return rc;
}

static int vlan_dev_fcoe_disable(struct net_device *dev)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
	const struct net_device_ops *ops = real_dev->netdev_ops;
	int rc = -EINVAL;

	if (ops->ndo_fcoe_disable)
		rc = ops->ndo_fcoe_disable(real_dev);
	return rc;
}

static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
				    struct scatterlist *sgl, unsigned int sgc)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
	const struct net_device_ops *ops = real_dev->netdev_ops;
	int rc = 0;

	if (ops->ndo_fcoe_ddp_target)
		rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc);

	return rc;
}
#endif

#ifdef NETDEV_FCOE_WWNN
/*
 * Query the WWN from the real device's ndo_fcoe_get_wwn; returns -EINVAL
 * when the real device does not provide that op.
 */
static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
	const struct net_device_ops *ops = real_dev->netdev_ops;
	int rc = -EINVAL;

	if (ops->ndo_fcoe_get_wwn)
		rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type);
	return rc;
}
#endif

/*
 * Propagate a change of IFF_ALLMULTI / IFF_PROMISC to the real device while
 * the VLAN device is up: +1 when the flag is now set, -1 when it was cleared.
 */
static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;

	if (dev->flags & IFF_UP) {
		if (change & IFF_ALLMULTI)
			dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1);
		if (change & IFF_PROMISC)
			dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1);
	}
}

/* Sync the multicast and unicast address lists towards the real device. */
static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
{
	dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
	dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
}

/*
 * vlan network devices have devices nesting below it, and are a special
 * "super class" of normal network devices; split their locks off into a
 * separate class since they always nest.
 */
static struct lock_class_key vlan_netdev_xmit_lock_key;
static struct lock_class_key vlan_netdev_addr_lock_key;

/* Per-queue callback: put the queue's _xmit_lock into the VLAN lock class. */
static void vlan_dev_set_lockdep_one(struct net_device *dev,
				     struct netdev_queue *txq,
				     void *unused)
{
	lockdep_set_class(&txq->_xmit_lock, &vlan_netdev_xmit_lock_key);
}

/* Assign the VLAN lock classes to addr_list_lock and every TX queue lock. */
static void vlan_dev_set_lockdep_class(struct net_device *dev)
{
	lockdep_set_class(&dev->addr_list_lock,
			  &vlan_netdev_addr_lock_key);
	netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL);
}

/*
 * header_ops->parse_protocol: resolve the protocol via
 * __vlan_get_protocol(), starting from the h_vlan_proto field found at
 * skb->data.
 */
static __be16 vlan_parse_protocol(const struct sk_buff *skb)
{
	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);

	return __vlan_get_protocol(skb, veth->h_vlan_proto, NULL);
}

/* Header ops used when this driver builds the VLAN header itself. */
static const struct header_ops vlan_header_ops = {
	.create	 = vlan_dev_hard_header,
	.parse	 = eth_header_parse,
	.parse_protocol = vlan_parse_protocol,
};

/*
 * Header creation for the pass-through case: no VLAN header is added here,
 * the real device's header is built directly, defaulting the source address
 * to the VLAN device's own address when @saddr is NULL.
 */
static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev,
				     unsigned short type,
				     const void *daddr, const void *saddr,
				     unsigned int len)
{
	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
	struct net_device *real_dev = vlan->real_dev;

	if (saddr == NULL)
		saddr = dev->dev_addr;

	return dev_hard_header(skb, real_dev, type, daddr, saddr, len);
}

/* Header ops installed by vlan_dev_init() when the real device can offload
 * VLAN handling for this protocol.
 */
static const struct header_ops vlan_passthru_header_ops = {
	.create	 = vlan_passthru_hard_header,
	.parse	 = eth_header_parse,
	.parse_protocol = vlan_parse_protocol,
};

static struct device_type vlan_type = {
	.name	= "vlan",
};

/* Forward declaration; the table is defined near the end of the file. */
static const struct net_device_ops vlan_netdev_ops;

/*
 * ndo_init: derive flags, link state, feature sets, addresses, headroom and
 * header handling from the real device, allocate the per-CPU statistics and
 * take a tracked reference on the real device.
 *
 * Returns 0 on success or -ENOMEM if the statistics cannot be allocated.
 */
static int vlan_dev_init(struct net_device *dev)
{
	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
	struct net_device *real_dev = vlan->real_dev;

	netif_carrier_off(dev);

	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
	dev->flags  = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
					  IFF_MASTER | IFF_SLAVE);
	/* Inherit NOCARRIER/DORMANT from the real device, always mark PRESENT */
	dev->state  = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
					  (1<<__LINK_STATE_DORMANT))) |
		      (1<<__LINK_STATE_PRESENT);

	if (vlan->flags & VLAN_FLAG_BRIDGE_BINDING)
		dev->state |= (1 << __LINK_STATE_NOCARRIER);

	dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG |
			   NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE |
			   NETIF_F_GSO_ENCAP_ALL |
			   NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
			   NETIF_F_ALL_FCOE;

	if (real_dev->vlan_features & NETIF_F_HW_MACSEC)
		dev->hw_features |= NETIF_F_HW_MACSEC;

	dev->features |= dev->hw_features | NETIF_F_LLTX;
	netif_inherit_tso_max(dev, real_dev);
	if (dev->features & NETIF_F_VLAN_FEATURES)
		netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n");

	dev->vlan_features = real_dev->vlan_features & ~NETIF_F_ALL_FCOE;
	dev->hw_enc_features = vlan_tnl_features(real_dev);
	dev->mpls_features = real_dev->mpls_features;

	/* ipv6 shared card related stuff */
	dev->dev_id = real_dev->dev_id;

	/* No address configured yet: take the real device's one and remember
	 * that it was taken over (NET_ADDR_STOLEN).
	 */
	if (is_zero_ether_addr(dev->dev_addr)) {
		eth_hw_addr_set(dev, real_dev->dev_addr);
		dev->addr_assign_type = NET_ADDR_STOLEN;
	}
	if (is_zero_ether_addr(dev->broadcast))
		memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);

#if IS_ENABLED(CONFIG_FCOE)
	dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid;
#endif

	dev->needed_headroom = real_dev->needed_headroom;
	/* With VLAN offload on the real device no room for the VLAN header is
	 * needed in hard_header_len; otherwise reserve VLAN_HLEN extra.
	 */
	if (vlan_hw_offload_capable(real_dev->features, vlan->vlan_proto)) {
		dev->header_ops      = &vlan_passthru_header_ops;
		dev->hard_header_len = real_dev->hard_header_len;
	} else {
		dev->header_ops      = &vlan_header_ops;
		dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
	}

	dev->netdev_ops = &vlan_netdev_ops;

	SET_NETDEV_DEVTYPE(dev, &vlan_type);

	vlan_dev_set_lockdep_class(dev);

	vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
	if (!vlan->vlan_pcpu_stats)
		return -ENOMEM;

	/* Get vlan's reference to real_dev */
	netdev_hold(real_dev, &vlan->dev_tracker, GFP_KERNEL);

	return 0;
}

/* Note: this function might be called multiple times for the same device.
 *
 * Frees every entry of every egress priority chain; each chain head is
 * advanced before its entry is freed, so the table ends up empty.
 */
void vlan_dev_free_egress_priority(const struct net_device *dev)
{
	struct vlan_priority_tci_mapping *pm;
	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
	int i;

	for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
		while ((pm = vlan->egress_priority_map[i]) != NULL) {
			vlan->egress_priority_map[i] = pm->next;
			kfree(pm);
		}
	}
}

/* ndo_uninit: release the egress priority mappings. */
static void vlan_dev_uninit(struct net_device *dev)
{
	vlan_dev_free_egress_priority(dev);
}

/*
 * ndo_fix_features: restrict the requested @features to what the real device
 * offers for VLANs, while keeping the software features that were requested
 * and always setting NETIF_F_LLTX.
 */
static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
	netdev_features_t features)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
	netdev_features_t old_features = features;
	netdev_features_t lower_features;

	lower_features = netdev_intersect_features((real_dev->vlan_features |
						    NETIF_F_RXCSUM),
						   real_dev->features);

	/* Add HW_CSUM setting to preserve user ability to control
	 * checksum offload on the vlan device.
	 */
	if (lower_features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
		lower_features |= NETIF_F_HW_CSUM;
	features = netdev_intersect_features(features, lower_features);
	features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE);
	features |= NETIF_F_LLTX;

	return features;
}

/* ethtool: report the link settings of the real device. */
static int vlan_ethtool_get_link_ksettings(struct net_device *dev,
					   struct ethtool_link_ksettings *cmd)
{
	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);

	return __ethtool_get_link_ksettings(vlan->real_dev, cmd);
}

/* ethtool: fill in driver name and version; firmware version is "N/A". */
static void vlan_ethtool_get_drvinfo(struct net_device *dev,
				     struct ethtool_drvinfo *info)
{
	strscpy(info->driver, vlan_fullname, sizeof(info->driver));
	strscpy(info->version, vlan_version, sizeof(info->version));
	strscpy(info->fw_version, "N/A", sizeof(info->fw_version));
}

/* ethtool: report timestamping information obtained via the real device. */
static int vlan_ethtool_get_ts_info(struct net_device *dev,
				    struct ethtool_ts_info *info)
{
	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);

	return ethtool_get_ts_info_by_layer(vlan->real_dev, info);
}

/*
 * ndo_get_stats64: add the per-CPU counters of every possible CPU to @stats.
 * The 64-bit counters are read inside a u64_stats fetch/retry loop; the
 * 32-bit rx_errors and tx_dropped are read with READ_ONCE() and summed
 * separately.
 */
static void vlan_dev_get_stats64(struct net_device *dev,
				 struct rtnl_link_stats64 *stats)
{
	struct vlan_pcpu_stats *p;
	u32 rx_errors = 0, tx_dropped = 0;
	int i;

	for_each_possible_cpu(i) {
		u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
		unsigned int start;

		p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
		do {
			start = u64_stats_fetch_begin(&p->syncp);
			rxpackets	= u64_stats_read(&p->rx_packets);
			rxbytes		= u64_stats_read(&p->rx_bytes);
			rxmulticast	= u64_stats_read(&p->rx_multicast);
			txpackets	= u64_stats_read(&p->tx_packets);
			txbytes		= u64_stats_read(&p->tx_bytes);
		} while (u64_stats_fetch_retry(&p->syncp, start));

		stats->rx_packets	+= rxpackets;
		stats->rx_bytes		+= rxbytes;
		stats->multicast	+= rxmulticast;
		stats->tx_packets	+= txpackets;
		stats->tx_bytes		+= txbytes;
		/* rx_errors & tx_dropped are u32 */
		rx_errors	+= READ_ONCE(p->rx_errors);
		tx_dropped	+= READ_ONCE(p->tx_dropped);
	}
	stats->rx_errors  = rx_errors;
	stats->tx_dropped = tx_dropped;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
/* Intentionally empty: this callback performs no polling work. */
static void vlan_dev_poll_controller(struct net_device *dev)
{
	return;
}

/*
 * Allocate a netpoll instance and set it up on the real device. On success
 * it is stored in vlan->netpoll. Returns 0, -ENOMEM on allocation failure,
 * or the error from __netpoll_setup() (the instance is freed in that case).
 */
static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
{
	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
	struct net_device *real_dev = vlan->real_dev;
	struct netpoll *netpoll;
	int err = 0;

	netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL);
	err = -ENOMEM;
	if (!netpoll)
		goto out;

	err = __netpoll_setup(netpoll, real_dev);
	if (err) {
		kfree(netpoll);
		goto out;
	}

	vlan->netpoll = netpoll;

out:
	return err;
}

/* Detach and free the netpoll instance, if one is installed. */
static void vlan_dev_netpoll_cleanup(struct net_device *dev)
{
	struct vlan_dev_priv *vlan= vlan_dev_priv(dev);
	struct netpoll *netpoll = vlan->netpoll;

	if (!netpoll)
		return;

	/* Clear the pointer before the instance is released */
	vlan->netpoll = NULL;
	__netpoll_free(netpoll);
}
#endif /* CONFIG_NET_POLL_CONTROLLER */

/* ndo_get_iflink: the link of a VLAN device is its real device's ifindex. */
static int vlan_dev_get_iflink(const struct net_device *dev)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;

	return real_dev->ifindex;
}

/*
 * ndo_fill_forward_path: describe this hop as a VLAN encapsulation, continue
 * the walk at the real device and record the VLAN in the context.
 * Returns -ENOSPC when ctx->vlan[] is already full; note that @path and
 * ctx->dev have been updated by then.
 */
static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
				      struct net_device_path *path)
{
	struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);

	path->type = DEV_PATH_VLAN;
	path->encap.id = vlan->vlan_id;
	path->encap.proto = vlan->vlan_proto;
	path->dev = ctx->dev;
	ctx->dev = vlan->real_dev;
	if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
		return -ENOSPC;

	ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
	ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
	ctx->num_vlans++;

	return 0;
}

#if IS_ENABLED(CONFIG_MACSEC)

/* Return the macsec_ops of the real device below ctx->netdev (may be NULL). */
static const struct macsec_ops *vlan_get_macsec_ops(const struct macsec_context *ctx)
{
	return vlan_dev_priv(ctx->netdev)->real_dev->macsec_ops;
}

/* Call @func with @ctx; an unset callback is treated as success (0). */
static int vlan_macsec_offload(int (* const func)(struct macsec_context *),
			       struct macsec_context *ctx)
{
	if (unlikely(!func))
		return 0;

	return (*func)(ctx);
}

/*
 * MACsec offload wrappers. Every function below follows the same pattern:
 * look up the real device's macsec_ops, return -EOPNOTSUPP if there are
 * none, otherwise invoke the matching mdo_* callback through
 * vlan_macsec_offload().
 */
static int vlan_macsec_dev_open(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_dev_open, ctx);
}

static int vlan_macsec_dev_stop(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_dev_stop, ctx);
}

static int vlan_macsec_add_secy(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_add_secy, ctx);
}

static int vlan_macsec_upd_secy(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_upd_secy, ctx);
}

static int vlan_macsec_del_secy(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_del_secy, ctx);
}

static int vlan_macsec_add_rxsc(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_add_rxsc, ctx);
}

static int vlan_macsec_upd_rxsc(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_upd_rxsc, ctx);
}

static int vlan_macsec_del_rxsc(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_del_rxsc, ctx);
}

static int vlan_macsec_add_rxsa(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_add_rxsa, ctx);
}

static int vlan_macsec_upd_rxsa(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_upd_rxsa, ctx);
}

static int vlan_macsec_del_rxsa(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_del_rxsa, ctx);
}

static int vlan_macsec_add_txsa(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_add_txsa, ctx);
}

static int vlan_macsec_upd_txsa(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_upd_txsa, ctx);
}

static int vlan_macsec_del_txsa(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_del_txsa, ctx);
}

static int vlan_macsec_get_dev_stats(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_get_dev_stats, ctx);
}

static int vlan_macsec_get_tx_sc_stats(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_get_tx_sc_stats, ctx);
}

static int vlan_macsec_get_tx_sa_stats(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_get_tx_sa_stats, ctx);
}

static int vlan_macsec_get_rx_sc_stats(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_get_rx_sc_stats, ctx);
}

static int vlan_macsec_get_rx_sa_stats(struct macsec_context *ctx)
{
	const struct macsec_ops *ops = vlan_get_macsec_ops(ctx);

	if (!ops)
		return -EOPNOTSUPP;

	return vlan_macsec_offload(ops->mdo_get_rx_sa_stats, ctx);
}

/* MACsec ops installed on VLAN devices by vlan_setup(). */
static const struct macsec_ops macsec_offload_ops = {
	/* Device wide */
	.mdo_dev_open = vlan_macsec_dev_open,
	.mdo_dev_stop = vlan_macsec_dev_stop,
	/* SecY */
	.mdo_add_secy = vlan_macsec_add_secy,
	.mdo_upd_secy = vlan_macsec_upd_secy,
	.mdo_del_secy = vlan_macsec_del_secy,
	/* Security channels */
	.mdo_add_rxsc = vlan_macsec_add_rxsc,
	.mdo_upd_rxsc = vlan_macsec_upd_rxsc,
	.mdo_del_rxsc = vlan_macsec_del_rxsc,
	/* Security associations */
	.mdo_add_rxsa = vlan_macsec_add_rxsa,
	.mdo_upd_rxsa = vlan_macsec_upd_rxsa,
	.mdo_del_rxsa = vlan_macsec_del_rxsa,
	.mdo_add_txsa = vlan_macsec_add_txsa,
	.mdo_upd_txsa = vlan_macsec_upd_txsa,
	.mdo_del_txsa = vlan_macsec_del_txsa,
	/* Statistics */
	.mdo_get_dev_stats = vlan_macsec_get_dev_stats,
	.mdo_get_tx_sc_stats = vlan_macsec_get_tx_sc_stats,
	.mdo_get_tx_sa_stats = vlan_macsec_get_tx_sa_stats,
	.mdo_get_rx_sc_stats = vlan_macsec_get_rx_sc_stats,
	.mdo_get_rx_sa_stats = vlan_macsec_get_rx_sa_stats,
};

#endif

/* ethtool operations exposed by VLAN devices. */
static const struct ethtool_ops vlan_ethtool_ops = {
	.get_link_ksettings	= vlan_ethtool_get_link_ksettings,
	.get_drvinfo	        = vlan_ethtool_get_drvinfo,
	.get_link		= ethtool_op_get_link,
	.get_ts_info		= vlan_ethtool_get_ts_info,
};

/* Netdevice operations of VLAN devices (declared earlier in this file). */
static const struct net_device_ops vlan_netdev_ops = {
	.ndo_change_mtu		= vlan_dev_change_mtu,
	.ndo_init		= vlan_dev_init,
	.ndo_uninit		= vlan_dev_uninit,
	.ndo_open		= vlan_dev_open,
	.ndo_stop		= vlan_dev_stop,
	.ndo_start_xmit =  vlan_dev_hard_start_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= vlan_dev_set_mac_address,
	.ndo_set_rx_mode	= vlan_dev_set_rx_mode,
	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
	.ndo_eth_ioctl		= vlan_dev_ioctl,
	.ndo_neigh_setup	= vlan_dev_neigh_setup,
	.ndo_get_stats64	= vlan_dev_get_stats64,
#if IS_ENABLED(CONFIG_FCOE)
	.ndo_fcoe_ddp_setup	= vlan_dev_fcoe_ddp_setup,
	.ndo_fcoe_ddp_done	= vlan_dev_fcoe_ddp_done,
	.ndo_fcoe_enable	= vlan_dev_fcoe_enable,
	.ndo_fcoe_disable	= vlan_dev_fcoe_disable,
	.ndo_fcoe_ddp_target	= vlan_dev_fcoe_ddp_target,
#endif
#ifdef NETDEV_FCOE_WWNN
	.ndo_fcoe_get_wwn	= vlan_dev_fcoe_get_wwn,
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= vlan_dev_poll_controller,
	.ndo_netpoll_setup	= vlan_dev_netpoll_setup,
	.ndo_netpoll_cleanup	= vlan_dev_netpoll_cleanup,
#endif
	.ndo_fix_features	= vlan_dev_fix_features,
	.ndo_get_iflink		= vlan_dev_get_iflink,
	.ndo_fill_forward_path	= vlan_dev_fill_forward_path,
	.ndo_hwtstamp_get	= vlan_hwtstamp_get,
	.ndo_hwtstamp_set	= vlan_hwtstamp_set,
};

/*
 * priv_destructor: free the per-CPU statistics and drop the reference on the
 * real device that vlan_dev_init() took.
 */
static void vlan_dev_free(struct net_device *dev)
{
	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);

	free_percpu(vlan->vlan_pcpu_stats);
	vlan->vlan_pcpu_stats = NULL;

	/* Get rid of the vlan's reference to real_dev */
	netdev_put(vlan->real_dev, &vlan->dev_tracker);
}

/*
 * Initialise a net_device as a VLAN device: start from ether_setup(), set
 * the VLAN-specific private flags, install the ops tables and destructor,
 * allow MTUs from 0 to ETH_MAX_MTU and zero the broadcast address (it is
 * filled in from the real device by vlan_dev_init() while still zero).
 */
void vlan_setup(struct net_device *dev)
{
	ether_setup(dev);

	dev->priv_flags		|= IFF_802_1Q_VLAN | IFF_NO_QUEUE;
	dev->priv_flags		|= IFF_UNICAST_FLT;
	dev->priv_flags		&= ~IFF_TX_SKB_SHARING;
	netif_keep_dst(dev);

	dev->netdev_ops		= &vlan_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->priv_destructor	= vlan_dev_free;
	dev->ethtool_ops	= &vlan_ethtool_ops;

#if IS_ENABLED(CONFIG_MACSEC)
	dev->macsec_ops		= &macsec_offload_ops;
#endif
	dev->min_mtu		= 0;
	dev->max_mtu		= ETH_MAX_MTU;

	eth_zero_addr(dev->broadcast);
}
18 18 18 3 3 3 1 2 1 1 1 1 1 10 10 3 8 7 3 8 1 3 3 3 3 5 5 1 4 4 1 1 2 2 1 4 1 2 2 1 1 1 2 3 1 2 4 10 2 5 3 8 9 1 8 2 10 10 10 10 10 10 10 1 9 50 50 6 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 
989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 // SPDX-License-Identifier: GPL-2.0 /* * * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. * * Regular file handling primitives for NTFS-based filesystems. 
* */ #include <linux/backing-dev.h> #include <linux/blkdev.h> #include <linux/buffer_head.h> #include <linux/compat.h> #include <linux/falloc.h> #include <linux/fiemap.h> #include "debug.h" #include "ntfs.h" #include "ntfs_fs.h" static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg) { struct fstrim_range __user *user_range; struct fstrim_range range; struct block_device *dev; int err; if (!capable(CAP_SYS_ADMIN)) return -EPERM; dev = sbi->sb->s_bdev; if (!bdev_max_discard_sectors(dev)) return -EOPNOTSUPP; user_range = (struct fstrim_range __user *)arg; if (copy_from_user(&range, user_range, sizeof(range))) return -EFAULT; range.minlen = max_t(u32, range.minlen, bdev_discard_granularity(dev)); err = ntfs_trim_fs(sbi, &range); if (err < 0) return err; if (copy_to_user(user_range, &range, sizeof(range))) return -EFAULT; return 0; } long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg) { struct inode *inode = file_inode(filp); struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; switch (cmd) { case FITRIM: return ntfs_ioctl_fitrim(sbi, arg); } return -ENOTTY; /* Inappropriate ioctl for device. 
*/ } #ifdef CONFIG_COMPAT long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg) { return ntfs_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); } #endif /* * ntfs_getattr - inode_operations::getattr */ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, u32 flags) { struct inode *inode = d_inode(path->dentry); struct ntfs_inode *ni = ntfs_i(inode); if (is_compressed(ni)) stat->attributes |= STATX_ATTR_COMPRESSED; if (is_encrypted(ni)) stat->attributes |= STATX_ATTR_ENCRYPTED; stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED; generic_fillattr(idmap, request_mask, inode, stat); stat->result_mask |= STATX_BTIME; stat->btime = ni->i_crtime; stat->blksize = ni->mi.sbi->cluster_size; /* 512, 1K, ..., 2M */ return 0; } static int ntfs_extend_initialized_size(struct file *file, struct ntfs_inode *ni, const loff_t valid, const loff_t new_valid) { struct inode *inode = &ni->vfs_inode; struct address_space *mapping = inode->i_mapping; struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; loff_t pos = valid; int err; if (is_resident(ni)) { ni->i_valid = new_valid; return 0; } WARN_ON(is_compressed(ni)); WARN_ON(valid >= new_valid); for (;;) { u32 zerofrom, len; struct page *page; u8 bits; CLST vcn, lcn, clen; if (is_sparsed(ni)) { bits = sbi->cluster_bits; vcn = pos >> bits; err = attr_data_get_block(ni, vcn, 1, &lcn, &clen, NULL, false); if (err) goto out; if (lcn == SPARSE_LCN) { pos = ((loff_t)clen + vcn) << bits; ni->i_valid = pos; goto next; } } zerofrom = pos & (PAGE_SIZE - 1); len = PAGE_SIZE - zerofrom; if (pos + len > new_valid) len = new_valid - pos; err = ntfs_write_begin(file, mapping, pos, len, &page, NULL); if (err) goto out; zero_user_segment(page, zerofrom, PAGE_SIZE); /* This function in any case puts page. 
*/ err = ntfs_write_end(file, mapping, pos, len, len, page, NULL); if (err < 0) goto out; pos += len; next: if (pos >= new_valid) break; balance_dirty_pages_ratelimited(mapping); cond_resched(); } return 0; out: ni->i_valid = valid; ntfs_inode_warn(inode, "failed to extend initialized size to %llx.", new_valid); return err; } /* * ntfs_zero_range - Helper function for punch_hole. * * It zeroes a range [vbo, vbo_to). */ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) { int err = 0; struct address_space *mapping = inode->i_mapping; u32 blocksize = i_blocksize(inode); pgoff_t idx = vbo >> PAGE_SHIFT; u32 from = vbo & (PAGE_SIZE - 1); pgoff_t idx_end = (vbo_to + PAGE_SIZE - 1) >> PAGE_SHIFT; loff_t page_off; struct buffer_head *head, *bh; u32 bh_next, bh_off, to; sector_t iblock; struct folio *folio; bool dirty = false; for (; idx < idx_end; idx += 1, from = 0) { page_off = (loff_t)idx << PAGE_SHIFT; to = (page_off + PAGE_SIZE) > vbo_to ? (vbo_to - page_off) : PAGE_SIZE; iblock = page_off >> inode->i_blkbits; folio = __filemap_get_folio(mapping, idx, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mapping_gfp_constraint(mapping, ~__GFP_FS)); if (IS_ERR(folio)) return PTR_ERR(folio); head = folio_buffers(folio); if (!head) head = create_empty_buffers(folio, blocksize, 0); bh = head; bh_off = 0; do { bh_next = bh_off + blocksize; if (bh_next <= from || bh_off >= to) continue; if (!buffer_mapped(bh)) { ntfs_get_block(inode, iblock, bh, 0); /* Unmapped? It's a hole - nothing to do. */ if (!buffer_mapped(bh)) continue; } /* Ok, it's mapped. Make sure it's up-to-date. 
*/ if (folio_test_uptodate(folio)) set_buffer_uptodate(bh); else if (bh_read(bh, 0) < 0) { err = -EIO; folio_unlock(folio); folio_put(folio); goto out; } mark_buffer_dirty(bh); } while (bh_off = bh_next, iblock += 1, head != (bh = bh->b_this_page)); folio_zero_segment(folio, from, to); dirty = true; folio_unlock(folio); folio_put(folio); cond_resched(); } out: if (dirty) mark_inode_dirty(inode); return err; } /* * ntfs_file_mmap - file_operations::mmap */ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); u64 from = ((u64)vma->vm_pgoff << PAGE_SHIFT); bool rw = vma->vm_flags & VM_WRITE; int err; if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) return -EIO; if (is_encrypted(ni)) { ntfs_inode_warn(inode, "mmap encrypted not supported"); return -EOPNOTSUPP; } if (is_dedup(ni)) { ntfs_inode_warn(inode, "mmap deduplicated not supported"); return -EOPNOTSUPP; } if (is_compressed(ni) && rw) { ntfs_inode_warn(inode, "mmap(write) compressed not supported"); return -EOPNOTSUPP; } if (rw) { u64 to = min_t(loff_t, i_size_read(inode), from + vma->vm_end - vma->vm_start); if (is_sparsed(ni)) { /* Allocate clusters for rw map. 
*/ struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; CLST lcn, len; CLST vcn = from >> sbi->cluster_bits; CLST end = bytes_to_cluster(sbi, to); bool new; for (; vcn < end; vcn += len) { err = attr_data_get_block(ni, vcn, 1, &lcn, &len, &new, true); if (err) goto out; } } if (ni->i_valid < to) { if (!inode_trylock(inode)) { err = -EAGAIN; goto out; } err = ntfs_extend_initialized_size(file, ni, ni->i_valid, to); inode_unlock(inode); if (err) goto out; } } err = generic_file_mmap(file, vma); out: return err; } static int ntfs_extend(struct inode *inode, loff_t pos, size_t count, struct file *file) { struct ntfs_inode *ni = ntfs_i(inode); struct address_space *mapping = inode->i_mapping; loff_t end = pos + count; bool extend_init = file && pos > ni->i_valid; int err; if (end <= inode->i_size && !extend_init) return 0; /* Mark rw ntfs as dirty. It will be cleared at umount. */ ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_DIRTY); if (end > inode->i_size) { err = ntfs_set_size(inode, end); if (err) goto out; } if (extend_init && !is_compressed(ni)) { err = ntfs_extend_initialized_size(file, ni, ni->i_valid, pos); if (err) goto out; } else { err = 0; } inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); mark_inode_dirty(inode); if (IS_SYNC(inode)) { int err2; err = filemap_fdatawrite_range(mapping, pos, end - 1); err2 = sync_mapping_buffers(mapping); if (!err) err = err2; err2 = write_inode_now(inode, 1); if (!err) err = err2; if (!err) err = filemap_fdatawait_range(mapping, pos, end - 1); } out: return err; } static int ntfs_truncate(struct inode *inode, loff_t new_size) { struct super_block *sb = inode->i_sb; struct ntfs_inode *ni = ntfs_i(inode); int err, dirty = 0; u64 new_valid; if (!S_ISREG(inode->i_mode)) return 0; if (is_compressed(ni)) { if (ni->i_valid > new_size) ni->i_valid = new_size; } else { err = block_truncate_page(inode->i_mapping, new_size, ntfs_get_block); if (err) return err; } new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size)); 
truncate_setsize(inode, new_size); ni_lock(ni); down_write(&ni->file.run_lock); err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, &new_valid, ni->mi.sbi->options->prealloc, NULL); up_write(&ni->file.run_lock); if (new_valid < ni->i_valid) ni->i_valid = new_valid; ni_unlock(ni); ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE; inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); if (!IS_DIRSYNC(inode)) { dirty = 1; } else { err = ntfs_sync_inode(inode); if (err) return err; } if (dirty) mark_inode_dirty(inode); /*ntfs_flush_inodes(inode->i_sb, inode, NULL);*/ return 0; } /* * ntfs_fallocate * * Preallocate space for a file. This implements ntfs's fallocate file * operation, which gets called from sys_fallocate system call. User * space requests 'len' bytes at 'vbo'. If FALLOC_FL_KEEP_SIZE is set * we just allocate clusters without zeroing them out. Otherwise we * allocate and zero out clusters via an expanding truncate. */ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) { struct inode *inode = file->f_mapping->host; struct address_space *mapping = inode->i_mapping; struct super_block *sb = inode->i_sb; struct ntfs_sb_info *sbi = sb->s_fs_info; struct ntfs_inode *ni = ntfs_i(inode); loff_t end = vbo + len; loff_t vbo_down = round_down(vbo, max_t(unsigned long, sbi->cluster_size, PAGE_SIZE)); bool is_supported_holes = is_sparsed(ni) || is_compressed(ni); loff_t i_size, new_size; bool map_locked; int err; /* No support for dir. */ if (!S_ISREG(inode->i_mode)) return -EOPNOTSUPP; /* * vfs_fallocate checks all possible combinations of mode. * Do additional checks here before ntfs_set_state(dirty). 
*/ if (mode & FALLOC_FL_PUNCH_HOLE) { if (!is_supported_holes) return -EOPNOTSUPP; } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { } else if (mode & FALLOC_FL_INSERT_RANGE) { if (!is_supported_holes) return -EOPNOTSUPP; } else if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)) { ntfs_inode_warn(inode, "fallocate(0x%x) is not supported", mode); return -EOPNOTSUPP; } ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); inode_lock(inode); i_size = inode->i_size; new_size = max(end, i_size); map_locked = false; if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) { /* Should never be here, see ntfs_file_open. */ err = -EOPNOTSUPP; goto out; } if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)) { inode_dio_wait(inode); filemap_invalidate_lock(mapping); map_locked = true; } if (mode & FALLOC_FL_PUNCH_HOLE) { u32 frame_size; loff_t mask, vbo_a, end_a, tmp; err = filemap_write_and_wait_range(mapping, vbo_down, LLONG_MAX); if (err) goto out; truncate_pagecache(inode, vbo_down); ni_lock(ni); err = attr_punch_hole(ni, vbo, len, &frame_size); ni_unlock(ni); if (!err) goto ok; if (err != E_NTFS_NOTALIGNED) goto out; /* Process not aligned punch. */ err = 0; mask = frame_size - 1; vbo_a = (vbo + mask) & ~mask; end_a = end & ~mask; tmp = min(vbo_a, end); if (tmp > vbo) { err = ntfs_zero_range(inode, vbo, tmp); if (err) goto out; } if (vbo < end_a && end_a < end) { err = ntfs_zero_range(inode, end_a, end); if (err) goto out; } /* Aligned punch_hole */ if (end_a > vbo_a) { ni_lock(ni); err = attr_punch_hole(ni, vbo_a, end_a - vbo_a, NULL); ni_unlock(ni); if (err) goto out; } } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { /* * Write tail of the last page before removed range since * it will get removed from the page cache below. */ err = filemap_write_and_wait_range(mapping, vbo_down, vbo); if (err) goto out; /* * Write data that will be shifted to preserve them * when discarding page cache below. 
*/ err = filemap_write_and_wait_range(mapping, end, LLONG_MAX); if (err) goto out; truncate_pagecache(inode, vbo_down); ni_lock(ni); err = attr_collapse_range(ni, vbo, len); ni_unlock(ni); } else if (mode & FALLOC_FL_INSERT_RANGE) { /* Check new size. */ err = inode_newsize_ok(inode, new_size); if (err) goto out; /* Write out all dirty pages. */ err = filemap_write_and_wait_range(mapping, vbo_down, LLONG_MAX); if (err) goto out; truncate_pagecache(inode, vbo_down); ni_lock(ni); err = attr_insert_range(ni, vbo, len); ni_unlock(ni); if (err) goto out; } else { /* Check new size. */ u8 cluster_bits = sbi->cluster_bits; /* generic/213: expected -ENOSPC instead of -EFBIG. */ if (!is_supported_holes) { loff_t to_alloc = new_size - inode_get_bytes(inode); if (to_alloc > 0 && (to_alloc >> cluster_bits) > wnd_zeroes(&sbi->used.bitmap)) { err = -ENOSPC; goto out; } } err = inode_newsize_ok(inode, new_size); if (err) goto out; if (new_size > i_size) { /* * Allocate clusters, do not change 'valid' size. */ err = ntfs_set_size(inode, new_size); if (err) goto out; } if (is_supported_holes) { CLST vcn = vbo >> cluster_bits; CLST cend = bytes_to_cluster(sbi, end); CLST cend_v = bytes_to_cluster(sbi, ni->i_valid); CLST lcn, clen; bool new; if (cend_v > cend) cend_v = cend; /* * Allocate and zero new clusters. * Zeroing these clusters may be too long. */ for (; vcn < cend_v; vcn += clen) { err = attr_data_get_block(ni, vcn, cend_v - vcn, &lcn, &clen, &new, true); if (err) goto out; } /* * Allocate but not zero new clusters. */ for (; vcn < cend; vcn += clen) { err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, &clen, &new, false); if (err) goto out; } } if (mode & FALLOC_FL_KEEP_SIZE) { ni_lock(ni); /* True - Keep preallocated. 
*/ err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, i_size, &ni->i_valid, true, NULL); ni_unlock(ni); if (err) goto out; } else if (new_size > i_size) { i_size_write(inode, new_size); } } ok: err = file_modified(file); if (err) goto out; out: if (map_locked) filemap_invalidate_unlock(mapping); if (!err) { inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); mark_inode_dirty(inode); } inode_unlock(inode); return err; } /* * ntfs3_setattr - inode_operations::setattr */ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); struct ntfs_inode *ni = ntfs_i(inode); u32 ia_valid = attr->ia_valid; umode_t mode = inode->i_mode; int err; if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) return -EIO; err = setattr_prepare(idmap, dentry, attr); if (err) goto out; if (ia_valid & ATTR_SIZE) { loff_t newsize, oldsize; if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) { /* Should never be here, see ntfs_file_open(). */ err = -EOPNOTSUPP; goto out; } inode_dio_wait(inode); oldsize = i_size_read(inode); newsize = attr->ia_size; if (newsize <= oldsize) err = ntfs_truncate(inode, newsize); else err = ntfs_extend(inode, newsize, 0, NULL); if (err) goto out; ni->ni_flags |= NI_FLAG_UPDATE_PARENT; i_size_write(inode, newsize); } setattr_copy(idmap, inode, attr); if (mode != inode->i_mode) { err = ntfs_acl_chmod(idmap, dentry); if (err) goto out; /* Linux 'w' -> Windows 'ro'. 
*/ if (0222 & inode->i_mode) ni->std_fa &= ~FILE_ATTRIBUTE_READONLY; else ni->std_fa |= FILE_ATTRIBUTE_READONLY; } if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) ntfs_save_wsl_perm(inode, NULL); mark_inode_dirty(inode); out: return err; } static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct ntfs_inode *ni = ntfs_i(inode); if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) return -EIO; if (is_encrypted(ni)) { ntfs_inode_warn(inode, "encrypted i/o not supported"); return -EOPNOTSUPP; } if (is_compressed(ni) && (iocb->ki_flags & IOCB_DIRECT)) { ntfs_inode_warn(inode, "direct i/o + compressed not supported"); return -EOPNOTSUPP; } #ifndef CONFIG_NTFS3_LZX_XPRESS if (ni->ni_flags & NI_FLAG_COMPRESSED_MASK) { ntfs_inode_warn( inode, "activate CONFIG_NTFS3_LZX_XPRESS to read external compressed files"); return -EOPNOTSUPP; } #endif if (is_dedup(ni)) { ntfs_inode_warn(inode, "read deduplicated not supported"); return -EOPNOTSUPP; } return generic_file_read_iter(iocb, iter); } static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct inode *inode = in->f_mapping->host; struct ntfs_inode *ni = ntfs_i(inode); if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) return -EIO; if (is_encrypted(ni)) { ntfs_inode_warn(inode, "encrypted i/o not supported"); return -EOPNOTSUPP; } #ifndef CONFIG_NTFS3_LZX_XPRESS if (ni->ni_flags & NI_FLAG_COMPRESSED_MASK) { ntfs_inode_warn( inode, "activate CONFIG_NTFS3_LZX_XPRESS to read external compressed files"); return -EOPNOTSUPP; } #endif if (is_dedup(ni)) { ntfs_inode_warn(inode, "read deduplicated not supported"); return -EOPNOTSUPP; } return filemap_splice_read(in, ppos, pipe, len, flags); } /* * ntfs_get_frame_pages * * Return: Array of locked pages. 
*/
/*
 * Looks up or creates @pages_per_frame consecutive page-cache pages
 * starting at @index and stores them in @pages. *@frame_uptodate is set
 * to false if any of them is not up to date. If a page cannot be
 * obtained, the pages collected so far are unlocked and released and
 * -ENOMEM is returned; otherwise 0 is returned.
 */
static int ntfs_get_frame_pages(struct address_space *mapping, pgoff_t index,
				struct page **pages, u32 pages_per_frame,
				bool *frame_uptodate)
{
	gfp_t gfp_mask = mapping_gfp_mask(mapping);
	u32 npages;

	*frame_uptodate = true;

	for (npages = 0; npages < pages_per_frame; npages++, index++) {
		struct page *page;

		page = find_or_create_page(mapping, index, gfp_mask);
		if (!page) {
			/* Undo: release every page acquired so far. */
			while (npages--) {
				page = pages[npages];
				unlock_page(page);
				put_page(page);
			}

			return -ENOMEM;
		}

		if (!PageUptodate(page))
			*frame_uptodate = false;

		pages[npages] = page;
	}

	return 0;
}

/*
 * ntfs_compress_write - Helper for ntfs_file_write_iter() (compressed files).
 *
 * Works frame by frame, where a frame is
 * 1 << (NTFS_LZNT_CUNIT + cluster_bits) bytes. First the gap between the
 * initialized size and the write position is zero-filled and written
 * out, then the user data is copied into the frame pages and each frame
 * is written with ni_write_frame().
 *
 * Return: number of bytes written, or a negative error code. On success
 * iocb->ki_pos, ni->i_valid and i_size are advanced as needed.
 */
static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
{
	int err;
	struct file *file = iocb->ki_filp;
	size_t count = iov_iter_count(from);
	loff_t pos = iocb->ki_pos;
	struct inode *inode = file_inode(file);
	loff_t i_size = i_size_read(inode);
	struct address_space *mapping = inode->i_mapping;
	struct ntfs_inode *ni = ntfs_i(inode);
	u64 valid = ni->i_valid;
	struct ntfs_sb_info *sbi = ni->mi.sbi;
	struct page *page, **pages = NULL;
	size_t written = 0;
	u8 frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits;
	u32 frame_size = 1u << frame_bits;
	u32 pages_per_frame = frame_size >> PAGE_SHIFT;
	u32 ip, off;
	CLST frame;
	u64 frame_vbo;
	pgoff_t index;
	bool frame_uptodate;

	if (frame_size < PAGE_SIZE) {
		/*
		 * frame_size == 8K if cluster 512
		 * frame_size == 64K if cluster 4096
		 */
		ntfs_inode_warn(inode, "page size is bigger than frame size");
		return -EOPNOTSUPP;
	}

	pages = kmalloc_array(pages_per_frame, sizeof(struct page *), GFP_NOFS);
	if (!pages)
		return -ENOMEM;

	err = file_remove_privs(file);
	if (err)
		goto out;

	err = file_update_time(file);
	if (err)
		goto out;

	/* Zero range [valid : pos). */
	while (valid < pos) {
		CLST lcn, clen;

		frame = valid >> frame_bits;
		frame_vbo = valid & ~(frame_size - 1);
		off = valid & (frame_size - 1);

		err = attr_data_get_block(ni, frame << NTFS_LZNT_CUNIT, 1, &lcn,
					  &clen, NULL, false);
		if (err)
			goto out;

		if (lcn == SPARSE_LCN) {
			/* Sparse run: nothing to zero, skip past it. */
			ni->i_valid = valid =
				frame_vbo + ((u64)clen << sbi->cluster_bits);
			continue;
		}

		/* Load full frame. */
		err = ntfs_get_frame_pages(mapping, frame_vbo >> PAGE_SHIFT,
					   pages, pages_per_frame,
					   &frame_uptodate);
		if (err)
			goto out;

		/* Existing data before 'off' must be kept: read the frame. */
		if (!frame_uptodate && off) {
			err = ni_read_frame(ni, frame_vbo, pages,
					    pages_per_frame);
			if (err) {
				for (ip = 0; ip < pages_per_frame; ip++) {
					page = pages[ip];
					unlock_page(page);
					put_page(page);
				}
				goto out;
			}
		}

		/* Zero from 'valid' to the end of the frame. */
		ip = off >> PAGE_SHIFT;
		off = offset_in_page(valid);
		for (; ip < pages_per_frame; ip++, off = 0) {
			page = pages[ip];
			zero_user_segment(page, off, PAGE_SIZE);
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		ni_lock(ni);
		err = ni_write_frame(ni, pages, pages_per_frame);
		ni_unlock(ni);

		/* Pages are released whether or not the write succeeded. */
		for (ip = 0; ip < pages_per_frame; ip++) {
			page = pages[ip];
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
		}

		if (err)
			goto out;

		ni->i_valid = valid = frame_vbo + frame_size;
	}

	/* Copy user data [pos : pos + count). */
	while (count) {
		size_t copied, bytes;

		/* Limit this pass to the remainder of the current frame. */
		off = pos & (frame_size - 1);
		bytes = frame_size - off;
		if (bytes > count)
			bytes = count;

		frame_vbo = pos & ~(frame_size - 1);
		index = frame_vbo >> PAGE_SHIFT;

		if (unlikely(fault_in_iov_iter_readable(from, bytes))) {
			err = -EFAULT;
			goto out;
		}

		/* Load full frame. */
		err = ntfs_get_frame_pages(mapping, index, pages,
					   pages_per_frame, &frame_uptodate);
		if (err)
			goto out;

		if (!frame_uptodate) {
			loff_t to = pos + bytes;

			/*
			 * Partial frame write (starts inside the frame, or
			 * ends inside it before i_size): read the frame first.
			 */
			if (off || (to < i_size && (to & (frame_size - 1)))) {
				err = ni_read_frame(ni, frame_vbo, pages,
						    pages_per_frame);
				if (err) {
					for (ip = 0; ip < pages_per_frame;
					     ip++) {
						page = pages[ip];
						unlock_page(page);
						put_page(page);
					}
					goto out;
				}
			}
		}

		WARN_ON(!bytes);
		copied = 0;
		ip = off >> PAGE_SHIFT;
		off = offset_in_page(pos);

		/* Copy user data to pages. */
		for (;;) {
			size_t cp, tail = PAGE_SIZE - off;

			page = pages[ip];
			cp = copy_page_from_iter_atomic(page, off,
							min(tail, bytes), from);
			flush_dcache_page(page);

			copied += cp;
			bytes -= cp;
			/* Done, or the copy made no progress. */
			if (!bytes || !cp)
				break;

			if (cp < tail) {
				off += cp;
			} else {
				ip++;
				off = 0;
			}
		}

		ni_lock(ni);
		err = ni_write_frame(ni, pages, pages_per_frame);
		ni_unlock(ni);

		for (ip = 0; ip < pages_per_frame; ip++) {
			page = pages[ip];
			ClearPageDirty(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
		}

		if (err)
			goto out;

		/*
		 * We can loop for a long time in here. Be nice and allow
		 * us to schedule out to avoid softlocking if preempt
		 * is disabled.
		 */
		cond_resched();

		pos += copied;
		written += copied;

		count = iov_iter_count(from);
	}

out:
	kfree(pages);

	/* NOTE(review): on error, bytes already written are not reported. */
	if (err < 0)
		return err;

	iocb->ki_pos += written;
	if (iocb->ki_pos > ni->i_valid)
		ni->i_valid = iocb->ki_pos;
	if (iocb->ki_pos > i_size)
		i_size_write(inode, iocb->ki_pos);

	return written;
}

/*
 * ntfs_file_write_iter - file_operations::write_iter
 *
 * Rejects encrypted and deduplicated files and direct I/O on compressed
 * files. Takes the inode lock (returning -EAGAIN for IOCB_NOWAIT when it
 * is contended), runs the generic write checks, extends the file with
 * ntfs_extend() and then writes through ntfs_compress_write() for
 * compressed inodes or __generic_file_write_iter() otherwise.
 */
static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	ssize_t ret;
	int err;
	struct ntfs_inode *ni = ntfs_i(inode);

	if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
		return -EIO;

	if (is_encrypted(ni)) {
		ntfs_inode_warn(inode, "encrypted i/o not supported");
		return -EOPNOTSUPP;
	}

	if (is_compressed(ni) && (iocb->ki_flags & IOCB_DIRECT)) {
		ntfs_inode_warn(inode, "direct i/o + compressed not supported");
		return -EOPNOTSUPP;
	}

	if (is_dedup(ni)) {
		ntfs_inode_warn(inode, "write into deduplicated not supported");
		return -EOPNOTSUPP;
	}

	if (!inode_trylock(inode)) {
		if (iocb->ki_flags & IOCB_NOWAIT)
			return -EAGAIN;
		inode_lock(inode);
	}

	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out;

	err = file_modified(iocb->ki_filp);
	if (err) {
		ret = err;
		goto out;
	}

	if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) {
		/* Should never be here, see ntfs_file_open(). */
		ret = -EOPNOTSUPP;
		goto out;
	}

	/* 'ret' still holds the byte count from generic_write_checks(). */
	ret = ntfs_extend(inode, iocb->ki_pos, ret, file);
	if (ret)
		goto out;

	ret = is_compressed(ni) ? ntfs_compress_write(iocb, from) :
				  __generic_file_write_iter(iocb, from);

out:
	inode_unlock(inode);

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);

	return ret;
}

/*
 * ntfs_file_open - file_operations::open
 *
 * O_DIRECT is refused for compressed or encrypted inodes. An externally
 * compressed file opened with O_WRONLY, O_RDWR or O_TRUNC is decompressed
 * first when CONFIG_NTFS3_LZX_XPRESS is enabled, and refused otherwise.
 */
int ntfs_file_open(struct inode *inode, struct file *file)
{
	struct ntfs_inode *ni = ntfs_i(inode);

	if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
		return -EIO;

	if (unlikely((is_compressed(ni) || is_encrypted(ni)) &&
		     (file->f_flags & O_DIRECT))) {
		return -EOPNOTSUPP;
	}

	/* Decompress "external compressed" file if opened for rw. */
	if ((ni->ni_flags & NI_FLAG_COMPRESSED_MASK) &&
	    (file->f_flags & (O_WRONLY | O_RDWR | O_TRUNC))) {
#ifdef CONFIG_NTFS3_LZX_XPRESS
		int err = ni_decompress_file(ni);

		if (err)
			return err;
#else
		ntfs_inode_warn(
			inode,
			"activate CONFIG_NTFS3_LZX_XPRESS to write external compressed files");
		return -EOPNOTSUPP;
#endif
	}

	return generic_file_open(inode, file);
}

/*
 * ntfs_file_release - file_operations::release
 *
 * With the prealloc option set, a release of a file opened for writing
 * while i_writecount is 1 resizes the data attribute to i_size with
 * keep-preallocated set to false.
 */
static int ntfs_file_release(struct inode *inode, struct file *file)
{
	struct ntfs_inode *ni = ntfs_i(inode);
	struct ntfs_sb_info *sbi = ni->mi.sbi;
	int err = 0;

	/* If we are last writer on the inode, drop the block reservation. */
	if (sbi->options->prealloc &&
	    ((file->f_mode & FMODE_WRITE) &&
	     atomic_read(&inode->i_writecount) == 1)) {
		ni_lock(ni);
		down_write(&ni->file.run_lock);

		err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run,
				    i_size_read(inode), &ni->i_valid, false,
				    NULL);

		up_write(&ni->file.run_lock);
		ni_unlock(ni);
	}
	return err;
}

/*
 * ntfs_fiemap - file_operations::fiemap
 *
 * Validates the request with fiemap_prep() and reports the extents with
 * ni_fiemap() under ni_lock.
 */
int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		__u64 start, __u64 len)
{
	int err;
	struct ntfs_inode *ni = ntfs_i(inode);

	err = fiemap_prep(inode, fieinfo, start, &len, ~FIEMAP_FLAG_XATTR);
	if (err)
		return err;

	ni_lock(ni);

	err = ni_fiemap(ni, fieinfo, start, len);

	ni_unlock(ni);

	return err;
}

// clang-format off
/* Inode operations for regular files. */
const struct inode_operations ntfs_file_inode_operations = {
	.getattr	= ntfs_getattr,
	.setattr	= ntfs3_setattr,
	.listxattr	= ntfs_listxattr,
	.get_acl	= ntfs_get_acl,
	.set_acl	= ntfs_set_acl,
	.fiemap		= ntfs_fiemap,
};

/* File operations for regular files. */
const struct file_operations ntfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read_iter	= ntfs_file_read_iter,
	.write_iter	= ntfs_file_write_iter,
	.unlocked_ioctl = ntfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ntfs_compat_ioctl,
#endif
	.splice_read	= ntfs_file_splice_read,
	.mmap		= ntfs_file_mmap,
	.open		= ntfs_file_open,
	.fsync		= generic_file_fsync,
	.splice_write	= iter_file_splice_write,
	.fallocate	= ntfs_fallocate,
	.release	= ntfs_file_release,
};
// clang-format on
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
*/
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
#include "xfs_fsops.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_dir2.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
#include "xfs_inode_item.h"
#include "xfs_icache.h"
#include "xfs_trace.h"
#include "xfs_icreate_item.h"
#include "xfs_filestream.h"
#include "xfs_quota.h"
#include "xfs_sysfs.h"
#include "xfs_ondisk.h"
#include "xfs_rmap_item.h"
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"
#include "xfs_reflink.h"
#include "xfs_pwork.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
#include "xfs_attr_item.h"
#include "xfs_xattr.h"
#include "xfs_iunlink_item.h"
#include "xfs_dahash_test.h"
#include "xfs_rtbitmap.h"
#include "scrub/stats.h"

#include <linux/magic.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>

static const struct super_operations xfs_super_operations;

static struct dentry *xfs_debugfs;	/* top-level xfs debugfs dir */
static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
#ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
#endif

/* Modes accepted by the "dax=" mount option, see dax_param_enums[]. */
enum xfs_dax_mode {
	XFS_DAX_INODE = 0,
	XFS_DAX_ALWAYS = 1,
	XFS_DAX_NEVER = 2,
};

/*
 * Record a DAX mode in the feature bits of the mount.
 *
 * XFS_DAX_INODE clears both XFS_FEAT_DAX_ALWAYS and XFS_FEAT_DAX_NEVER.
 * The other two modes set their own bit and clear the opposite one, so the
 * two bits are never left set together by this function.  A mode outside
 * the enum leaves m_features untouched (the switch has no default).
 */
static void
xfs_mount_set_dax_mode(
	struct xfs_mount	*mp,
	enum xfs_dax_mode	mode)
{
	switch (mode) {
	case XFS_DAX_INODE:
		mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
		break;
	case XFS_DAX_ALWAYS:
		mp->m_features |= XFS_FEAT_DAX_ALWAYS;
		mp->m_features &= ~XFS_FEAT_DAX_NEVER;
		break;
	case XFS_DAX_NEVER:
		mp->m_features |= XFS_FEAT_DAX_NEVER;
		mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
		break;
	}
}

/* String to enum xfs_dax_mode mapping used by the "dax=" enum parameter. */
static const struct constant_table dax_param_enums[] = {
	{"inode",	XFS_DAX_INODE },
	{"always",	XFS_DAX_ALWAYS },
	{"never",	XFS_DAX_NEVER },
	{}
};

/*
 * Table
driven mount option parser.
 *
 * The Opt_* values below are the option ids used by xfs_fs_parameters[].
 */
enum {
	Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
	Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
	Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
	Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
	Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
	Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum,
};

/*
 * Mount parameter specifications.  Note that "dax" appears twice: once as
 * a bare flag (Opt_dax) and once as an enum taking one of the
 * dax_param_enums[] strings (Opt_dax_enum).
 */
static const struct fs_parameter_spec xfs_fs_parameters[] = {
	fsparam_u32("logbufs",		Opt_logbufs),
	fsparam_string("logbsize",	Opt_logbsize),
	fsparam_string("logdev",	Opt_logdev),
	fsparam_string("rtdev",		Opt_rtdev),
	fsparam_flag("wsync",		Opt_wsync),
	fsparam_flag("noalign",		Opt_noalign),
	fsparam_flag("swalloc",		Opt_swalloc),
	fsparam_u32("sunit",		Opt_sunit),
	fsparam_u32("swidth",		Opt_swidth),
	fsparam_flag("nouuid",		Opt_nouuid),
	fsparam_flag("grpid",		Opt_grpid),
	fsparam_flag("nogrpid",		Opt_nogrpid),
	fsparam_flag("bsdgroups",	Opt_bsdgroups),
	fsparam_flag("sysvgroups",	Opt_sysvgroups),
	fsparam_string("allocsize",	Opt_allocsize),
	fsparam_flag("norecovery",	Opt_norecovery),
	fsparam_flag("inode64",		Opt_inode64),
	fsparam_flag("inode32",		Opt_inode32),
	fsparam_flag("ikeep",		Opt_ikeep),
	fsparam_flag("noikeep",		Opt_noikeep),
	fsparam_flag("largeio",		Opt_largeio),
	fsparam_flag("nolargeio",	Opt_nolargeio),
	fsparam_flag("attr2",		Opt_attr2),
	fsparam_flag("noattr2",		Opt_noattr2),
	fsparam_flag("filestreams",	Opt_filestreams),
	fsparam_flag("quota",		Opt_quota),
	fsparam_flag("noquota",		Opt_noquota),
	fsparam_flag("usrquota",	Opt_usrquota),
	fsparam_flag("grpquota",	Opt_grpquota),
	fsparam_flag("prjquota",	Opt_prjquota),
	fsparam_flag("uquota",		Opt_uquota),
	fsparam_flag("gquota",		Opt_gquota),
	fsparam_flag("pquota",		Opt_pquota),
	fsparam_flag("uqnoenforce",	Opt_uqnoenforce),
	fsparam_flag("gqnoenforce",	Opt_gqnoenforce),
	fsparam_flag("pqnoenforce",	Opt_pqnoenforce),
	fsparam_flag("qnoenforce",	Opt_qnoenforce),
	fsparam_flag("discard",		Opt_discard),
	fsparam_flag("nodiscard",	Opt_nodiscard),
	fsparam_flag("dax",		Opt_dax),
	fsparam_enum("dax",		Opt_dax_enum, dax_param_enums),
	{}
};

/* One feature bit and the option text printed when that bit is set. */
struct proc_xfs_info {
	uint64_t	flag;
	char		*str;
};

/*
 * Write the mount options of the filesystem behind @root to seq file @m.
 *
 * Simple feature flags are printed from a table; the remaining options
 * (inode32/64, allocsize, log buffers, external devices, stripe geometry
 * and quota state) are derived from individual xfs_mount fields.  Every
 * option is emitted with a leading comma.  Always returns 0.
 */
static int
xfs_fs_show_options(
	struct seq_file		*m,
	struct dentry		*root)
{
	static struct proc_xfs_info xfs_info_set[] = {
		/* the few simple ones we can get from the mount struct */
		{ XFS_FEAT_IKEEP,		",ikeep" },
		{ XFS_FEAT_WSYNC,		",wsync" },
		{ XFS_FEAT_NOALIGN,		",noalign" },
		{ XFS_FEAT_SWALLOC,		",swalloc" },
		{ XFS_FEAT_NOUUID,		",nouuid" },
		{ XFS_FEAT_NORECOVERY,		",norecovery" },
		{ XFS_FEAT_ATTR2,		",attr2" },
		{ XFS_FEAT_FILESTREAMS,		",filestreams" },
		{ XFS_FEAT_GRPID,		",grpid" },
		{ XFS_FEAT_DISCARD,		",discard" },
		{ XFS_FEAT_LARGE_IOSIZE,	",largeio" },
		{ XFS_FEAT_DAX_ALWAYS,		",dax=always" },
		{ XFS_FEAT_DAX_NEVER,		",dax=never" },
		{ 0, NULL }
	};
	struct xfs_mount	*mp = XFS_M(root->d_sb);
	struct proc_xfs_info	*xfs_infop;

	/* The table is terminated by the entry whose flag is 0. */
	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
		if (mp->m_features & xfs_infop->flag)
			seq_puts(m, xfs_infop->str);
	}

	/* Exactly one of ",inode32" / ",inode64" is always printed. */
	seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64);

	/* m_allocsize_log is a log2 byte count; report it in KiB. */
	if (xfs_has_allocsize(mp))
		seq_printf(m, ",allocsize=%dk",
			   (1 << mp->m_allocsize_log) >> 10);

	if (mp->m_logbufs > 0)
		seq_printf(m, ",logbufs=%d", mp->m_logbufs);
	if (mp->m_logbsize > 0)
		seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);

	if (mp->m_logname)
		seq_show_option(m, "logdev", mp->m_logname);
	if (mp->m_rtname)
		seq_show_option(m, "rtdev", mp->m_rtname);

	/* Stripe unit and width are converted with XFS_FSB_TO_BB(). */
	if (mp->m_dalign > 0)
		seq_printf(m, ",sunit=%d",
				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
	if (mp->m_swidth > 0)
		seq_printf(m, ",swidth=%d",
				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));

	/*
	 * For each quota type the enforcing name is printed when the ENFD
	 * bit is set, otherwise the "noenforce" name when only ACCT is set.
	 */
	if (mp->m_qflags & XFS_UQUOTA_ENFD)
		seq_puts(m, ",usrquota");
	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
		seq_puts(m, ",uqnoenforce");

	if (mp->m_qflags & XFS_PQUOTA_ENFD)
		seq_puts(m, ",prjquota");
	else if (mp->m_qflags & XFS_PQUOTA_ACCT)
		seq_puts(m, ",pqnoenforce");

	if (mp->m_qflags & XFS_GQUOTA_ENFD)
		seq_puts(m, ",grpquota");
	else if (mp->m_qflags & XFS_GQUOTA_ACCT)
		seq_puts(m, ",gqnoenforce");

	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
		seq_puts(m, ",noquota");

	return 0;
}

/*
 * Update the inode allocation state bits of one AG.
 *
 * @ino is compared against XFS_MAXINUMBER_32; the caller in this file
 * passes the inode number built from the last block of the AG.
 *
 * If the mount is not operating in inode32 mode, the AG allows inodes and
 * does not prefer metadata.  In inode32 mode, an AG whose @ino exceeds
 * XFS_MAXINUMBER_32 gets both bits cleared; any other AG allows inodes,
 * and prefers metadata only when its AG number is below @max_metadata.
 *
 * Returns true only for an AG that allows inodes while in inode32 mode,
 * which is what the caller counts.
 */
static bool
xfs_set_inode_alloc_perag(
	struct xfs_perag	*pag,
	xfs_ino_t		ino,
	xfs_agnumber_t		max_metadata)
{
	if (!xfs_is_inode32(pag->pag_mount)) {
		set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
		return false;
	}

	if (ino > XFS_MAXINUMBER_32) {
		clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
		return false;
	}

	set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
	if (pag->pag_agno < max_metadata)
		set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
	else
		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
	return true;
}

/*
 * Set parameters for inode allocation heuristics, taking into account
 * filesystem size and inode32/inode64 mount options; i.e. specifically
 * whether or not XFS_FEAT_SMALL_INUMS is set.
*
 * Inode allocation patterns are altered only if inode32 is requested
 * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
 * If altered, XFS_OPSTATE_INODE32 is set as well.
 *
 * An agcount independent of that in the mount structure is provided
 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
 * to the potentially higher ag count.
 *
 * Returns the maximum AG index which may contain inodes.
 */
xfs_agnumber_t
xfs_set_inode_alloc(
	struct xfs_mount *mp,
	xfs_agnumber_t	agcount)
{
	xfs_agnumber_t	index;
	xfs_agnumber_t	maxagi = 0;
	xfs_sb_t	*sbp = &mp->m_sb;
	xfs_agnumber_t	max_metadata;
	xfs_agino_t	agino;
	xfs_ino_t	ino;

	/*
	 * Calculate how much should be reserved for inodes to meet
	 * the max inode percentage.  Used only for inode32.
	 */
	if (M_IGEO(mp)->maxicount) {
		uint64_t	icount;

		/* sb_imax_pct percent of the data blocks ... */
		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		/* ... rounded up to a whole number of AGs. */
		icount += sbp->sb_agblocks - 1;
		do_div(icount, sbp->sb_agblocks);
		max_metadata = icount;
	} else {
		max_metadata = agcount;
	}

	/* Get the last possible inode in the filesystem */
	agino =	XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);

	/*
	 * If user asked for no more than 32-bit inodes, and the fs is
	 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
	 * the allocator to accommodate the request.
	 */
	if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
		set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
	else
		clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);

	for (index = 0; index < agcount; index++) {
		struct xfs_perag	*pag;

		/* Inode number for the last block of this AG. */
		ino = XFS_AGINO_TO_INO(mp, index, agino);

		pag = xfs_perag_get(mp, index);
		if (xfs_set_inode_alloc_perag(pag, ino, max_metadata))
			maxagi++;
		xfs_perag_put(pag);
	}

	return xfs_is_inode32(mp) ? maxagi : agcount;
}

/*
 * Decide whether a "dax=always" request can be honoured.
 *
 * If neither the data device nor the realtime device has a DAX device, or
 * the filesystem block size differs from PAGE_SIZE, an alert is logged,
 * the mount is switched to XFS_DAX_NEVER and 0 is returned.  A reflink
 * filesystem whose data device is a partition is rejected with -EINVAL.
 * Otherwise a warning that DAX is experimental is logged and 0 returned.
 */
static int
xfs_setup_dax_always(
	struct xfs_mount	*mp)
{
	if (!mp->m_ddev_targp->bt_daxdev &&
	    (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
		xfs_alert(mp,
			"DAX unsupported by block device. Turning off DAX.");
		goto disable_dax;
	}

	if (mp->m_super->s_blocksize != PAGE_SIZE) {
		xfs_alert(mp,
			"DAX not supported for blocksize. Turning off DAX.");
		goto disable_dax;
	}

	if (xfs_has_reflink(mp) &&
	    bdev_is_partition(mp->m_ddev_targp->bt_bdev)) {
		xfs_alert(mp,
			"DAX and reflink cannot work with multi-partitions!");
		return -EINVAL;
	}

	xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
	return 0;

disable_dax:
	xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
	return 0;
}

/*
 * Open the block device at path @name for this mount.
 *
 * The device is opened for reading and writing with
 * BLK_OPEN_RESTRICT_WRITES, using mp->m_super as the holder.  On success
 * the handle is stored in *handlep and 0 is returned.  On failure
 * *handlep is set to NULL, a warning is logged and the negative errno
 * from bdev_open_by_path() is returned.
 */
STATIC int
xfs_blkdev_get(
	xfs_mount_t		*mp,
	const char		*name,
	struct bdev_handle	**handlep)
{
	int			error = 0;

	*handlep = bdev_open_by_path(name,
		BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
		mp->m_super, &fs_holder_ops);
	if (IS_ERR(*handlep)) {
		error = PTR_ERR(*handlep);
		*handlep = NULL;
		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
	}

	return error;
}

/*
 * Flush and invalidate the block device page cache of every device used
 * by this mount: the log device when it is separate from the data device,
 * the realtime device when present, and finally the data device.
 */
STATIC void
xfs_shutdown_devices(
	struct xfs_mount	*mp)
{
	/*
	 * Udev is triggered whenever anyone closes a block device or unmounts
	 * a file system on a block device.
	 * The default udev rules invoke blkid to read the fs super and create
	 * symlinks to the bdev under /dev/disk.  For this, it uses buffered
	 * reads through the page cache.
	 *
	 * xfs_db also uses buffered reads to examine metadata.  There is no
	 * coordination between xfs_db and udev, which means that they can run
	 * concurrently.  Note there is no coordination between the kernel and
	 * blkid either.
	 *
	 * On a system with 64k pages, the page cache can cache the superblock
	 * and the root inode (and hence the root directory) with the same 64k
	 * page.  If udev spawns blkid after the mkfs and the system is busy
	 * enough that it is still running when xfs_db starts up, they'll both
	 * read from the same page in the pagecache.
	 *
	 * The unmount writes updated inode metadata to disk directly.  The XFS
	 * buffer cache does not use the bdev pagecache, so it needs to
	 * invalidate that pagecache on unmount.  If the above scenario occurs,
	 * the pagecache no longer reflects what's on disk, xfs_db reads the
	 * stale metadata, and fails to find /a.  Most of the time this succeeds
	 * because closing a bdev invalidates the page cache, but when processes
	 * race, everyone loses.
	 */
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		blkdev_issue_flush(mp->m_logdev_targp->bt_bdev);
		invalidate_bdev(mp->m_logdev_targp->bt_bdev);
	}
	if (mp->m_rtdev_targp) {
		blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
		invalidate_bdev(mp->m_rtdev_targp->bt_bdev);
	}
	blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
	invalidate_bdev(mp->m_ddev_targp->bt_bdev);
}

/*
 * The file system configurations are:
 *	(1) device (partition) with data and internal log
 *	(2) logical volume with data and log subvolumes.
 *	(3) logical volume with data, log, and realtime subvolumes.
 *
 * We only have to handle opening the log and realtime volumes here if
 * they are present.  The data subvolume has already been opened by
 * get_sb_bdev() and is stored in sb->s_bdev.
 *
 * Returns 0 with m_ddev_targp, m_logdev_targp and (if a realtime device
 * was named) m_rtdev_targp set up.  On failure the error labels undo the
 * steps taken so far in reverse order and a negative errno is returned:
 * the xfs_blkdev_get() error, -EINVAL when the realtime device is the
 * same as the data or log device, or -ENOMEM when a buftarg cannot be
 * allocated.
 */
STATIC int
xfs_open_devices(
	struct xfs_mount	*mp)
{
	struct super_block	*sb = mp->m_super;
	struct block_device	*ddev = sb->s_bdev;
	struct bdev_handle	*logdev_handle = NULL, *rtdev_handle = NULL;
	int			error;

	/*
	 * Open real time and log devices - order is important.
	 */
	if (mp->m_logname) {
		error = xfs_blkdev_get(mp, mp->m_logname, &logdev_handle);
		if (error)
			return error;
	}

	if (mp->m_rtname) {
		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_handle);
		if (error)
			goto out_close_logdev;

		if (rtdev_handle->bdev == ddev ||
		    (logdev_handle &&
		     rtdev_handle->bdev == logdev_handle->bdev)) {
			xfs_warn(mp,
	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
			error = -EINVAL;
			goto out_close_rtdev;
		}
	}

	/*
	 * Setup xfs_mount buffer target pointers
	 */
	error = -ENOMEM;
	mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_handle);
	if (!mp->m_ddev_targp)
		goto out_close_rtdev;

	if (rtdev_handle) {
		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_handle);
		if (!mp->m_rtdev_targp)
			goto out_free_ddev_targ;
	}

	/* A log device equal to the data device is treated as internal. */
	if (logdev_handle && logdev_handle->bdev != ddev) {
		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_handle);
		if (!mp->m_logdev_targp)
			goto out_free_rtdev_targ;
	} else {
		mp->m_logdev_targp = mp->m_ddev_targp;
		/* Handle won't be used, drop it */
		if (logdev_handle)
			bdev_release(logdev_handle);
	}

	return 0;

 out_free_rtdev_targ:
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp);
 out_free_ddev_targ:
	xfs_free_buftarg(mp->m_ddev_targp);
 out_close_rtdev:
	 if (rtdev_handle)
		bdev_release(rtdev_handle);
 out_close_logdev:
	if (logdev_handle)
		bdev_release(logdev_handle);
	return error;
}

/*
 * Setup xfs_mount buffer target pointers based on superblock
 *
 * The data and realtime targets are sized with sb_sectsize.  A separate
 * log target is sized with sb_logsectsize when xfs_has_sector() is true
 * and with BBSIZE otherwise.  Returns 0, or the first error reported by
 * xfs_setsize_buftarg().
 */
STATIC int
xfs_setup_devices(
	struct xfs_mount	*mp)
{
	int			error;

	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
	if (error)
		return error;

	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		unsigned int	log_sector_size = BBSIZE;

		if (xfs_has_sector(mp))
			log_sector_size = mp->m_sb.sb_logsectsize;
		error = xfs_setsize_buftarg(mp->m_logdev_targp,
					    log_sector_size);
		if (error)
			return error;
	}
	if (mp->m_rtdev_targp) {
		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
					    mp->m_sb.sb_sectsize);
		if (error)
			return error;
	}

	return 0;
}

/*
 * Allocate the per-mount workqueues, each named after the superblock id.
 *
 * Returns 0 on success.  If any allocation fails, the workqueues that
 * were already created are destroyed in reverse order of creation and
 * -ENOMEM is returned.
 */
STATIC int
xfs_init_mount_workqueues(
	struct xfs_mount	*mp)
{
	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			1, mp->m_super->s_id);
	if (!mp->m_buf_workqueue)
		goto out;

	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			0, mp->m_super->s_id);
	if (!mp->m_unwritten_workqueue)
		goto out_destroy_buf;

	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			0, mp->m_super->s_id);
	if (!mp->m_reclaim_workqueue)
		goto out_destroy_unwritten;

	mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
			XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
			0, mp->m_super->s_id);
	if (!mp->m_blockgc_wq)
		goto out_destroy_reclaim;

	mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			1, mp->m_super->s_id);
	if (!mp->m_inodegc_wq)
		goto out_destroy_blockgc;

	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
			XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
	if (!mp->m_sync_workqueue)
		goto out_destroy_inodegc;

	return 0;

out_destroy_inodegc:
	destroy_workqueue(mp->m_inodegc_wq);
out_destroy_blockgc:
	destroy_workqueue(mp->m_blockgc_wq);
out_destroy_reclaim:
	destroy_workqueue(mp->m_reclaim_workqueue);
out_destroy_unwritten:
	destroy_workqueue(mp->m_unwritten_workqueue);
out_destroy_buf:
	destroy_workqueue(mp->m_buf_workqueue);
out:
	return -ENOMEM;
}

/* Destroy the six workqueues set up by xfs_init_mount_workqueues(). */
STATIC void
xfs_destroy_mount_workqueues(
	struct xfs_mount	*mp)
{
	destroy_workqueue(mp->m_sync_workqueue);
	destroy_workqueue(mp->m_blockgc_wq);
	destroy_workqueue(mp->m_inodegc_wq);
	destroy_workqueue(mp->m_reclaim_workqueue);
	destroy_workqueue(mp->m_unwritten_workqueue);
	destroy_workqueue(mp->m_buf_workqueue);
}

/*
 * Work function behind mp->m_flush_inodes_work.
 *
 * Runs sync_inodes_sb() while holding s_umount for reading.  The lock is
 * only tried, never waited for: if it cannot be taken the flush is
 * skipped.
 */
static void
xfs_flush_inodes_worker(
	struct work_struct	*work)
{
	struct xfs_mount	*mp = container_of(work, struct xfs_mount,
						   m_flush_inodes_work);
	struct super_block	*sb = mp->m_super;

	if (down_read_trylock(&sb->s_umount)) {
		sync_inodes_sb(sb);
		up_read(&sb->s_umount);
	}
}

/*
 * Flush all dirty data to
disk.  Must not be called while holding an XFS_ILOCK
 * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
 * for IO to complete so that we effectively throttle multiple callers to the
 * rate at which IO is completing.
 *
 * The flush itself runs from m_flush_inodes_work on m_sync_workqueue; this
 * function queues that work and waits for it to finish.
 */
void
xfs_flush_inodes(
	struct xfs_mount	*mp)
{
	/*
	 * If flush_work() returns true then that means we waited for a flush
	 * which was already in progress.  Don't bother running another scan.
	 */
	if (flush_work(&mp->m_flush_inodes_work))
		return;

	queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
	flush_work(&mp->m_flush_inodes_work);
}

/*
 * Catch misguided souls that try to use this interface on XFS
 *
 * Calling this is a bug: it hits BUG() unconditionally.
 */
STATIC struct inode *
xfs_fs_alloc_inode(
	struct super_block	*sb)
{
	BUG();
	return NULL;
}

/*
 * Now that the generic code is guaranteed not to be accessing
 * the linux inode, we can inactivate and reclaim the inode.
 *
 * Bumps the vn_rele and vn_remove statistics and hands the inode to
 * xfs_inode_mark_reclaimable().
 */
STATIC void
xfs_fs_destroy_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	trace_xfs_destroy_inode(ip);

	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
	XFS_STATS_INC(ip->i_mount, vn_rele);
	XFS_STATS_INC(ip->i_mount, vn_remove);
	xfs_inode_mark_reclaimable(ip);
}

/*
 * Log a timestamp update for @inode on a lazytime mount.
 *
 * Nothing is done unless the superblock has SB_LAZYTIME set and @flags
 * is exactly I_DIRTY_SYNC together with I_DIRTY_TIME.  A failure to
 * allocate the transaction is ignored: the function simply returns.
 */
static void
xfs_fs_dirty_inode(
	struct inode			*inode,
	int				flags)
{
	struct xfs_inode		*ip = XFS_I(inode);
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_trans		*tp;

	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
		return;

	/*
	 * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC)
	 * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed
	 * in flags possibly together with I_DIRTY_SYNC.
	 */
	if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME))
		return;

	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
		return;
	/*
	 * The ILOCK is passed to xfs_trans_ijoin() with XFS_ILOCK_EXCL and is
	 * not unlocked explicitly in this function.
	 */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
	xfs_trans_commit(tp);
}

/*
 * Slab object creation initialisation for the XFS inode.
* This covers only the idempotent fields in the XFS inode;
 * all other fields need to be initialised on allocation
 * from the slab. This avoids the need to repeatedly initialise
 * fields in the xfs inode that are left in the initialised state
 * when freeing the inode.
 */
STATIC void
xfs_fs_inode_init_once(
	void			*inode)
{
	struct xfs_inode	*ip = inode;

	memset(ip, 0, sizeof(struct xfs_inode));

	/* vfs inode */
	inode_init_once(VFS_I(ip));

	/* xfs inode */
	atomic_set(&ip->i_pincount, 0);
	spin_lock_init(&ip->i_flags_lock);

	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", ip->i_ino);
}

/*
 * We do an unlocked check for XFS_IDONTCACHE here because we are already
 * serialised against cache hits here via the inode->i_lock and igrab() in
 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
 * racing with us, and it avoids needing to grab a spinlock here for every inode
 * we drop the final reference on.
 *
 * Returns 0 for an inode flagged XFS_IRECOVERY, otherwise whatever
 * generic_drop_inode() returns.
 */
STATIC int
xfs_fs_drop_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	/*
	 * If this unlinked inode is in the middle of recovery, don't
	 * drop the inode just yet; log recovery will take care of
	 * that.  See the comment for this inode flag.
	 */
	if (ip->i_flags & XFS_IRECOVERY) {
		ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
		return 0;
	}

	return generic_drop_inode(inode);
}

/*
 * Release everything owned by the mount structure and then the structure
 * itself: the buffer targets (the log target only when it is not the
 * data target), the debugfs entry and the device name strings.
 */
static void
xfs_mount_free(
	struct xfs_mount	*mp)
{
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_logdev_targp);
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp);
	if (mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_ddev_targp);

	debugfs_remove(mp->m_debugfs);
	kfree(mp->m_rtname);
	kfree(mp->m_logname);
	kmem_free(mp);
}

/*
 * Sync the filesystem.
 *
 * With @wait clear this does nothing and returns 0.  Otherwise the log is
 * forced synchronously; an error from xfs_log_force() is returned as is.
 * When the superblock is at SB_FREEZE_PAGEFAULT, inodegc and blockgc are
 * stopped as well (see the comment in the body).
 */
STATIC int
xfs_fs_sync_fs(
	struct super_block	*sb,
	int			wait)
{
	struct xfs_mount	*mp = XFS_M(sb);
	int			error;

	trace_xfs_fs_sync_fs(mp, __return_address);

	/*
	 * Doing anything during the async pass would be counterproductive.
	 */
	if (!wait)
		return 0;

	error = xfs_log_force(mp, XFS_LOG_SYNC);
	if (error)
		return error;

	if (laptop_mode) {
		/*
		 * The disk must be active because we're syncing.
		 * We schedule log work now (now that the disk is
		 * active) instead of later (when it might not be).
		 */
		flush_delayed_work(&mp->m_log->l_work);
	}

	/*
	 * If we are called with page faults frozen out, it means we are about
	 * to freeze the transaction subsystem. Take the opportunity to shut
	 * down inodegc because once SB_FREEZE_FS is set it's too late to
	 * prevent inactivation races with freeze. The fs doesn't get called
	 * again by the freezing process until after SB_FREEZE_FS has been set,
	 * so it's now or never.  Same logic applies to speculative allocation
	 * garbage collection.
	 *
	 * We don't care if this is a normal syncfs call that does this or
	 * freeze that does this - we can run this multiple times without issue
	 * and we won't race with a restart because a restart can only occur
	 * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
	 */
	if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
		xfs_inodegc_stop(mp);
		xfs_blockgc_stop(mp);
	}

	return 0;
}

/*
 * Fill @statp with filesystem statistics for the filesystem containing
 * @dentry.
 *
 * Block and inode counts come from the per-cpu counters and the in-core
 * superblock.  Two properties of the inode behind @dentry can change the
 * result: with XFS_DIFLAG_PROJINHERIT set and project quota both
 * accounted and enforced, xfs_qm_statvfs() is called; on a realtime mount
 * with XFS_DIFLAG_RTINHERIT or XFS_DIFLAG_REALTIME set, the block counts
 * are replaced by realtime figures.  Always returns 0.
 */
STATIC int
xfs_fs_statfs(
	struct dentry		*dentry,
	struct kstatfs		*statp)
{
	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
	xfs_sb_t		*sbp = &mp->m_sb;
	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
	uint64_t		fakeinos, id;
	uint64_t		icount;
	uint64_t		ifree;
	uint64_t		fdblocks;
	xfs_extlen_t		lsize;
	int64_t			ffree;

	/*
	 * Expedite background inodegc but don't wait. We do not want to block
	 * here waiting hours for a billion extent file to be truncated.
	 */
	xfs_inodegc_push(mp);

	statp->f_type = XFS_SUPER_MAGIC;
	statp->f_namelen = MAXNAMELEN - 1;

	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
	statp->f_fsid = u64_to_fsid(id);

	icount = percpu_counter_sum(&mp->m_icount);
	ifree = percpu_counter_sum(&mp->m_ifree);
	fdblocks = percpu_counter_sum(&mp->m_fdblocks);

	spin_lock(&mp->m_sb_lock);
	statp->f_bsize = sbp->sb_blocksize;
	/* The log blocks are subtracted only when sb_logstart is non-zero. */
	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
	statp->f_blocks = sbp->sb_dblocks - lsize;
	spin_unlock(&mp->m_sb_lock);

	/* make sure statp->f_bfree does not underflow */
	statp->f_bfree = max_t(int64_t, 0,
				fdblocks - xfs_fdblocks_unavailable(mp));
	statp->f_bavail = statp->f_bfree;

	/*
	 * f_files is the allocated inodes plus the inodes the free blocks
	 * convert to via XFS_FSB_TO_INO(), capped at XFS_MAXINUMBER and, when
	 * set, at maxicount.
	 */
	fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
	statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
	if (M_IGEO(mp)->maxicount)
		statp->f_files = min_t(typeof(statp->f_files),
					statp->f_files,
					M_IGEO(mp)->maxicount);

	/* If sb_icount overshot maxicount, report actual allocation */
	statp->f_files = max_t(typeof(statp->f_files),
					statp->f_files,
					sbp->sb_icount);

	/* make sure statp->f_ffree does not underflow */
	ffree = statp->f_files - (icount - ifree);
	statp->f_ffree = max_t(int64_t, ffree, 0);


	if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
			      (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
		xfs_qm_statvfs(ip, statp);

	if (XFS_IS_REALTIME_MOUNT(mp) &&
	    (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
		s64	freertx;

		statp->f_blocks = sbp->sb_rblocks;
		freertx = percpu_counter_sum_positive(&mp->m_frextents);
		statp->f_bavail = statp->f_bfree = xfs_rtx_to_rtb(mp, freertx);
	}

	return 0;
}

/* Remember the current reserved block count, then set the reserve to 0. */
STATIC void
xfs_save_resvblks(struct xfs_mount *mp)
{
	mp->m_resblks_save = mp->m_resblks;
	xfs_reserve_blocks(mp, 0);
}

/*
 * Re-establish the block reserve: the value saved by xfs_save_resvblks()
 * when that is non-zero (the saved value is then cleared), otherwise
 * xfs_default_resblks().
 */
STATIC void
xfs_restore_resvblks(struct xfs_mount *mp)
{
	uint64_t resblks;

	if (mp->m_resblks_save) {
		resblks = mp->m_resblks_save;
		mp->m_resblks_save = 0;
	} else
		resblks = xfs_default_resblks(mp);

	xfs_reserve_blocks(mp, resblks);
}

/*
 * Second stage of a freeze. The data is already frozen so we only
 * need to take care of the metadata. Once that's done sync the superblock
 * to the log to dirty it in case of a crash while frozen. This ensures that we
 * will recover the unlinked inode lists on the next mount.
*/ STATIC int xfs_fs_freeze( struct super_block *sb) { struct xfs_mount *mp = XFS_M(sb); unsigned int flags; int ret; /* * The filesystem is now frozen far enough that memory reclaim * cannot safely operate on the filesystem. Hence we need to * set a GFP_NOFS context here to avoid recursion deadlocks. */ flags = memalloc_nofs_save(); xfs_save_resvblks(mp); ret = xfs_log_quiesce(mp); memalloc_nofs_restore(flags); /* * For read-write filesystems, we need to restart the inodegc on error * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not * going to be run to restart it now. We are at SB_FREEZE_FS level * here, so we can restart safely without racing with a stop in * xfs_fs_sync_fs(). */ if (ret && !xfs_is_readonly(mp)) { xfs_blockgc_start(mp); xfs_inodegc_start(mp); } return ret; } STATIC int xfs_fs_unfreeze( struct super_block *sb) { struct xfs_mount *mp = XFS_M(sb); xfs_restore_resvblks(mp); xfs_log_work_queue(mp); /* * Don't reactivate the inodegc worker on a readonly filesystem because * inodes are sent directly to reclaim. Don't reactivate the blockgc * worker because there are no speculative preallocations on a readonly * filesystem. */ if (!xfs_is_readonly(mp)) { xfs_blockgc_start(mp); xfs_inodegc_start(mp); } return 0; } /* * This function fills in xfs_mount_t fields based on mount args. * Note: the superblock _has_ now been read in. 
*/
STATIC int
xfs_finish_flags(
	struct xfs_mount	*mp)
{
	/* Fail a mount where the logbuf is smaller than the log stripe */
	if (xfs_has_logv2(mp)) {
		/*
		 * No usable logbsize was given: if the log stripe unit is
		 * larger than XLOG_BIG_RECORD_BSIZE, use the stripe unit as
		 * the log buffer size.
		 */
		if (mp->m_logbsize <= 0 &&
		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
			mp->m_logbsize = mp->m_sb.sb_logsunit;
		} else if (mp->m_logbsize > 0 &&
			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
			xfs_warn(mp,
		"logbuf size must be greater than or equal to log stripe size");
			return -EINVAL;
		}
	} else {
		/* Fail a mount if the logbuf is larger than 32K */
		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
			xfs_warn(mp,
		"logbuf size for version 1 logs must be 16K or 32K");
			return -EINVAL;
		}
	}

	/*
	 * V5 filesystems always use attr2 format for attributes.
	 */
	if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
		xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
			     "attr2 is always enabled for V5 filesystems.");
		return -EINVAL;
	}

	/*
	 * prohibit r/w mounts of read-only filesystems
	 */
	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
		xfs_warn(mp,
			"cannot mount a read-only filesystem as read-write");
		return -EROFS;
	}

	/*
	 * Group and project quota accounting together require the pquotino
	 * superblock feature.
	 */
	if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
	    (mp->m_qflags & XFS_PQUOTA_ACCT) &&
	    !xfs_has_pquotino(mp)) {
		xfs_warn(mp,
		  "Super block does not support project and group quota together");
		return -EINVAL;
	}

	return 0;
}

/*
 * Initialise the per-mount percpu counters (inode count, free inodes, free
 * data blocks, delalloc blocks, free realtime extents) to zero.
 *
 * Returns 0 on success.  Any failure is reported as -ENOMEM after the
 * counters that were already initialised have been destroyed again.
 */
static int
xfs_init_percpu_counters(
	struct xfs_mount	*mp)
{
	int		error;

	error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
	if (error)
		return -ENOMEM;

	error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
	if (error)
		goto free_icount;

	error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
	if (error)
		goto free_ifree;

	error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
	if (error)
		goto free_fdblocks;

	error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL);
	if (error)
		goto free_delalloc;

	return 0;

	/* Unwind in the reverse order of initialisation. */
free_delalloc:
	percpu_counter_destroy(&mp->m_delalloc_blks);
free_fdblocks:
	percpu_counter_destroy(&mp->m_fdblocks);
free_ifree:
	percpu_counter_destroy(&mp->m_ifree);
free_icount:
	percpu_counter_destroy(&mp->m_icount);
	return -ENOMEM;
}

/*
 * Reload the inode, free inode, free block and free realtime extent
 * counters from the values in the in-core superblock.  The delalloc counter
 * is not touched.
 */
void
xfs_reinit_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
	percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
}

/*
 * Tear down the counters set up by xfs_init_percpu_counters().  Unless the
 * filesystem has been shut down, the delalloc counter is asserted to have
 * drained to zero first.
 */
static void
xfs_destroy_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_destroy(&mp->m_icount);
	percpu_counter_destroy(&mp->m_ifree);
	percpu_counter_destroy(&mp->m_fdblocks);
	ASSERT(xfs_is_shutdown(mp) ||
	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
	percpu_counter_destroy(&mp->m_delalloc_blks);
	percpu_counter_destroy(&mp->m_frextents);
}

/*
 * Allocate the per-cpu inodegc structures and initialise one for every
 * possible CPU: empty list, zero item count and error, and a delayed work
 * item that runs xfs_inodegc_worker().
 *
 * Returns 0 on success or -ENOMEM if the per-cpu allocation fails.
 */
static int
xfs_inodegc_init_percpu(
	struct xfs_mount	*mp)
{
	struct xfs_inodegc	*gc;
	int			cpu;

	mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
	if (!mp->m_inodegc)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		gc = per_cpu_ptr(mp->m_inodegc, cpu);
		gc->cpu = cpu;
		gc->mp = mp;
		init_llist_head(&gc->list);
		gc->items = 0;
		gc->error = 0;
		INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
	}
	return 0;
}

/* Free the per-cpu inodegc structures, if they were ever allocated. */
static void
xfs_inodegc_free_percpu(
	struct xfs_mount	*mp)
{
	if (!mp->m_inodegc)
		return;
	free_percpu(mp->m_inodegc);
}

/*
 * ->put_super: unmount the filesystem and release the per-mount resources
 * (superblock buffer, stats, inodegc structures, percpu counters, workqueues
 * and devices).
 */
static void
xfs_fs_put_super(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid);
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);

	xfs_freesb(mp);
	xchk_mount_stats_free(mp);
	free_percpu(mp->m_stats.xs_stats);
	xfs_inodegc_free_percpu(mp);
	xfs_destroy_percpu_counters(mp);
	xfs_destroy_mount_workqueues(mp);
	xfs_shutdown_devices(mp);
}

/*
 * ->nr_cached_objects: report the value of xfs_reclaim_inodes_count() for
 * this mount, or 0 (with a one-time warning) if the superblock has no
 * xfs_mount attached.
 */
static long
xfs_fs_nr_cached_objects(
	struct super_block	*sb,
	struct shrink_control	*sc)
{
	/* Paranoia: catch incorrect calls during mount setup or teardown */
	if (WARN_ON_ONCE(!sb->s_fs_info))
		return 0;
	return xfs_reclaim_inodes_count(XFS_M(sb));
}

/* ->free_cached_objects: hand the request to xfs_reclaim_inodes_nr(). */
static long
xfs_fs_free_cached_objects(
	struct super_block	*sb,
	struct shrink_control	*sc)
{
	return xfs_reclaim_inodes_nr(XFS_M(sb),
sc->nr_to_scan); } static void xfs_fs_shutdown( struct super_block *sb) { xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED); } static const struct super_operations xfs_super_operations = { .alloc_inode = xfs_fs_alloc_inode, .destroy_inode = xfs_fs_destroy_inode, .dirty_inode = xfs_fs_dirty_inode, .drop_inode = xfs_fs_drop_inode, .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, .unfreeze_fs = xfs_fs_unfreeze, .statfs = xfs_fs_statfs, .show_options = xfs_fs_show_options, .nr_cached_objects = xfs_fs_nr_cached_objects, .free_cached_objects = xfs_fs_free_cached_objects, .shutdown = xfs_fs_shutdown, }; static int suffix_kstrtoint( const char *s, unsigned int base, int *res) { int last, shift_left_factor = 0, _res; char *value; int ret = 0; value = kstrdup(s, GFP_KERNEL); if (!value) return -ENOMEM; last = strlen(value) - 1; if (value[last] == 'K' || value[last] == 'k') { shift_left_factor = 10; value[last] = '\0'; } if (value[last] == 'M' || value[last] == 'm') { shift_left_factor = 20; value[last] = '\0'; } if (value[last] == 'G' || value[last] == 'g') { shift_left_factor = 30; value[last] = '\0'; } if (kstrtoint(value, base, &_res)) ret = -EINVAL; kfree(value); *res = _res << shift_left_factor; return ret; } static inline void xfs_fs_warn_deprecated( struct fs_context *fc, struct fs_parameter *param, uint64_t flag, bool value) { /* Don't print the warning if reconfiguring and current mount point * already had the flag set */ if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) && !!(XFS_M(fc->root->d_sb)->m_features & flag) == value) return; xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); } /* * Set mount state from a mount option. * * NOTE: mp->m_super is NULL here! 
*/
static int
xfs_fs_parse_param(
	struct fs_context	*fc,
	struct fs_parameter	*param)
{
	struct xfs_mount	*parsing_mp = fc->s_fs_info;
	struct fs_parse_result	result;
	int			size = 0;
	int			opt;

	/* Look the parameter up in xfs_fs_parameters; pass errors through. */
	opt = fs_parse(fc, xfs_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_logbufs:
		parsing_mp->m_logbufs = result.uint_32;
		return 0;
	case Opt_logbsize:
		if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize))
			return -EINVAL;
		return 0;
	case Opt_logdev:
		/* A repeated option replaces the previously stored name. */
		kfree(parsing_mp->m_logname);
		parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL);
		if (!parsing_mp->m_logname)
			return -ENOMEM;
		return 0;
	case Opt_rtdev:
		/* A repeated option replaces the previously stored name. */
		kfree(parsing_mp->m_rtname);
		parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
		if (!parsing_mp->m_rtname)
			return -ENOMEM;
		return 0;
	case Opt_allocsize:
		if (suffix_kstrtoint(param->string, 10, &size))
			return -EINVAL;
		/*
		 * NOTE(review): ffs() only looks at the lowest set bit, so a
		 * size that is not a power of two is not rejected here --
		 * confirm that this is intended.
		 */
		parsing_mp->m_allocsize_log = ffs(size) - 1;
		parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
		return 0;
	case Opt_grpid:
	case Opt_bsdgroups:
		parsing_mp->m_features |= XFS_FEAT_GRPID;
		return 0;
	case Opt_nogrpid:
	case Opt_sysvgroups:
		parsing_mp->m_features &= ~XFS_FEAT_GRPID;
		return 0;
	case Opt_wsync:
		parsing_mp->m_features |= XFS_FEAT_WSYNC;
		return 0;
	case Opt_norecovery:
		parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
		return 0;
	case Opt_noalign:
		parsing_mp->m_features |= XFS_FEAT_NOALIGN;
		return 0;
	case Opt_swalloc:
		parsing_mp->m_features |= XFS_FEAT_SWALLOC;
		return 0;
	case Opt_sunit:
		parsing_mp->m_dalign = result.uint_32;
		return 0;
	case Opt_swidth:
		parsing_mp->m_swidth = result.uint_32;
		return 0;
	case Opt_inode32:
		parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
		return 0;
	case Opt_inode64:
		parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
		return 0;
	case Opt_nouuid:
		parsing_mp->m_features |= XFS_FEAT_NOUUID;
		return 0;
	case Opt_largeio:
		parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
		return 0;
	case Opt_nolargeio:
		parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
		return 0;
	case Opt_filestreams:
		parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
		return 0;
	case Opt_noquota:
		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
		return 0;
	case Opt_quota:
	case Opt_uquota:
	case Opt_usrquota:
		parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
		return 0;
	case Opt_qnoenforce:
	case Opt_uqnoenforce:
		/* Accounting on, enforcement off. */
		parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
		return 0;
	case Opt_pquota:
	case Opt_prjquota:
		parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
		return 0;
	case Opt_pqnoenforce:
		/* Accounting on, enforcement off. */
		parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
		return 0;
	case Opt_gquota:
	case Opt_grpquota:
		parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
		return 0;
	case Opt_gqnoenforce:
		/* Accounting on, enforcement off. */
		parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
		return 0;
	case Opt_discard:
		parsing_mp->m_features |= XFS_FEAT_DISCARD;
		return 0;
	case Opt_nodiscard:
		parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
		return 0;
#ifdef CONFIG_FS_DAX
	case Opt_dax:
		xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS);
		return 0;
	case Opt_dax_enum:
		xfs_mount_set_dax_mode(parsing_mp, result.uint_32);
		return 0;
#endif
	/* Following mount options will be removed in September 2025 */
	case Opt_ikeep:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
		parsing_mp->m_features |= XFS_FEAT_IKEEP;
		return 0;
	case Opt_noikeep:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
		parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
		return 0;
	case Opt_attr2:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
		parsing_mp->m_features |= XFS_FEAT_ATTR2;
		return 0;
	case Opt_noattr2:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
		parsing_mp->m_features |= XFS_FEAT_NOATTR2;
		return 0;
	default:
		xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
		return -EINVAL;
	}

	return 0;
}

/*
 * Check the parsed mount options for values and combinations that can be
 * rejected without reading the superblock.  Returns 0 if they are acceptable
 * and -EINVAL (after logging a warning) otherwise.
 */
static int
xfs_fs_validate_params(
	struct xfs_mount	*mp)
{
	/* No recovery flag requires a read-only mount */
	if (xfs_has_norecovery(mp)
&& !xfs_is_readonly(mp)) {
		xfs_warn(mp, "no-recovery mounts must be read-only.");
		return -EINVAL;
	}

	/*
	 * We have not read the superblock at this point, so only the attr2
	 * mount option can set the attr2 feature by this stage.
	 */
	if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
		xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
		return -EINVAL;
	}

	if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
		xfs_warn(mp,
	"sunit and swidth options incompatible with the noalign option");
		return -EINVAL;
	}

	/* Any quota flag is an error when quota support is compiled out. */
	if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) {
		xfs_warn(mp, "quota support not available in this kernel.");
		return -EINVAL;
	}

	if ((mp->m_dalign && !mp->m_swidth) ||
	    (!mp->m_dalign && mp->m_swidth)) {
		xfs_warn(mp, "sunit and swidth must be specified together");
		return -EINVAL;
	}

	if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
		xfs_warn(mp,
	"stripe width (%d) must be a multiple of the stripe unit (%d)",
			mp->m_swidth, mp->m_dalign);
		return -EINVAL;
	}

	/* -1 and 0 are let through without a range check. */
	if (mp->m_logbufs != -1 &&
	    mp->m_logbufs != 0 &&
	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
		return -EINVAL;
	}

	/*
	 * -1 and 0 are let through; anything else must be a power of two
	 * within the supported record size range.
	 */
	if (mp->m_logbsize != -1 &&
	    mp->m_logbsize !=  0 &&
	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
	     !is_power_of_2(mp->m_logbsize))) {
		xfs_warn(mp,
			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
			mp->m_logbsize);
		return -EINVAL;
	}

	/* The allocsize range is only checked if the option was given. */
	if (xfs_has_allocsize(mp) &&
	    (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
	     mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
		xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
			mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
		return -EINVAL;
	}

	return 0;
}

/*
 * Create a debugfs directory called @name underneath @parent.  Returns the
 * new dentry, or NULL if debugfs_create_dir() handed back an error pointer.
 */
struct dentry *
xfs_debugfs_mkdir(
	const char	*name,
	struct dentry	*parent)
{
	struct dentry	*child;

	/* Apparently we're expected to ignore error returns??
*/
	child = debugfs_create_dir(name, parent);
	if (IS_ERR(child))
		return NULL;

	return child;
}

/*
 * Set up the VFS superblock @sb and mount the filesystem described by the
 * xfs_mount stored in sb->s_fs_info.
 *
 * Returns 0 with sb->s_root set on success.  On failure a negative errno is
 * returned and everything that was set up after xfs_open_devices() succeeded
 * is torn down again in reverse order through the labels at the end of the
 * function.
 */
static int
xfs_fs_fill_super(
	struct super_block	*sb,
	struct fs_context	*fc)
{
	struct xfs_mount	*mp = sb->s_fs_info;
	struct inode		*root;
	int			flags = 0, error;

	mp->m_super = sb;

	/*
	 * Copy VFS mount flags from the context now that all parameter parsing
	 * is guaranteed to have been completed by either the old mount API or
	 * the newer fsopen/fsconfig API.
	 */
	if (fc->sb_flags & SB_RDONLY)
		set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
	if (fc->sb_flags & SB_DIRSYNC)
		mp->m_features |= XFS_FEAT_DIRSYNC;
	if (fc->sb_flags & SB_SYNCHRONOUS)
		mp->m_features |= XFS_FEAT_WSYNC;

	error = xfs_fs_validate_params(mp);
	if (error)
		return error;

	sb_min_blocksize(sb, BBSIZE);
	sb->s_xattr = xfs_xattr_handlers;
	sb->s_export_op = &xfs_export_operations;
#ifdef CONFIG_XFS_QUOTA
	sb->s_qcop = &xfs_quotactl_operations;
	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
	sb->s_op = &xfs_super_operations;

	/*
	 * Delay mount work if the debug hook is set. This is debug
	 * instrumentation to coordinate simulation of xfs mount failures with
	 * VFS superblock operations
	 */
	if (xfs_globals.mount_delay) {
		xfs_notice(mp, "Delaying mount for %d seconds.",
			xfs_globals.mount_delay);
		msleep(xfs_globals.mount_delay * 1000);
	}

	if (fc->sb_flags & SB_SILENT)
		flags |= XFS_MFSI_QUIET;

	/* Nothing needs unwinding yet if the devices cannot be opened. */
	error = xfs_open_devices(mp);
	if (error)
		return error;

	/* Per-mount debugfs directory, only if the global one exists. */
	if (xfs_debugfs) {
		mp->m_debugfs = xfs_debugfs_mkdir(mp->m_super->s_id,
						  xfs_debugfs);
	} else {
		mp->m_debugfs = NULL;
	}

	error = xfs_init_mount_workqueues(mp);
	if (error)
		goto out_shutdown_devices;

	error = xfs_init_percpu_counters(mp);
	if (error)
		goto out_destroy_workqueues;

	error = xfs_inodegc_init_percpu(mp);
	if (error)
		goto out_destroy_counters;

	/* Allocate stats memory before we do operations that might use it */
	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
	if (!mp->m_stats.xs_stats) {
		error = -ENOMEM;
		goto out_destroy_inodegc;
	}

	error = xchk_mount_stats_alloc(mp);
	if (error)
		goto out_free_stats;

	error = xfs_readsb(mp, flags);
	if (error)
		goto out_free_scrub_stats;

	error = xfs_finish_flags(mp);
	if (error)
		goto out_free_sb;

	error = xfs_setup_devices(mp);
	if (error)
		goto out_free_sb;

	/* V4 support is undergoing deprecation. */
	if (!xfs_has_crc(mp)) {
#ifdef CONFIG_XFS_SUPPORT_V4
		xfs_warn_once(mp,
	"Deprecated V4 format (crc=0) will not be supported after September 2030.");
#else
		xfs_warn(mp,
	"Deprecated V4 format (crc=0) not supported by kernel.");
		error = -EINVAL;
		goto out_free_sb;
#endif
	}

	/* ASCII case insensitivity is undergoing deprecation. */
	if (xfs_has_asciici(mp)) {
#ifdef CONFIG_XFS_SUPPORT_ASCII_CI
		xfs_warn_once(mp,
	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) will not be supported after September 2030.");
#else
		xfs_warn(mp,
	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) not supported by kernel.");
		error = -EINVAL;
		goto out_free_sb;
#endif
	}

	/* Filesystem claims it needs repair, so refuse the mount. */
	if (xfs_has_needsrepair(mp)) {
		xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	/*
	 * Don't touch the filesystem if a user tool thinks it owns the primary
	 * superblock.  mkfs doesn't clear the flag from secondary supers, so
	 * we don't check them at all.
	 */
	if (mp->m_sb.sb_inprogress) {
		xfs_warn(mp, "Offline file system operation in progress!");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	/*
	 * Until this is fixed only page-sized or smaller data blocks work.
	 */
	if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
		xfs_warn(mp,
		"File system with blocksize %d bytes. "
		"Only pagesize (%ld) or less will currently work.",
				mp->m_sb.sb_blocksize, PAGE_SIZE);
		error = -ENOSYS;
		goto out_free_sb;
	}

	/* Ensure this filesystem fits in the page cache limits */
	if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
	    xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
		xfs_warn(mp,
		"file system too large to be mounted on this system.");
		error = -EFBIG;
		goto out_free_sb;
	}

	/*
	 * XFS block mappings use 54 bits to store the logical block offset.
	 * This should suffice to handle the maximum file size that the VFS
	 * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
	 * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
	 * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
	 * to check this assertion.
	 *
	 * Avoid integer overflow by comparing the maximum bmbt offset to the
	 * maximum pagecache offset in units of fs blocks.
	 */
	if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
		xfs_warn(mp,
"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
			 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
			 XFS_MAX_FILEOFF);
		error = -EINVAL;
		goto out_free_sb;
	}

	error = xfs_filestream_mount(mp);
	if (error)
		goto out_free_sb;

	/*
	 * we must configure the block size in the superblock before we run the
	 * full mount process as the mount process can lookup and cache inodes.
	 */
	sb->s_magic = XFS_SUPER_MAGIC;
	sb->s_blocksize = mp->m_sb.sb_blocksize;
	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_max_links = XFS_MAXLINK;
	sb->s_time_gran = 1;

	/* The supported timestamp range depends on the bigtime feature. */
	if (xfs_has_bigtime(mp)) {
		sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
		sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
	} else {
		sb->s_time_min = XFS_LEGACY_TIME_MIN;
		sb->s_time_max = XFS_LEGACY_TIME_MAX;
	}
	trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
	sb->s_iflags |= SB_I_CGROUPWB;

	set_posix_acl_flag(sb);

	/* version 5 superblocks support inode version counters. */
	if (xfs_has_crc(mp))
		sb->s_flags |= SB_I_VERSION;

	if (xfs_has_dax_always(mp)) {
		error = xfs_setup_dax_always(mp);
		if (error)
			goto out_filestream_unmount;
	}

	/* Unsupported discard is not fatal: warn and drop the feature. */
	if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
		xfs_warn(mp,
	"mounting with \"discard\" option, but the device does not support discard");
		mp->m_features &= ~XFS_FEAT_DISCARD;
	}

	if (xfs_has_reflink(mp)) {
		if (mp->m_sb.sb_rblocks) {
			xfs_alert(mp,
	"reflink not compatible with realtime device!");
			error = -EINVAL;
			goto out_filestream_unmount;
		}

		if (xfs_globals.always_cow) {
			xfs_info(mp, "using DEBUG-only always_cow mode.");
			mp->m_always_cow = true;
		}
	}

	if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) {
		xfs_alert(mp,
	"reverse mapping btree not compatible with realtime device!");
		error = -EINVAL;
		goto out_filestream_unmount;
	}

	error = xfs_mountfs(mp);
	if (error)
		goto out_filestream_unmount;

	/* Take a reference on the root inode and build the root dentry. */
	root = igrab(VFS_I(mp->m_rootip));
	if (!root) {
		error = -ENOENT;
		goto out_unmount;
	}
	sb->s_root = d_make_root(root);
	if (!sb->s_root) {
		error = -ENOMEM;
		goto out_unmount;
	}

	return 0;

	/* Error unwinding: each label undoes one setup step, newest first. */
 out_filestream_unmount:
	xfs_filestream_unmount(mp);
 out_free_sb:
	xfs_freesb(mp);
 out_free_scrub_stats:
	xchk_mount_stats_free(mp);
 out_free_stats:
	free_percpu(mp->m_stats.xs_stats);
 out_destroy_inodegc:
	xfs_inodegc_free_percpu(mp);
 out_destroy_counters:
	xfs_destroy_percpu_counters(mp);
 out_destroy_workqueues:
	xfs_destroy_mount_workqueues(mp);
 out_shutdown_devices:
	xfs_shutdown_devices(mp);
	return error;

	/*
	 * Failures after xfs_mountfs() succeeded: unmount first, then join
	 * the common unwind path after its filestream step.
	 */
 out_unmount:
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);
	goto out_free_sb;
}

/* ->get_tree: obtain a block-device superblock via xfs_fs_fill_super(). */
static int
xfs_fs_get_tree(
	struct fs_context	*fc)
{
	return get_tree_bdev(fc, xfs_fs_fill_super);
}

/*
 * Switch a read-only mount to read-write.
 *
 * Returns 0 on success, -EINVAL if the transition is not permitted
 * (norecovery mount, or a V5 filesystem with unknown ro-compat features), or
 * the error from writing the superblock or reserving the per-AG blocks.
 */
static int
xfs_remount_rw(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &mp->m_sb;
	int error;

	if (xfs_has_norecovery(mp)) {
		xfs_warn(mp,
			"ro->rw transition prohibited on norecovery mount");
		return -EINVAL;
	}

	if (xfs_sb_is_v5(sbp) &&
	    xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
		xfs_warn(mp,
	"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
			(sbp->sb_features_ro_compat &
				XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
		return -EINVAL;
	}

	clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);

	/*
	 * If this is the first remount to writeable state we might have some
	 * superblock changes to update.
	 */
	if (mp->m_update_sb) {
		error = xfs_sync_sb(mp, false);
		if (error) {
			xfs_warn(mp, "failed to write sb changes");
			return error;
		}
		mp->m_update_sb = false;
	}

	/*
	 * Fill out the reserve pool if it is empty. Use the stashed value if
	 * it is non-zero, otherwise go with the default.
	 */
	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);
	xfs_blockgc_start(mp);

	/* Create the per-AG metadata reservation pool. */
	error = xfs_fs_reserve_ag_blocks(mp);
	/* -ENOSPC from the reservation is tolerated; other errors are not. */
	if (error && error != -ENOSPC)
		return error;

	/* Re-enable the background inode inactivation worker. */
	xfs_inodegc_start(mp);

	return 0;
}

/*
 * Switch a read-write mount to read-only.  Returns 0 on success or the error
 * from the step that failed.
 */
static int
xfs_remount_ro(
	struct xfs_mount	*mp)
{
	struct xfs_icwalk	icw = {
		.icw_flags	= XFS_ICWALK_FLAG_SYNC,
	};
	int			error;

	/* Flush all the dirty data to disk. */
	error = sync_filesystem(mp->m_super);
	if (error)
		return error;

	/*
	 * Cancel background eofb scanning so it cannot race with the final
	 * log force+buftarg wait and deadlock the remount.
*/
	xfs_blockgc_stop(mp);

	/*
	 * Clear out all remaining COW staging extents and speculative post-EOF
	 * preallocations so that we don't leave inodes requiring inactivation
	 * cleanups during reclaim on a read-only mount.  We must process every
	 * cached inode, so this requires a synchronous cache scan.
	 */
	error = xfs_blockgc_free_space(mp, &icw);
	if (error) {
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return error;
	}

	/*
	 * Stop the inodegc background worker.  xfs_fs_reconfigure already
	 * flushed all pending inodegc work when it sync'd the filesystem.
	 * The VFS holds s_umount, so we know that inodes cannot enter
	 * xfs_fs_destroy_inode during a remount operation.  In readonly mode
	 * we send inodes straight to reclaim, so no inodes will be queued.
	 */
	xfs_inodegc_stop(mp);

	/* Free the per-AG metadata reservation pool. */
	error = xfs_fs_unreserve_ag_blocks(mp);
	if (error) {
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return error;
	}

	/*
	 * Before we sync the metadata, we need to free up the reserve block
	 * pool so that the used block count in the superblock on disk is
	 * correct at the end of the remount. Stash the current reserve pool
	 * size so that if we get remounted rw, we can return it to the same
	 * size.
	 */
	xfs_save_resvblks(mp);

	/* Clean the log, then mark the mount read-only. */
	xfs_log_clean(mp);
	set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);

	return 0;
}

/*
 * Logically we would return an error here to prevent users from believing
 * they might have changed mount options using remount which can't be changed.
 *
 * But unfortunately mount(8) adds all options from mtab and fstab to the mount
 * arguments in some cases so we can't blindly reject options, but have to
 * check for each specified option if it actually differs from the currently
 * set option and only reject it if that's the case.
 *
 * Until that is implemented we return success for every remount request, and
 * silently ignore all options that we can't actually change.
*/ static int xfs_fs_reconfigure( struct fs_context *fc) { struct xfs_mount *mp = XFS_M(fc->root->d_sb); struct xfs_mount *new_mp = fc->s_fs_info; int flags = fc->sb_flags; int error; /* version 5 superblocks always support version counters. */ if (xfs_has_crc(mp)) fc->sb_flags |= SB_I_VERSION; error = xfs_fs_validate_params(new_mp); if (error) return error; /* inode32 -> inode64 */ if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { mp->m_features &= ~XFS_FEAT_SMALL_INUMS; mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); } /* inode64 -> inode32 */ if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) { mp->m_features |= XFS_FEAT_SMALL_INUMS; mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); } /* ro -> rw */ if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) { error = xfs_remount_rw(mp); if (error) return error; } /* rw -> ro */ if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) { error = xfs_remount_ro(mp); if (error) return error; } return 0; } static void xfs_fs_free( struct fs_context *fc) { struct xfs_mount *mp = fc->s_fs_info; /* * mp is stored in the fs_context when it is initialized. * mp is transferred to the superblock on a successful mount, * but if an error occurs before the transfer we have to free * it here. */ if (mp) xfs_mount_free(mp); } static const struct fs_context_operations xfs_context_ops = { .parse_param = xfs_fs_parse_param, .get_tree = xfs_fs_get_tree, .reconfigure = xfs_fs_reconfigure, .free = xfs_fs_free, }; /* * WARNING: do not initialise any parameters in this function that depend on * mount option parsing having already been performed as this can be called from * fsopen() before any parameters have been set. 
*/
static int
xfs_init_fs_context(
	struct fs_context	*fc)
{
	struct xfs_mount	*mp;

	/* Zeroed allocation: fields not set below start out as 0/NULL. */
	mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO);
	if (!mp)
		return -ENOMEM;

	spin_lock_init(&mp->m_sb_lock);
	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
	spin_lock_init(&mp->m_perag_lock);
	mutex_init(&mp->m_growlock);
	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
	mp->m_kobj.kobject.kset = xfs_kset;
	/*
	 * We don't create the finobt per-ag space reservation until after log
	 * recovery, so we must set this to true so that an ifree transaction
	 * started during log recovery will not depend on space reservations
	 * for finobt expansion.
	 */
	mp->m_finobt_nores = true;

	/*
	 * These can be overridden by the mount option parsing.
	 */
	mp->m_logbufs = -1;
	mp->m_logbsize = -1;
	mp->m_allocsize_log = 16; /* 64k */

	/* Hand the new mount structure and our operations to the context. */
	fc->s_fs_info = mp;
	fc->ops = &xfs_context_ops;

	return 0;
}

/*
 * ->kill_sb: tear down the block superblock, then free the xfs_mount that
 * was attached to it.
 */
static void
xfs_kill_sb(
	struct super_block		*sb)
{
	kill_block_super(sb);
	xfs_mount_free(XFS_M(sb));
}

/* Filesystem type registered with the VFS by init_xfs_fs(). */
static struct file_system_type xfs_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "xfs",
	.init_fs_context	= xfs_init_fs_context,
	.parameters		= xfs_fs_parameters,
	.kill_sb		= xfs_kill_sb,
	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("xfs");

/*
 * Create all of the global slab caches used by XFS.
 *
 * Returns 0 on success.  If any cache cannot be created, the caches created
 * so far are destroyed in reverse order and -ENOMEM is returned; this is
 * also the value returned when one of the helper init functions fails,
 * whatever error that helper reported.
 */
STATIC int __init
xfs_init_caches(void)
{
	int		error;

	xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
					 SLAB_HWCACHE_ALIGN |
					 SLAB_RECLAIM_ACCOUNT |
					 SLAB_MEM_SPREAD,
					 NULL);
	if (!xfs_buf_cache)
		goto out;

	xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket",
						sizeof(struct xlog_ticket),
						0, 0, NULL);
	if (!xfs_log_ticket_cache)
		goto out_destroy_buf_cache;

	error = xfs_btree_init_cur_caches();
	if (error)
		goto out_destroy_log_ticket_cache;

	error = xfs_defer_init_item_caches();
	if (error)
		goto out_destroy_btree_cur_cache;

	xfs_da_state_cache = kmem_cache_create("xfs_da_state",
					      sizeof(struct xfs_da_state),
					      0, 0, NULL);
	if (!xfs_da_state_cache)
		goto out_destroy_defer_item_cache;

	xfs_ifork_cache = kmem_cache_create("xfs_ifork",
					   sizeof(struct xfs_ifork),
					   0, 0, NULL);
	if (!xfs_ifork_cache)
		goto out_destroy_da_state_cache;

	xfs_trans_cache = kmem_cache_create("xfs_trans",
					   sizeof(struct xfs_trans),
					   0, 0, NULL);
	if (!xfs_trans_cache)
		goto out_destroy_ifork_cache;


	/*
	 * The size of the cache-allocated buf log item is the maximum
	 * size possible under XFS.  This wastes a little bit of memory,
	 * but it is much faster.
	 */
	xfs_buf_item_cache = kmem_cache_create("xfs_buf_item",
					      sizeof(struct xfs_buf_log_item),
					      0, 0, NULL);
	if (!xfs_buf_item_cache)
		goto out_destroy_trans_cache;

	xfs_efd_cache = kmem_cache_create("xfs_efd_item",
			xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_efd_cache)
		goto out_destroy_buf_item_cache;

	xfs_efi_cache = kmem_cache_create("xfs_efi_item",
			xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_efi_cache)
		goto out_destroy_efd_cache;

	/* The inode cache is the only one created with a constructor. */
	xfs_inode_cache = kmem_cache_create("xfs_inode",
					   sizeof(struct xfs_inode), 0,
					   (SLAB_HWCACHE_ALIGN |
					    SLAB_RECLAIM_ACCOUNT |
					    SLAB_MEM_SPREAD | SLAB_ACCOUNT),
					   xfs_fs_inode_init_once);
	if (!xfs_inode_cache)
		goto out_destroy_efi_cache;

	xfs_ili_cache = kmem_cache_create("xfs_ili",
					 sizeof(struct xfs_inode_log_item), 0,
					 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
					 NULL);
	if (!xfs_ili_cache)
		goto out_destroy_inode_cache;

	xfs_icreate_cache = kmem_cache_create("xfs_icr",
					     sizeof(struct xfs_icreate_item),
					     0, 0, NULL);
	if (!xfs_icreate_cache)
		goto out_destroy_ili_cache;

	xfs_rud_cache = kmem_cache_create("xfs_rud_item",
					 sizeof(struct xfs_rud_log_item),
					 0, 0, NULL);
	if (!xfs_rud_cache)
		goto out_destroy_icreate_cache;

	xfs_rui_cache = kmem_cache_create("xfs_rui_item",
			xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_rui_cache)
		goto out_destroy_rud_cache;

	xfs_cud_cache = kmem_cache_create("xfs_cud_item",
					 sizeof(struct xfs_cud_log_item),
					 0, 0, NULL);
	if (!xfs_cud_cache)
		goto out_destroy_rui_cache;

	xfs_cui_cache = kmem_cache_create("xfs_cui_item",
			xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_cui_cache)
		goto out_destroy_cud_cache;

	xfs_bud_cache = kmem_cache_create("xfs_bud_item",
					 sizeof(struct xfs_bud_log_item),
					 0, 0, NULL);
	if (!xfs_bud_cache)
		goto out_destroy_cui_cache;

	xfs_bui_cache = kmem_cache_create("xfs_bui_item",
			xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_bui_cache)
		goto out_destroy_bud_cache;

	xfs_attrd_cache = kmem_cache_create("xfs_attrd_item",
					    sizeof(struct xfs_attrd_log_item),
					    0, 0, NULL);
	if (!xfs_attrd_cache)
		goto out_destroy_bui_cache;

	xfs_attri_cache = kmem_cache_create("xfs_attri_item",
					    sizeof(struct xfs_attri_log_item),
					    0, 0, NULL);
	if (!xfs_attri_cache)
		goto out_destroy_attrd_cache;

	xfs_iunlink_cache = kmem_cache_create("xfs_iul_item",
					     sizeof(struct xfs_iunlink_item),
					     0, 0, NULL);
	if (!xfs_iunlink_cache)
		goto out_destroy_attri_cache;

	return 0;

	/* Unwind ladder: destroy in the reverse order of creation. */
 out_destroy_attri_cache:
	kmem_cache_destroy(xfs_attri_cache);
 out_destroy_attrd_cache:
	kmem_cache_destroy(xfs_attrd_cache);
 out_destroy_bui_cache:
	kmem_cache_destroy(xfs_bui_cache);
 out_destroy_bud_cache:
	kmem_cache_destroy(xfs_bud_cache);
 out_destroy_cui_cache:
	kmem_cache_destroy(xfs_cui_cache);
 out_destroy_cud_cache:
	kmem_cache_destroy(xfs_cud_cache);
 out_destroy_rui_cache:
	kmem_cache_destroy(xfs_rui_cache);
 out_destroy_rud_cache:
	kmem_cache_destroy(xfs_rud_cache);
 out_destroy_icreate_cache:
	kmem_cache_destroy(xfs_icreate_cache);
 out_destroy_ili_cache:
	kmem_cache_destroy(xfs_ili_cache);
 out_destroy_inode_cache:
	kmem_cache_destroy(xfs_inode_cache);
 out_destroy_efi_cache:
	kmem_cache_destroy(xfs_efi_cache);
 out_destroy_efd_cache:
	kmem_cache_destroy(xfs_efd_cache);
 out_destroy_buf_item_cache:
	kmem_cache_destroy(xfs_buf_item_cache);
 out_destroy_trans_cache:
	kmem_cache_destroy(xfs_trans_cache);
 out_destroy_ifork_cache:
	kmem_cache_destroy(xfs_ifork_cache);
 out_destroy_da_state_cache:
	kmem_cache_destroy(xfs_da_state_cache);
 out_destroy_defer_item_cache:
	xfs_defer_destroy_item_caches();
 out_destroy_btree_cur_cache:
	xfs_btree_destroy_cur_caches();
 out_destroy_log_ticket_cache:
	kmem_cache_destroy(xfs_log_ticket_cache);
 out_destroy_buf_cache:
	kmem_cache_destroy(xfs_buf_cache);
 out:
	return -ENOMEM;
}

/*
 * Destroy every cache created by xfs_init_caches(), in the reverse order of
 * creation.
 */
STATIC void
xfs_destroy_caches(void)
{
	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy caches.
	 */
	rcu_barrier();
	kmem_cache_destroy(xfs_iunlink_cache);
	kmem_cache_destroy(xfs_attri_cache);
	kmem_cache_destroy(xfs_attrd_cache);
	kmem_cache_destroy(xfs_bui_cache);
	kmem_cache_destroy(xfs_bud_cache);
	kmem_cache_destroy(xfs_cui_cache);
	kmem_cache_destroy(xfs_cud_cache);
	kmem_cache_destroy(xfs_rui_cache);
	kmem_cache_destroy(xfs_rud_cache);
	kmem_cache_destroy(xfs_icreate_cache);
	kmem_cache_destroy(xfs_ili_cache);
	kmem_cache_destroy(xfs_inode_cache);
	kmem_cache_destroy(xfs_efi_cache);
	kmem_cache_destroy(xfs_efd_cache);
	kmem_cache_destroy(xfs_buf_item_cache);
	kmem_cache_destroy(xfs_trans_cache);
	kmem_cache_destroy(xfs_ifork_cache);
	kmem_cache_destroy(xfs_da_state_cache);
	xfs_defer_destroy_item_caches();
	xfs_btree_destroy_cur_caches();
	kmem_cache_destroy(xfs_log_ticket_cache);
	kmem_cache_destroy(xfs_buf_cache);
}

/*
 * Create the two global workqueues, xfs_alloc_wq and xfs_discard_wq.
 * Returns 0 on success or -ENOMEM if either cannot be allocated.
 */
STATIC int __init
xfs_init_workqueues(void)
{
	/*
	 * The allocation workqueue can be used in memory reclaim situations
	 * (writepage path), and parallelism is only limited by the number of
	 * AGs in all the filesystems mounted. Hence use the default large
	 * max_active value for this workqueue.
*/ xfs_alloc_wq = alloc_workqueue("xfsalloc", XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0); if (!xfs_alloc_wq) return -ENOMEM; xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND), 0); if (!xfs_discard_wq) goto out_free_alloc_wq; return 0; out_free_alloc_wq: destroy_workqueue(xfs_alloc_wq); return -ENOMEM; } STATIC void xfs_destroy_workqueues(void) { destroy_workqueue(xfs_discard_wq); destroy_workqueue(xfs_alloc_wq); } STATIC int __init init_xfs_fs(void) { int error; xfs_check_ondisk_structs(); error = xfs_dahash_test(); if (error) return error; printk(KERN_INFO XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"); xfs_dir_startup(); error = xfs_init_caches(); if (error) goto out; error = xfs_init_workqueues(); if (error) goto out_destroy_caches; error = xfs_mru_cache_init(); if (error) goto out_destroy_wq; error = xfs_init_procfs(); if (error) goto out_mru_cache_uninit; error = xfs_sysctl_register(); if (error) goto out_cleanup_procfs; xfs_debugfs = xfs_debugfs_mkdir("xfs", NULL); xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); if (!xfs_kset) { error = -ENOMEM; goto out_debugfs_unregister; } xfsstats.xs_kobj.kobject.kset = xfs_kset; xfsstats.xs_stats = alloc_percpu(struct xfsstats); if (!xfsstats.xs_stats) { error = -ENOMEM; goto out_kset_unregister; } error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL, "stats"); if (error) goto out_free_stats; error = xchk_global_stats_setup(xfs_debugfs); if (error) goto out_remove_stats_kobj; #ifdef DEBUG xfs_dbg_kobj.kobject.kset = xfs_kset; error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); if (error) goto out_remove_scrub_stats; #endif error = xfs_qm_init(); if (error) goto out_remove_dbg_kobj; error = register_filesystem(&xfs_fs_type); if (error) goto out_qm_exit; return 0; out_qm_exit: xfs_qm_exit(); out_remove_dbg_kobj: #ifdef DEBUG xfs_sysfs_del(&xfs_dbg_kobj); out_remove_scrub_stats: #endif xchk_global_stats_teardown(); out_remove_stats_kobj: 
xfs_sysfs_del(&xfsstats.xs_kobj); out_free_stats: free_percpu(xfsstats.xs_stats); out_kset_unregister: kset_unregister(xfs_kset); out_debugfs_unregister: debugfs_remove(xfs_debugfs); xfs_sysctl_unregister(); out_cleanup_procfs: xfs_cleanup_procfs(); out_mru_cache_uninit: xfs_mru_cache_uninit(); out_destroy_wq: xfs_destroy_workqueues(); out_destroy_caches: xfs_destroy_caches(); out: return error; } STATIC void __exit exit_xfs_fs(void) { xfs_qm_exit(); unregister_filesystem(&xfs_fs_type); #ifdef DEBUG xfs_sysfs_del(&xfs_dbg_kobj); #endif xchk_global_stats_teardown(); xfs_sysfs_del(&xfsstats.xs_kobj); free_percpu(xfsstats.xs_stats); kset_unregister(xfs_kset); debugfs_remove(xfs_debugfs); xfs_sysctl_unregister(); xfs_cleanup_procfs(); xfs_mru_cache_uninit(); xfs_destroy_workqueues(); xfs_destroy_caches(); xfs_uuid_table_free(); } module_init(init_xfs_fs); module_exit(exit_xfs_fs); MODULE_AUTHOR("Silicon Graphics, Inc."); MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); MODULE_LICENSE("GPL");
330 530 686 686 985 704 686 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 // SPDX-License-Identifier: GPL-2.0-only #include <linux/init.h> #include <linux/scatterlist.h> #include <linux/mempool.h> #include <linux/slab.h> #define SG_MEMPOOL_NR ARRAY_SIZE(sg_pools) #define SG_MEMPOOL_SIZE 2 struct sg_pool { size_t size; char *name; struct kmem_cache *slab; mempool_t *pool; }; #define SP(x) { .size = x, "sgpool-" __stringify(x) } #if (SG_CHUNK_SIZE < 32) #error SG_CHUNK_SIZE is too small (must be 32 or greater) #endif static struct sg_pool sg_pools[] = { SP(8), SP(16), #if (SG_CHUNK_SIZE > 32) SP(32), #if (SG_CHUNK_SIZE > 64) SP(64), #if (SG_CHUNK_SIZE > 128) SP(128), #if (SG_CHUNK_SIZE > 256) #error SG_CHUNK_SIZE is too large (256 MAX) #endif #endif #endif #endif SP(SG_CHUNK_SIZE) }; #undef SP static inline unsigned int sg_pool_index(unsigned short nents) { unsigned int index; BUG_ON(nents > SG_CHUNK_SIZE); if (nents <= 8) index = 0; else index = get_count_order(nents) - 3; return index; } static void sg_pool_free(struct scatterlist *sgl, unsigned int nents) { struct sg_pool *sgp; sgp = sg_pools + sg_pool_index(nents); mempool_free(sgl, sgp->pool); } static struct scatterlist *sg_pool_alloc(unsigned int nents, gfp_t gfp_mask) { struct sg_pool *sgp; sgp = sg_pools + sg_pool_index(nents); return mempool_alloc(sgp->pool, gfp_mask); } /** * sg_free_table_chained - Free a previously mapped sg table * @table: The sg 
table header to use * @nents_first_chunk: size of the first_chunk SGL passed to * sg_alloc_table_chained * * Description: * Free an sg table previously allocated and setup with * sg_alloc_table_chained(). * * @nents_first_chunk has to be same with that same parameter passed * to sg_alloc_table_chained(). * **/ void sg_free_table_chained(struct sg_table *table, unsigned nents_first_chunk) { if (table->orig_nents <= nents_first_chunk) return; if (nents_first_chunk == 1) nents_first_chunk = 0; __sg_free_table(table, SG_CHUNK_SIZE, nents_first_chunk, sg_pool_free, table->orig_nents); } EXPORT_SYMBOL_GPL(sg_free_table_chained); /** * sg_alloc_table_chained - Allocate and chain SGLs in an sg table * @table: The sg table header to use * @nents: Number of entries in sg list * @first_chunk: first SGL * @nents_first_chunk: number of the SGL of @first_chunk * * Description: * Allocate and chain SGLs in an sg table. If @nents@ is larger than * @nents_first_chunk a chained sg table will be setup. @first_chunk is * ignored if nents_first_chunk <= 1 because user expects the SGL points * non-chain SGL. 
* **/ int sg_alloc_table_chained(struct sg_table *table, int nents, struct scatterlist *first_chunk, unsigned nents_first_chunk) { int ret; BUG_ON(!nents); if (first_chunk && nents_first_chunk) { if (nents <= nents_first_chunk) { table->nents = table->orig_nents = nents; sg_init_table(table->sgl, nents); return 0; } } /* User supposes that the 1st SGL includes real entry */ if (nents_first_chunk <= 1) { first_chunk = NULL; nents_first_chunk = 0; } ret = __sg_alloc_table(table, nents, SG_CHUNK_SIZE, first_chunk, nents_first_chunk, GFP_ATOMIC, sg_pool_alloc); if (unlikely(ret)) sg_free_table_chained(table, nents_first_chunk); return ret; } EXPORT_SYMBOL_GPL(sg_alloc_table_chained); static __init int sg_pool_init(void) { int i; for (i = 0; i < SG_MEMPOOL_NR; i++) { struct sg_pool *sgp = sg_pools + i; int size = sgp->size * sizeof(struct scatterlist); sgp->slab = kmem_cache_create(sgp->name, size, 0, SLAB_HWCACHE_ALIGN, NULL); if (!sgp->slab) { printk(KERN_ERR "SG_POOL: can't init sg slab %s\n", sgp->name); goto cleanup_sdb; } sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE, sgp->slab); if (!sgp->pool) { printk(KERN_ERR "SG_POOL: can't init sg mempool %s\n", sgp->name); goto cleanup_sdb; } } return 0; cleanup_sdb: for (i = 0; i < SG_MEMPOOL_NR; i++) { struct sg_pool *sgp = sg_pools + i; mempool_destroy(sgp->pool); kmem_cache_destroy(sgp->slab); } return -ENOMEM; } subsys_initcall(sg_pool_init);
1 24 141 1448 117 117 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/bad_inode.c
 *
 *  Copyright (C) 1997, Stephen Tweedie
 *
 *  Provide stub functions for unreadable inodes
 *
 *  Fabian Frederick : August 2003 - All file operations assigned to EIO
 */

#include <linux/fs.h>
#include <linux/export.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/fiemap.h>

/* Opening a bad inode always fails with -EIO. */
static int bad_file_open(struct inode *inode, struct file *filp)
{
	return -EIO;
}

/* File operations installed on bad inodes: only ->open is provided. */
static const struct file_operations bad_file_ops =
{
	.open		= bad_file_open,
};

/*
 * Inode operation stubs.  Every function below ignores its arguments and
 * fails: those returning an integer return -EIO, those returning a pointer
 * return ERR_PTR(-EIO).  They exist only to populate bad_inode_ops.
 */
static int bad_inode_create(struct mnt_idmap *idmap,
			    struct inode *dir, struct dentry *dentry,
			    umode_t mode, bool excl)
{
	return -EIO;
}

static struct dentry *bad_inode_lookup(struct inode *dir,
			struct dentry *dentry, unsigned int flags)
{
	return ERR_PTR(-EIO);
}

static int bad_inode_link (struct dentry *old_dentry, struct inode *dir,
		struct dentry *dentry)
{
	return -EIO;
}

static int bad_inode_unlink(struct inode *dir, struct dentry *dentry)
{
	return -EIO;
}

static int bad_inode_symlink(struct mnt_idmap *idmap,
			     struct inode *dir, struct dentry *dentry,
			     const char *symname)
{
	return -EIO;
}

static int bad_inode_mkdir(struct mnt_idmap *idmap, struct inode *dir,
			   struct dentry *dentry, umode_t mode)
{
	return -EIO;
}

static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry)
{
	return -EIO;
}

static int bad_inode_mknod(struct mnt_idmap *idmap, struct inode *dir,
			   struct dentry *dentry, umode_t mode, dev_t rdev)
{
	return -EIO;
}

static int bad_inode_rename2(struct mnt_idmap *idmap,
			     struct inode *old_dir, struct dentry *old_dentry,
			     struct inode *new_dir, struct dentry *new_dentry,
			     unsigned int flags)
{
	return -EIO;
}

static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
		int buflen)
{
	return -EIO;
}

static int bad_inode_permission(struct mnt_idmap *idmap,
				struct inode *inode, int mask)
{
	return -EIO;
}

static int bad_inode_getattr(struct mnt_idmap *idmap,
			     const struct path *path, struct kstat *stat,
			     u32 request_mask, unsigned int query_flags)
{
	return -EIO;
}

static int bad_inode_setattr(struct mnt_idmap *idmap,
			     struct dentry *direntry, struct iattr *attrs)
{
	return -EIO;
}

static ssize_t bad_inode_listxattr(struct dentry *dentry, char *buffer,
			size_t buffer_size)
{
	return -EIO;
}

static const char *bad_inode_get_link(struct dentry *dentry,
				      struct inode *inode,
				      struct delayed_call *done)
{
	return ERR_PTR(-EIO);
}

static struct posix_acl *bad_inode_get_acl(struct inode *inode, int type, bool rcu)
{
	return ERR_PTR(-EIO);
}

static int bad_inode_fiemap(struct inode *inode,
			    struct fiemap_extent_info *fieinfo, u64 start,
			    u64 len)
{
	return -EIO;
}

static int bad_inode_update_time(struct inode *inode, int flags)
{
	return -EIO;
}

static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry,
				 struct file *file, unsigned int open_flag,
				 umode_t create_mode)
{
	return -EIO;
}

static int bad_inode_tmpfile(struct mnt_idmap *idmap,
			     struct inode *inode, struct file *file,
			     umode_t mode)
{
	return -EIO;
}

static int bad_inode_set_acl(struct mnt_idmap *idmap,
			     struct dentry *dentry, struct posix_acl *acl,
			     int type)
{
	return -EIO;
}

/*
 * Inode operations installed on bad inodes.  The address of this table is
 * also what is_bad_inode() compares against.
 */
static const struct inode_operations bad_inode_ops =
{
	.create		= bad_inode_create,
	.lookup		= bad_inode_lookup,
	.link		= bad_inode_link,
	.unlink		= bad_inode_unlink,
	.symlink	= bad_inode_symlink,
	.mkdir		= bad_inode_mkdir,
	.rmdir		= bad_inode_rmdir,
	.mknod		= bad_inode_mknod,
	.rename		= bad_inode_rename2,
	.readlink	= bad_inode_readlink,
	.permission	= bad_inode_permission,
	.getattr	= bad_inode_getattr,
	.setattr	= bad_inode_setattr,
	.listxattr	= bad_inode_listxattr,
	.get_link	= bad_inode_get_link,
	.get_inode_acl	= bad_inode_get_acl,
	.fiemap		= bad_inode_fiemap,
	.update_time	= bad_inode_update_time,
	.atomic_open	= bad_inode_atomic_open,
	.tmpfile	= bad_inode_tmpfile,
	.set_acl	= bad_inode_set_acl,
};


/*
 * When a filesystem is unable to read an inode due to an I/O error in
 * its read_inode() function, it can call make_bad_inode() to return a
 * set of stubs which will return EIO errors as required.
 *
 * We only need to do limited initialisation: all other fields are
 * preinitialised to zero automatically.
 */

/**
 *	make_bad_inode - mark an inode bad due to an I/O error
 *	@inode: Inode to mark bad
 *
 *	When an inode cannot be read due to a media or remote network
 *	failure this function makes the inode "bad" and causes I/O operations
 *	on it to fail from this point on.
 *
 *	The inode is removed from the inode hash, its mode is reset to
 *	S_IFREG, its timestamps are reinitialised, the IOP_XATTR flag is
 *	cleared and the stub inode and file operations are installed.
 */

void make_bad_inode(struct inode *inode)
{
	remove_inode_hash(inode);

	inode->i_mode = S_IFREG;
	simple_inode_init_ts(inode);
	inode->i_op = &bad_inode_ops;
	inode->i_opflags &= ~IOP_XATTR;
	inode->i_fop = &bad_file_ops;
}
EXPORT_SYMBOL(make_bad_inode);

/*
 * This tests whether an inode has been flagged as bad. The test uses
 * &bad_inode_ops to cover the case of invalidated inodes as well as
 * those created by make_bad_inode() above.
 */

/**
 *	is_bad_inode - is an inode errored
 *	@inode: inode to test
 *
 *	Returns true if the inode in question has been marked as bad.
 */

bool is_bad_inode(struct inode *inode)
{
	return (inode->i_op == &bad_inode_ops);
}

EXPORT_SYMBOL(is_bad_inode);

/**
 * iget_failed - Mark an under-construction inode as dead and release it
 * @inode: The inode to discard
 *
 * Mark an under-construction inode as dead and release it.  The inode is
 * made bad first, then unlocked, and finally the reference is dropped.
 */
void iget_failed(struct inode *inode)
{
	make_bad_inode(inode);
	unlock_new_inode(inode);
	iput(inode);
}
EXPORT_SYMBOL(iget_failed);
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <asm/unaligned.h>
#include "messages.h"
#include "ctree.h"
#include "accessors.h"

/*
 * Check that a member of @size bytes located at offset @ptr + @off lies
 * inside the extent buffer @eb.
 *
 * @ptr is not a real address here: it is cast to an integer and added to
 * @off to form the member's offset within the buffer.  When the member would
 * end beyond eb->len a warning is logged (naming "start" if the member begins
 * past the end, "end" otherwise) and false is returned; otherwise true.
 */
static bool check_setget_bounds(const struct extent_buffer *eb,
				const void *ptr, unsigned off, int size)
{
	const unsigned long member_offset = (unsigned long)ptr + off;

	if (unlikely(member_offset + size > eb->len)) {
		btrfs_warn(eb->fs_info,
		"bad eb member %s: ptr 0x%lx start %llu member offset %lu size %d",
			(member_offset > eb->len ? "start" : "end"),
			(unsigned long)ptr, eb->start, member_offset, size);
		return false;
	}

	return true;
}

/*
 * Initialise @token for accesses to @eb: the cached mapping starts out as
 * the first folio of the buffer at offset 0.
 */
void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *eb)
{
	token->eb = eb;
	token->kaddr = folio_address(eb->folios[0]);
	token->offset = 0;
}

/*
 * Macro templates that define helpers to read/write extent buffer data of a
 * given size, that are also used via ctree.h for access to item members by
 * specialized helpers.
 *
 * Generic helpers:
 * - btrfs_set_8 (for 8/16/32/64)
 * - btrfs_get_8 (for 8/16/32/64)
 *
 * Generic helpers with a token (cached address of the most recently accessed
 * page):
 * - btrfs_set_token_8 (for 8/16/32/64)
 * - btrfs_get_token_8 (for 8/16/32/64)
 *
 * The set/get functions handle data spanning two pages transparently, in case
 * metadata block size is larger than page.  Every pointer to metadata items is
 * an offset into the extent buffer page array, cast to a specific type.  This
 * gives us all the type checking.
 *
 * The extent buffer pages stored in the array folios may not form a contiguous
 * physical range, but the API functions assume the linear offset to the range
 * from 0 to metadata node size.
 *
 * Naming used inside the templates: "idx" is the index of the folio holding
 * the first byte, "oil" the offset of that byte inside the folio, and "part"
 * the number of bytes left in that folio from "oil" onwards.  A value that
 * does not fit in the remaining "part" bytes is assembled in (or split from)
 * the little-endian scratch array "lebytes" across folios idx and idx + 1.
 *
 * The token variants first test whether the access falls entirely inside the
 * folio currently cached in the token and, if not, re-point the token at the
 * folio(s) touched by this access before continuing.
 */

#define DEFINE_BTRFS_SETGET_BITS(bits)					\
u##bits btrfs_get_token_##bits(struct btrfs_map_token *token,		\
			       const void *ptr, unsigned long off)	\
{									\
	const unsigned long member_offset = (unsigned long)ptr + off;	\
	const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \
	const unsigned long oil = get_eb_offset_in_folio(token->eb,	\
							 member_offset);\
	const int unit_size = folio_size(token->eb->folios[0]);		\
	const int unit_shift = folio_shift(token->eb->folios[0]);	\
	const int size = sizeof(u##bits);				\
	u8 lebytes[sizeof(u##bits)];					\
	const int part = unit_size - oil;				\
									\
	ASSERT(token);							\
	ASSERT(token->kaddr);						\
	ASSERT(check_setget_bounds(token->eb, ptr, off, size));		\
	/* Fast path: the cached folio covers the whole member. */	\
	if (token->offset <= member_offset &&				\
	    member_offset + size <= token->offset + unit_size) {	\
		return get_unaligned_le##bits(token->kaddr + oil);	\
	}								\
	token->kaddr = folio_address(token->eb->folios[idx]);		\
	token->offset = idx << unit_shift;				\
	if (INLINE_EXTENT_BUFFER_PAGES == 1 || oil + size <= unit_size) \
		return get_unaligned_le##bits(token->kaddr + oil);	\
									\
	/* Member straddles two folios: gather it into lebytes. */	\
	memcpy(lebytes, token->kaddr + oil, part);			\
	token->kaddr = folio_address(token->eb->folios[idx + 1]);	\
	token->offset = (idx + 1) << unit_shift;			\
	memcpy(lebytes + part, token->kaddr, size - part);		\
	return get_unaligned_le##bits(lebytes);				\
}									\
u##bits btrfs_get_##bits(const struct extent_buffer *eb,		\
			 const void *ptr, unsigned long off)		\
{									\
	const unsigned long member_offset = (unsigned long)ptr + off;	\
	const unsigned long idx = get_eb_folio_index(eb, member_offset);\
	const unsigned long oil = get_eb_offset_in_folio(eb,		\
							 member_offset);\
	const int unit_size = folio_size(eb->folios[0]);		\
	char *kaddr = folio_address(eb->folios[idx]);			\
	const int size = sizeof(u##bits);				\
	const int part = unit_size - oil;				\
	u8 lebytes[sizeof(u##bits)];					\
									\
	ASSERT(check_setget_bounds(eb, ptr, off, size));		\
	if (INLINE_EXTENT_BUFFER_PAGES == 1 || oil + size <= unit_size)	\
		return get_unaligned_le##bits(kaddr + oil);		\
									\
	/* Member straddles two folios: gather it into lebytes. */	\
	memcpy(lebytes, kaddr + oil, part);				\
	kaddr = folio_address(eb->folios[idx + 1]);			\
	memcpy(lebytes + part, kaddr, size - part);			\
	return get_unaligned_le##bits(lebytes);				\
}									\
void btrfs_set_token_##bits(struct btrfs_map_token *token,		\
			    const void *ptr, unsigned long off,		\
			    u##bits val)				\
{									\
	const unsigned long member_offset = (unsigned long)ptr + off;	\
	const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \
	const unsigned long oil = get_eb_offset_in_folio(token->eb,	\
							 member_offset);\
	const int unit_size = folio_size(token->eb->folios[0]);		\
	const int unit_shift = folio_shift(token->eb->folios[0]);	\
	const int size = sizeof(u##bits);				\
	u8 lebytes[sizeof(u##bits)];					\
	const int part = unit_size - oil;				\
									\
	ASSERT(token);							\
	ASSERT(token->kaddr);						\
	ASSERT(check_setget_bounds(token->eb, ptr, off, size));		\
	/* Fast path: the cached folio covers the whole member. */	\
	if (token->offset <= member_offset &&				\
	    member_offset + size <= token->offset + unit_size) {	\
		put_unaligned_le##bits(val, token->kaddr + oil);	\
		return;							\
	}								\
	token->kaddr = folio_address(token->eb->folios[idx]);		\
	token->offset = idx << unit_shift;				\
	if (INLINE_EXTENT_BUFFER_PAGES == 1 ||				\
	    oil + size <= unit_size) {					\
		put_unaligned_le##bits(val, token->kaddr + oil);	\
		return;							\
	}								\
	/* Member straddles two folios: scatter it from lebytes. */	\
	put_unaligned_le##bits(val, lebytes);				\
	memcpy(token->kaddr + oil, lebytes, part);			\
	token->kaddr = folio_address(token->eb->folios[idx + 1]);	\
	token->offset = (idx + 1) << unit_shift;			\
	memcpy(token->kaddr, lebytes + part, size - part);		\
}									\
void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr,	\
		      unsigned long off, u##bits val)			\
{									\
	const unsigned long member_offset = (unsigned long)ptr + off;	\
	const unsigned long idx = get_eb_folio_index(eb, member_offset);\
	const unsigned long oil = get_eb_offset_in_folio(eb,		\
							 member_offset);\
	const int unit_size = folio_size(eb->folios[0]);		\
	char *kaddr = folio_address(eb->folios[idx]);			\
	const int size = sizeof(u##bits);				\
	const int part = unit_size - oil;				\
	u8 lebytes[sizeof(u##bits)];					\
									\
	ASSERT(check_setget_bounds(eb, ptr, off, size));		\
	if (INLINE_EXTENT_BUFFER_PAGES == 1 ||				\
	    oil + size <= unit_size) {					\
		put_unaligned_le##bits(val, kaddr + oil);		\
		return;							\
	}								\
									\
	/* Member straddles two folios: scatter it from lebytes. */	\
	put_unaligned_le##bits(val, lebytes);				\
	memcpy(kaddr + oil, lebytes, part);				\
	kaddr = folio_address(eb->folios[idx + 1]);			\
	memcpy(kaddr, lebytes + part, size - part);			\
}

/* Instantiate the get/set helpers for each supported width. */
DEFINE_BTRFS_SETGET_BITS(8)
DEFINE_BTRFS_SETGET_BITS(16)
DEFINE_BTRFS_SETGET_BITS(32)
DEFINE_BTRFS_SETGET_BITS(64)

/*
 * Copy the key of the @nr-th key pointer of node @eb into @disk_key.  The
 * location of the key pointer is obtained from btrfs_node_key_ptr_offset().
 */
void btrfs_node_key(const struct extent_buffer *eb,
		    struct btrfs_disk_key *disk_key, int nr)
{
	unsigned long ptr = btrfs_node_key_ptr_offset(eb, nr);

	read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
		       struct btrfs_key_ptr, key, disk_key);
}
16 16 2 2 3 3 3 1 1 7 7 7 7 9 9 9 9 9 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2015 Intel Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/hci_mon.h> #include <net/bluetooth/mgmt.h> #include "mgmt_util.h" static struct sk_buff *create_monitor_ctrl_event(__le16 index, u32 cookie, u16 opcode, u16 len, void *buf) { struct hci_mon_hdr *hdr; struct sk_buff *skb; skb = bt_skb_alloc(6 + len, GFP_ATOMIC); if (!skb) return NULL; put_unaligned_le32(cookie, skb_put(skb, 4)); put_unaligned_le16(opcode, skb_put(skb, 2)); if (buf) skb_put_data(skb, buf, len); __net_timestamp(skb); hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); hdr->index = index; hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; } struct sk_buff *mgmt_alloc_skb(struct hci_dev *hdev, u16 opcode, unsigned int size) { struct sk_buff *skb; skb = alloc_skb(sizeof(struct mgmt_hdr) + size, GFP_KERNEL); if (!skb) return skb; skb_reserve(skb, sizeof(struct mgmt_hdr)); bt_cb(skb)->mgmt.hdev = hdev; bt_cb(skb)->mgmt.opcode = opcode; return skb; } int mgmt_send_event_skb(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk) { struct hci_dev *hdev; struct mgmt_hdr *hdr; int len; if (!skb) return -EINVAL; len = skb->len; hdev = bt_cb(skb)->mgmt.hdev; /* Time stamp */ __net_timestamp(skb); /* Send just the data, without headers, to the monitor */ if (channel == HCI_CHANNEL_CONTROL) hci_send_monitor_ctrl_event(hdev, bt_cb(skb)->mgmt.opcode, skb->data, skb->len, 
skb_get_ktime(skb), flag, skip_sk); hdr = skb_push(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(bt_cb(skb)->mgmt.opcode); if (hdev) hdr->index = cpu_to_le16(hdev->id); else hdr->index = cpu_to_le16(MGMT_INDEX_NONE); hdr->len = cpu_to_le16(len); hci_send_to_channel(channel, skb, flag, skip_sk); kfree_skb(skb); return 0; } int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, void *data, u16 data_len, int flag, struct sock *skip_sk) { struct sk_buff *skb; skb = mgmt_alloc_skb(hdev, event, data_len); if (!skb) return -ENOMEM; if (data) skb_put_data(skb, data, data_len); return mgmt_send_event_skb(channel, skb, flag, skip_sk); } int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) { struct sk_buff *skb, *mskb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_status *ev; int err; BT_DBG("sock %p, index %u, cmd %u, status %u", sk, index, cmd, status); skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_KERNEL); if (!skb) return -ENOMEM; hdr = skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS); hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(sizeof(*ev)); ev = skb_put(skb, sizeof(*ev)); ev->status = status; ev->opcode = cpu_to_le16(cmd); mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk), MGMT_EV_CMD_STATUS, sizeof(*ev), ev); if (mskb) skb->tstamp = mskb->tstamp; else __net_timestamp(skb); err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); if (mskb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb, HCI_SOCK_TRUSTED, NULL); kfree_skb(mskb); } return err; } int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, void *rp, size_t rp_len) { struct sk_buff *skb, *mskb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_complete *ev; int err; BT_DBG("sock %p", sk); skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + rp_len, GFP_KERNEL); if (!skb) return -ENOMEM; hdr = skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); hdr->index = cpu_to_le16(index); hdr->len = 
cpu_to_le16(sizeof(*ev) + rp_len); ev = skb_put(skb, sizeof(*ev) + rp_len); ev->opcode = cpu_to_le16(cmd); ev->status = status; if (rp) memcpy(ev->data, rp, rp_len); mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk), MGMT_EV_CMD_COMPLETE, sizeof(*ev) + rp_len, ev); if (mskb) skb->tstamp = mskb->tstamp; else __net_timestamp(skb); err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); if (mskb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb, HCI_SOCK_TRUSTED, NULL); kfree_skb(mskb); } return err; } struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, struct hci_dev *hdev) { struct mgmt_pending_cmd *cmd; list_for_each_entry(cmd, &hdev->mgmt_pending, list) { if (hci_sock_get_channel(cmd->sk) != channel) continue; if (cmd->opcode == opcode) return cmd; } return NULL; } struct mgmt_pending_cmd *mgmt_pending_find_data(unsigned short channel, u16 opcode, struct hci_dev *hdev, const void *data) { struct mgmt_pending_cmd *cmd; list_for_each_entry(cmd, &hdev->mgmt_pending, list) { if (cmd->user_data != data) continue; if (cmd->opcode == opcode) return cmd; } return NULL; } void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, void (*cb)(struct mgmt_pending_cmd *cmd, void *data), void *data) { struct mgmt_pending_cmd *cmd, *tmp; list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (opcode > 0 && cmd->opcode != opcode) continue; cb(cmd, data); } } struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_pending_cmd *cmd; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return NULL; cmd->opcode = opcode; cmd->index = hdev->id; cmd->param = kmemdup(data, len, GFP_KERNEL); if (!cmd->param) { kfree(cmd); return NULL; } cmd->param_len = len; cmd->sk = sk; sock_hold(sk); return cmd; } struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_pending_cmd *cmd; cmd = 
mgmt_pending_new(sk, opcode, hdev, data, len); if (!cmd) return NULL; list_add_tail(&cmd->list, &hdev->mgmt_pending); return cmd; } void mgmt_pending_free(struct mgmt_pending_cmd *cmd) { sock_put(cmd->sk); kfree(cmd->param); kfree(cmd); } void mgmt_pending_remove(struct mgmt_pending_cmd *cmd) { list_del(&cmd->list); mgmt_pending_free(cmd); } void mgmt_mesh_foreach(struct hci_dev *hdev, void (*cb)(struct mgmt_mesh_tx *mesh_tx, void *data), void *data, struct sock *sk) { struct mgmt_mesh_tx *mesh_tx, *tmp; list_for_each_entry_safe(mesh_tx, tmp, &hdev->mgmt_pending, list) { if (!sk || mesh_tx->sk == sk) cb(mesh_tx, data); } } struct mgmt_mesh_tx *mgmt_mesh_next(struct hci_dev *hdev, struct sock *sk) { struct mgmt_mesh_tx *mesh_tx; if (list_empty(&hdev->mesh_pending)) return NULL; list_for_each_entry(mesh_tx, &hdev->mesh_pending, list) { if (!sk || mesh_tx->sk == sk) return mesh_tx; } return NULL; } struct mgmt_mesh_tx *mgmt_mesh_find(struct hci_dev *hdev, u8 handle) { struct mgmt_mesh_tx *mesh_tx; if (list_empty(&hdev->mesh_pending)) return NULL; list_for_each_entry(mesh_tx, &hdev->mesh_pending, list) { if (mesh_tx->handle == handle) return mesh_tx; } return NULL; } struct mgmt_mesh_tx *mgmt_mesh_add(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mesh_tx *mesh_tx; mesh_tx = kzalloc(sizeof(*mesh_tx), GFP_KERNEL); if (!mesh_tx) return NULL; hdev->mesh_send_ref++; if (!hdev->mesh_send_ref) hdev->mesh_send_ref++; mesh_tx->handle = hdev->mesh_send_ref; mesh_tx->index = hdev->id; memcpy(mesh_tx->param, data, len); mesh_tx->param_len = len; mesh_tx->sk = sk; sock_hold(sk); list_add_tail(&mesh_tx->list, &hdev->mesh_pending); return mesh_tx; } void mgmt_mesh_remove(struct mgmt_mesh_tx *mesh_tx) { list_del(&mesh_tx->list); sock_put(mesh_tx->sk); kfree(mesh_tx); }
7039 5247 2370 3622 995 271 273 489 24 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM timer #if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_TIMER_H #include <linux/tracepoint.h> #include <linux/hrtimer.h> #include <linux/timer.h> 
/*
 * timer_class - event class for timer events that record only the timer
 * pointer.  Used below by timer_init, timer_expire_exit and timer_cancel.
 */
DECLARE_EVENT_CLASS(timer_class,

	TP_PROTO(struct timer_list *timer),

	TP_ARGS(timer),

	TP_STRUCT__entry(
		__field( void *,	timer	)
	),

	TP_fast_assign(
		__entry->timer	= timer;
	),

	TP_printk("timer=%p", __entry->timer)
);

/**
 * timer_init - called when the timer is initialized
 * @timer:	pointer to struct timer_list
 */
DEFINE_EVENT(timer_class, timer_init,

	TP_PROTO(struct timer_list *timer),

	TP_ARGS(timer)
);

/*
 * Render timer flag bits as a "|"-separated list of one-letter tags:
 * M = TIMER_MIGRATING, D = TIMER_DEFERRABLE, P = TIMER_PINNED,
 * I = TIMER_IRQSAFE.
 */
#define decode_timer_flags(flags)			\
	__print_flags(flags, "|",			\
		{  TIMER_MIGRATING,	"M" },		\
		{  TIMER_DEFERRABLE,	"D" },		\
		{  TIMER_PINNED,	"P" },		\
		{  TIMER_IRQSAFE,	"I" })

/**
 * timer_start - called when the timer is started
 * @timer:		pointer to struct timer_list
 * @bucket_expiry:	the bucket expiry time
 *
 * Besides the arguments, the event snapshots timer->function,
 * timer->expires, timer->flags and the current jiffies value.  The output
 * prints "timeout" as the signed difference expires - now, "cpu" as
 * flags & TIMER_CPUMASK and "idx" as flags >> TIMER_ARRAYSHIFT.
 */
TRACE_EVENT(timer_start,

	TP_PROTO(struct timer_list *timer,
		unsigned long bucket_expiry),

	TP_ARGS(timer, bucket_expiry),

	TP_STRUCT__entry(
		__field( void *,	timer		)
		__field( void *,	function	)
		__field( unsigned long,	expires		)
		__field( unsigned long,	bucket_expiry	)
		__field( unsigned long,	now		)
		__field( unsigned int,	flags		)
	),

	TP_fast_assign(
		__entry->timer		= timer;
		__entry->function	= timer->function;
		__entry->expires	= timer->expires;
		__entry->bucket_expiry	= bucket_expiry;
		__entry->now		= jiffies;
		__entry->flags		= timer->flags;
	),

	TP_printk("timer=%p function=%ps expires=%lu [timeout=%ld] bucket_expiry=%lu cpu=%u idx=%u flags=%s",
		  __entry->timer, __entry->function, __entry->expires,
		  (long)__entry->expires - __entry->now,
		  __entry->bucket_expiry, __entry->flags & TIMER_CPUMASK,
		  __entry->flags >> TIMER_ARRAYSHIFT,
		  decode_timer_flags(__entry->flags & TIMER_TRACE_FLAGMASK))
);

/**
 * timer_expire_entry - called immediately before the timer callback
 * @timer:	pointer to struct timer_list
 * @baseclk:	value of timer_base::clk when timer expires
 *
 * Allows to determine the timer latency.
 */
TRACE_EVENT(timer_expire_entry,

	TP_PROTO(struct timer_list *timer, unsigned long baseclk),

	TP_ARGS(timer, baseclk),

	TP_STRUCT__entry(
		__field( void *,	timer	)
		__field( unsigned long,	now	)
		__field( void *,	function)
		__field( unsigned long,	baseclk	)
	),

	TP_fast_assign(
		__entry->timer		= timer;
		__entry->now		= jiffies;
		__entry->function	= timer->function;
		__entry->baseclk	= baseclk;
	),

	TP_printk("timer=%p function=%ps now=%lu baseclk=%lu",
		  __entry->timer, __entry->function, __entry->now,
		  __entry->baseclk)
);

/**
 * timer_expire_exit - called immediately after the timer callback returns
 * @timer:	pointer to struct timer_list
 *
 * When used in combination with the timer_expire_entry tracepoint we can
 * determine the runtime of the timer callback function.
 *
 * NOTE: Do NOT dereference timer in TP_fast_assign. The pointer might
 * be invalid. We solely track the pointer.
 */
DEFINE_EVENT(timer_class, timer_expire_exit,

	TP_PROTO(struct timer_list *timer),

	TP_ARGS(timer)
);

/**
 * timer_cancel - called when the timer is canceled
 * @timer:	pointer to struct timer_list
 */
DEFINE_EVENT(timer_class, timer_cancel,

	TP_PROTO(struct timer_list *timer),

	TP_ARGS(timer)
);

/**
 * timer_base_idle - records an idle flag together with a CPU number
 * @is_idle:	idle flag supplied by the caller, printed with %d
 * @cpu:	CPU number supplied by the caller
 *
 * NOTE(review): presumably emitted when a CPU's timer base changes its idle
 * state; the call site is not visible from this header - confirm.
 */
TRACE_EVENT(timer_base_idle,

	TP_PROTO(bool is_idle, unsigned int cpu),

	TP_ARGS(is_idle, cpu),

	TP_STRUCT__entry(
		__field( bool,		is_idle	)
		__field( unsigned int,	cpu	)
	),

	TP_fast_assign(
		__entry->is_idle	= is_idle;
		__entry->cpu		= cpu;
	),

	TP_printk("is_idle=%d cpu=%d",
		  __entry->is_idle, __entry->cpu)
);

/*
 * Print a clockid by name.  Only the four clock ids listed here are
 * translated by this table.
 */
#define decode_clockid(type)						\
	__print_symbolic(type,						\
		{ CLOCK_REALTIME,	"CLOCK_REALTIME"	},	\
		{ CLOCK_MONOTONIC,	"CLOCK_MONOTONIC"	},	\
		{ CLOCK_BOOTTIME,	"CLOCK_BOOTTIME"	},	\
		{ CLOCK_TAI,		"CLOCK_TAI"		})

/*
 * Print an hrtimer mode by name.  Each combined mode constant has its own
 * entry because __print_symbolic() matches the whole value rather than
 * individual bits.
 */
#define decode_hrtimer_mode(mode)					\
	__print_symbolic(mode,						\
		{ HRTIMER_MODE_ABS,		"ABS"		},	\
		{ HRTIMER_MODE_REL,		"REL"		},	\
		{ HRTIMER_MODE_ABS_PINNED,	"ABS|PINNED"	},	\
		{ HRTIMER_MODE_REL_PINNED,	"REL|PINNED"	},	\
		{ HRTIMER_MODE_ABS_SOFT,	"ABS|SOFT"	},	\
		{ HRTIMER_MODE_REL_SOFT,	"REL|SOFT"	},	\
		{ HRTIMER_MODE_ABS_PINNED_SOFT,	"ABS|PINNED|SOFT" },	\
		{ HRTIMER_MODE_REL_PINNED_SOFT,	"REL|PINNED|SOFT" },	\
		{ HRTIMER_MODE_ABS_HARD,	"ABS|HARD" },		\
		{ HRTIMER_MODE_REL_HARD,	"REL|HARD" },		\
		{ HRTIMER_MODE_ABS_PINNED_HARD,	"ABS|PINNED|HARD" },	\
		{ HRTIMER_MODE_REL_PINNED_HARD,	"REL|PINNED|HARD" })

/**
 * hrtimer_init - called when the hrtimer is initialized
 * @hrtimer:	pointer to struct hrtimer
 * @clockid:	the hrtimers clock
 * @mode:	the hrtimers mode
 */
TRACE_EVENT(hrtimer_init,

	TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid,
		 enum hrtimer_mode mode),

	TP_ARGS(hrtimer, clockid, mode),

	TP_STRUCT__entry(
		__field( void *,		hrtimer		)
		__field( clockid_t,		clockid		)
		__field( enum hrtimer_mode,	mode		)
	),

	TP_fast_assign(
		__entry->hrtimer	= hrtimer;
		__entry->clockid	= clockid;
		__entry->mode		= mode;
	),

	TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer,
		  decode_clockid(__entry->clockid),
		  decode_hrtimer_mode(__entry->mode))
);

/**
 * hrtimer_start - called when the hrtimer is started
 * @hrtimer:	pointer to struct hrtimer
 * @mode:	the hrtimers mode
 *
 * The hard and soft expiry times are read through hrtimer_get_expires()
 * and hrtimer_get_softexpires() and printed as unsigned 64-bit values.
 */
TRACE_EVENT(hrtimer_start,

	TP_PROTO(struct hrtimer *hrtimer, enum hrtimer_mode mode),

	TP_ARGS(hrtimer, mode),

	TP_STRUCT__entry(
		__field( void *,	hrtimer		)
		__field( void *,	function	)
		__field( s64,		expires		)
		__field( s64,		softexpires	)
		__field( enum hrtimer_mode,	mode	)
	),

	TP_fast_assign(
		__entry->hrtimer	= hrtimer;
		__entry->function	= hrtimer->function;
		__entry->expires	= hrtimer_get_expires(hrtimer);
		__entry->softexpires	= hrtimer_get_softexpires(hrtimer);
		__entry->mode		= mode;
	),

	TP_printk("hrtimer=%p function=%ps expires=%llu softexpires=%llu "
		  "mode=%s", __entry->hrtimer, __entry->function,
		  (unsigned long long) __entry->expires,
		  (unsigned long long) __entry->softexpires,
		  decode_hrtimer_mode(__entry->mode))
);

/**
 * hrtimer_expire_entry - called immediately before the hrtimer callback
 * @hrtimer:	pointer to struct hrtimer
 * @now:	pointer to variable which contains current time of the
 *		timers base.
 *
 * Allows to determine the timer latency.
 */
TRACE_EVENT(hrtimer_expire_entry,

	TP_PROTO(struct hrtimer *hrtimer, ktime_t *now),

	TP_ARGS(hrtimer, now),

	TP_STRUCT__entry(
		__field( void *,	hrtimer	)
		__field( s64,		now	)
		__field( void *,	function)
	),

	TP_fast_assign(
		__entry->hrtimer	= hrtimer;
		__entry->now		= *now;
		__entry->function	= hrtimer->function;
	),

	TP_printk("hrtimer=%p function=%ps now=%llu",
		  __entry->hrtimer, __entry->function,
		  (unsigned long long) __entry->now)
);

/*
 * hrtimer_class - event class for hrtimer events that record only the
 * hrtimer pointer.  Used below by hrtimer_expire_exit and hrtimer_cancel.
 */
DECLARE_EVENT_CLASS(hrtimer_class,

	TP_PROTO(struct hrtimer *hrtimer),

	TP_ARGS(hrtimer),

	TP_STRUCT__entry(
		__field( void *,	hrtimer	)
	),

	TP_fast_assign(
		__entry->hrtimer	= hrtimer;
	),

	TP_printk("hrtimer=%p", __entry->hrtimer)
);

/**
 * hrtimer_expire_exit - called immediately after the hrtimer callback returns
 * @hrtimer:	pointer to struct hrtimer
 *
 * When used in combination with the hrtimer_expire_entry tracepoint we can
 * determine the runtime of the callback function.
 */
DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit,

	TP_PROTO(struct hrtimer *hrtimer),

	TP_ARGS(hrtimer)
);

/**
 * hrtimer_cancel - called when the hrtimer is canceled
 * @hrtimer:	pointer to struct hrtimer
 */
DEFINE_EVENT(hrtimer_class, hrtimer_cancel,

	TP_PROTO(struct hrtimer *hrtimer),

	TP_ARGS(hrtimer)
);

/**
 * itimer_state - called when itimer is started or canceled
 * @which:	name of the interval timer
 * @value:	the itimers value, itimer is canceled if value->it_value is
 *		zero, otherwise it is started
 * @expires:	the itimers expiry time
 *
 * The seconds and nanoseconds of it_value and it_interval are stored as
 * long.  On output the nanosecond parts are divided by NSEC_PER_USEC and
 * printed as a six-digit fraction.
 */
TRACE_EVENT(itimer_state,

	TP_PROTO(int which, const struct itimerspec64 *const value,
		 unsigned long long expires),

	TP_ARGS(which, value, expires),

	TP_STRUCT__entry(
		__field( int,			which		)
		__field( unsigned long long,	expires		)
		__field( long,			value_sec	)
		__field( long,			value_nsec	)
		__field( long,			interval_sec	)
		__field( long,			interval_nsec	)
	),

	TP_fast_assign(
		__entry->which		= which;
		__entry->expires	= expires;
		__entry->value_sec	= value->it_value.tv_sec;
		__entry->value_nsec	= value->it_value.tv_nsec;
		__entry->interval_sec	= value->it_interval.tv_sec;
		__entry->interval_nsec	= value->it_interval.tv_nsec;
	),

	TP_printk("which=%d expires=%llu it_value=%ld.%06ld it_interval=%ld.%06ld",
		  __entry->which, __entry->expires,
		  __entry->value_sec, __entry->value_nsec / NSEC_PER_USEC,
		  __entry->interval_sec, __entry->interval_nsec / NSEC_PER_USEC)
);

/**
 * itimer_expire - called when itimer expires
 * @which:	type of the interval timer
 * @pid:	pid of the process which owns the timer
 * @now:	current time, used to calculate the latency of itimer
 *
 * The struct pid is converted to a numeric pid with pid_nr() before it is
 * stored.
 */
TRACE_EVENT(itimer_expire,

	TP_PROTO(int which, struct pid *pid, unsigned long long now),

	TP_ARGS(which, pid, now),

	TP_STRUCT__entry(
		__field( int ,			which	)
		__field( pid_t,			pid	)
		__field( unsigned long long,	now	)
	),

	TP_fast_assign(
		__entry->which	= which;
		__entry->now	= now;
		__entry->pid	= pid_nr(pid);
	),

	TP_printk("which=%d pid=%d now=%llu", __entry->which,
		  (int) __entry->pid, __entry->now)
);

#ifdef CONFIG_NO_HZ_COMMON

/*
 * List of tick dependency names, written once and expanded twice below with
 * different definitions of tick_dep_mask_name(), tick_dep_name() and
 * tick_dep_name_end(): first into TRACE_DEFINE_ENUM() statements, then into
 * the { mask, "name" } table used by show_tick_dep_name().
 */
#define TICK_DEP_NAMES					\
		tick_dep_mask_name(NONE)		\
		tick_dep_name(POSIX_TIMER)		\
		tick_dep_name(PERF_EVENTS)		\
		tick_dep_name(SCHED)			\
		tick_dep_name(CLOCK_UNSTABLE)		\
		tick_dep_name(RCU)			\
		tick_dep_name_end(RCU_EXP)

#undef tick_dep_name
#undef tick_dep_mask_name
#undef tick_dep_name_end

/* The MASK will convert to their bits and they need to be processed too */
#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \
	TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
#define tick_dep_name_end(sdep)  TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \
	TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
/* NONE only has a mask defined for it */
#define tick_dep_mask_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);

/* First expansion: emit the TRACE_DEFINE_ENUM() statements. */
TICK_DEP_NAMES

#undef tick_dep_name
#undef tick_dep_mask_name
#undef tick_dep_name_end

/*
 * Second set of definitions: each name becomes a { mask, "name" } pair.
 * Only the _end variant omits the trailing comma, so it closes the list.
 */
#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
#define tick_dep_mask_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }

/* Print a TICK_DEP_MASK_* value by name. */
#define show_tick_dep_name(val)				\
	__print_symbolic(val, TICK_DEP_NAMES)

/**
 * tick_stop - records a success flag and a tick dependency
 * @success:	value supplied by the caller, printed with %d
 * @dependency:	TICK_DEP_MASK_* value, printed by name through
 *		show_tick_dep_name()
 *
 * NOTE(review): presumably emitted when the kernel tries to stop the
 * periodic tick; the call site is not visible from this header - confirm.
 */
TRACE_EVENT(tick_stop,

	TP_PROTO(int success, int dependency),

	TP_ARGS(success, dependency),

	TP_STRUCT__entry(
		__field( int ,		success	)
		__field( int ,		dependency )
	),

	TP_fast_assign(
		__entry->success	= success;
		__entry->dependency	= dependency;
	),

	TP_printk("success=%d dependency=%s",  __entry->success, \
			show_tick_dep_name(__entry->dependency))
);
#endif

#endif /*  _TRACE_TIMER_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 /* * Cryptographic API. * * AES Cipher Algorithm. * * Based on Brian Gladman's code. * * Linux developers: * Alexander Kjeldaas <astor@fast.no> * Herbert Valerio Riedel <hvr@hvrlab.org> * Kyle McMartin <kyle@debian.org> * Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API). 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * --------------------------------------------------------------------------- * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK. * All rights reserved. * * LICENSE TERMS * * The free distribution and use of this software in both source and binary * form is allowed (with or without changes) provided that: * * 1. distributions of this source code include the above copyright * notice, this list of conditions and the following disclaimer; * * 2. distributions in binary form include the above copyright * notice, this list of conditions and the following disclaimer * in the documentation and/or other associated materials; * * 3. the copyright holder's name is not used to endorse products * built using this software without specific written permission. * * ALTERNATIVELY, provided that this notice is retained in full, this product * may be distributed under the terms of the GNU General Public License (GPL), * in which case the provisions of the GPL apply INSTEAD OF those given above. * * DISCLAIMER * * This software is provided 'as is' with no explicit or implied warranties * in respect of its properties, including, but not limited to, correctness * and/or fitness for purpose. 
* --------------------------------------------------------------------------- */ #include <crypto/aes.h> #include <crypto/algapi.h> #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> #include <linux/errno.h> #include <asm/byteorder.h> #include <asm/unaligned.h> static inline u8 byte(const u32 x, const unsigned n) { return x >> (n << 3); } /* cacheline-aligned to facilitate prefetching into cache */ __visible const u32 crypto_ft_tab[4][256] ____cacheline_aligned = { { 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591, 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, 0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb, 0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b, 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, 0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a, 0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f, 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df, 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, 0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b, 0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413, 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, 0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85, 0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511, 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, 0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1, 0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 
0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf, 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, 0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6, 0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b, 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, 0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8, 0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2, 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, 0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810, 0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697, 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, 0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c, 0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27, 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, 0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5, 0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0, 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c, }, { 0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd, 0x6f6fdeb1, 0xc5c59154, 0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d, 0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a, 0xcaca8f45, 0x82821f9d, 0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b, 0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7, 0x7272e496, 0xc0c09b5b, 0xb7b775c2, 0xfdfde11c, 0x93933dae, 
0x26264c6a, 0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f, 0x3434685c, 0xa5a551f4, 0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f, 0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1, 0x05050a0f, 0x9a9a2fb5, 0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d, 0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f, 0x0909121b, 0x83831d9e, 0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb, 0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e, 0x2f2f5e71, 0x84841397, 0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c, 0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed, 0x6a6ad4be, 0xcbcb8d46, 0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a, 0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7, 0x33336655, 0x85851194, 0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81, 0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3, 0x5151a2f3, 0xa3a35dfe, 0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104, 0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a, 0xf3f3fd0e, 0xd2d2bf6d, 0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f, 0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39, 0xc4c49357, 0xa7a755f2, 0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695, 0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e, 0x90903bab, 0x88880b83, 0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c, 0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76, 0xe0e0db3b, 0x32326456, 0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4, 0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4, 0xe4e4d337, 0x7979f28b, 0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7, 0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0, 0x6c6cd8b4, 0x5656acfa, 0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018, 0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1, 0xb4b473c7, 0xc6c69751, 0xe8e8cb23, 
0xdddda17c, 0x7474e89c, 0x1f1f3e21, 0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85, 0x7070e090, 0x3e3e7c42, 0xb5b571c4, 0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12, 0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958, 0x1d1d3a27, 0x9e9e27b9, 0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233, 0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7, 0x9b9b2db6, 0x1e1e3c22, 0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a, 0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731, 0x424284c6, 0x6868d0b8, 0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11, 0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a, }, { 0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5, 0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b, 0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76, 0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0, 0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0, 0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26, 0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc, 0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15, 0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a, 0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2, 0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75, 0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0, 0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784, 0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced, 0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b, 0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf, 0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485, 0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f, 0x50a0f050, 0x3c78443c, 0x9f25ba9f, 
0xa84be3a8, 0x51a2f351, 0xa35dfea3, 0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5, 0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2, 0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec, 0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917, 0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573, 0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388, 0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14, 0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db, 0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c, 0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79, 0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d, 0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9, 0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808, 0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6, 0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f, 0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a, 0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e, 0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e, 0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311, 0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794, 0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf, 0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868, 0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f, 0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16, }, { 0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b, 0xdeb16f6f, 0x9154c5c5, 0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b, 0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676, 0x8f45caca, 0x1f9d8282, 0x8940c9c9, 0xfa877d7d, 0xef15fafa, 
0xb2eb5959, 0x8ec94747, 0xfb0bf0f0, 0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4, 0xe4967272, 0x9b5bc0c0, 0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626, 0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc, 0x685c3434, 0x51f4a5a5, 0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515, 0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696, 0x0a0f0505, 0x2fb59a9a, 0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2, 0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575, 0x121b0909, 0x1d9e8383, 0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0, 0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3, 0x5e712f2f, 0x13978484, 0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded, 0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b, 0xd4be6a6a, 0x8d46cbcb, 0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf, 0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d, 0x66553333, 0x11948585, 0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f, 0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8, 0xa2f35151, 0x5dfea3a3, 0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5, 0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff, 0xfd0ef3f3, 0xbf6dd2d2, 0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec, 0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717, 0x9357c4c4, 0x55f2a7a7, 0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373, 0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a, 0x3bab9090, 0x0b838888, 0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414, 0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb, 0xdb3be0e0, 0x64563232, 0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c, 0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595, 0xd337e4e4, 0xf28b7979, 0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d, 0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9, 0xd8b46c6c, 0xacfa5656, 0xf307f4f4, 
0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808, 0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6, 0x73c7b4b4, 0x9751c6c6, 0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f, 0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a, 0xe0907070, 0x7c423e3e, 0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e, 0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1, 0x3a271d1d, 0x27b99e9e, 0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111, 0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494, 0x2db69b9b, 0x3c221e1e, 0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf, 0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6, 0x84c64242, 0xd0b86868, 0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f, 0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616, } }; static const u32 crypto_fl_tab[4][256] ____cacheline_aligned = { { 0x00000063, 0x0000007c, 0x00000077, 0x0000007b, 0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5, 0x00000030, 0x00000001, 0x00000067, 0x0000002b, 0x000000fe, 0x000000d7, 0x000000ab, 0x00000076, 0x000000ca, 0x00000082, 0x000000c9, 0x0000007d, 0x000000fa, 0x00000059, 0x00000047, 0x000000f0, 0x000000ad, 0x000000d4, 0x000000a2, 0x000000af, 0x0000009c, 0x000000a4, 0x00000072, 0x000000c0, 0x000000b7, 0x000000fd, 0x00000093, 0x00000026, 0x00000036, 0x0000003f, 0x000000f7, 0x000000cc, 0x00000034, 0x000000a5, 0x000000e5, 0x000000f1, 0x00000071, 0x000000d8, 0x00000031, 0x00000015, 0x00000004, 0x000000c7, 0x00000023, 0x000000c3, 0x00000018, 0x00000096, 0x00000005, 0x0000009a, 0x00000007, 0x00000012, 0x00000080, 0x000000e2, 0x000000eb, 0x00000027, 0x000000b2, 0x00000075, 0x00000009, 0x00000083, 0x0000002c, 0x0000001a, 0x0000001b, 0x0000006e, 0x0000005a, 0x000000a0, 0x00000052, 0x0000003b, 0x000000d6, 0x000000b3, 0x00000029, 0x000000e3, 0x0000002f, 0x00000084, 0x00000053, 0x000000d1, 0x00000000, 0x000000ed, 0x00000020, 0x000000fc, 0x000000b1, 0x0000005b, 0x0000006a, 0x000000cb, 0x000000be, 
0x00000039, 0x0000004a, 0x0000004c, 0x00000058, 0x000000cf, 0x000000d0, 0x000000ef, 0x000000aa, 0x000000fb, 0x00000043, 0x0000004d, 0x00000033, 0x00000085, 0x00000045, 0x000000f9, 0x00000002, 0x0000007f, 0x00000050, 0x0000003c, 0x0000009f, 0x000000a8, 0x00000051, 0x000000a3, 0x00000040, 0x0000008f, 0x00000092, 0x0000009d, 0x00000038, 0x000000f5, 0x000000bc, 0x000000b6, 0x000000da, 0x00000021, 0x00000010, 0x000000ff, 0x000000f3, 0x000000d2, 0x000000cd, 0x0000000c, 0x00000013, 0x000000ec, 0x0000005f, 0x00000097, 0x00000044, 0x00000017, 0x000000c4, 0x000000a7, 0x0000007e, 0x0000003d, 0x00000064, 0x0000005d, 0x00000019, 0x00000073, 0x00000060, 0x00000081, 0x0000004f, 0x000000dc, 0x00000022, 0x0000002a, 0x00000090, 0x00000088, 0x00000046, 0x000000ee, 0x000000b8, 0x00000014, 0x000000de, 0x0000005e, 0x0000000b, 0x000000db, 0x000000e0, 0x00000032, 0x0000003a, 0x0000000a, 0x00000049, 0x00000006, 0x00000024, 0x0000005c, 0x000000c2, 0x000000d3, 0x000000ac, 0x00000062, 0x00000091, 0x00000095, 0x000000e4, 0x00000079, 0x000000e7, 0x000000c8, 0x00000037, 0x0000006d, 0x0000008d, 0x000000d5, 0x0000004e, 0x000000a9, 0x0000006c, 0x00000056, 0x000000f4, 0x000000ea, 0x00000065, 0x0000007a, 0x000000ae, 0x00000008, 0x000000ba, 0x00000078, 0x00000025, 0x0000002e, 0x0000001c, 0x000000a6, 0x000000b4, 0x000000c6, 0x000000e8, 0x000000dd, 0x00000074, 0x0000001f, 0x0000004b, 0x000000bd, 0x0000008b, 0x0000008a, 0x00000070, 0x0000003e, 0x000000b5, 0x00000066, 0x00000048, 0x00000003, 0x000000f6, 0x0000000e, 0x00000061, 0x00000035, 0x00000057, 0x000000b9, 0x00000086, 0x000000c1, 0x0000001d, 0x0000009e, 0x000000e1, 0x000000f8, 0x00000098, 0x00000011, 0x00000069, 0x000000d9, 0x0000008e, 0x00000094, 0x0000009b, 0x0000001e, 0x00000087, 0x000000e9, 0x000000ce, 0x00000055, 0x00000028, 0x000000df, 0x0000008c, 0x000000a1, 0x00000089, 0x0000000d, 0x000000bf, 0x000000e6, 0x00000042, 0x00000068, 0x00000041, 0x00000099, 0x0000002d, 0x0000000f, 0x000000b0, 0x00000054, 0x000000bb, 0x00000016, }, { 0x00006300, 
0x00007c00, 0x00007700, 0x00007b00, 0x0000f200, 0x00006b00, 0x00006f00, 0x0000c500, 0x00003000, 0x00000100, 0x00006700, 0x00002b00, 0x0000fe00, 0x0000d700, 0x0000ab00, 0x00007600, 0x0000ca00, 0x00008200, 0x0000c900, 0x00007d00, 0x0000fa00, 0x00005900, 0x00004700, 0x0000f000, 0x0000ad00, 0x0000d400, 0x0000a200, 0x0000af00, 0x00009c00, 0x0000a400, 0x00007200, 0x0000c000, 0x0000b700, 0x0000fd00, 0x00009300, 0x00002600, 0x00003600, 0x00003f00, 0x0000f700, 0x0000cc00, 0x00003400, 0x0000a500, 0x0000e500, 0x0000f100, 0x00007100, 0x0000d800, 0x00003100, 0x00001500, 0x00000400, 0x0000c700, 0x00002300, 0x0000c300, 0x00001800, 0x00009600, 0x00000500, 0x00009a00, 0x00000700, 0x00001200, 0x00008000, 0x0000e200, 0x0000eb00, 0x00002700, 0x0000b200, 0x00007500, 0x00000900, 0x00008300, 0x00002c00, 0x00001a00, 0x00001b00, 0x00006e00, 0x00005a00, 0x0000a000, 0x00005200, 0x00003b00, 0x0000d600, 0x0000b300, 0x00002900, 0x0000e300, 0x00002f00, 0x00008400, 0x00005300, 0x0000d100, 0x00000000, 0x0000ed00, 0x00002000, 0x0000fc00, 0x0000b100, 0x00005b00, 0x00006a00, 0x0000cb00, 0x0000be00, 0x00003900, 0x00004a00, 0x00004c00, 0x00005800, 0x0000cf00, 0x0000d000, 0x0000ef00, 0x0000aa00, 0x0000fb00, 0x00004300, 0x00004d00, 0x00003300, 0x00008500, 0x00004500, 0x0000f900, 0x00000200, 0x00007f00, 0x00005000, 0x00003c00, 0x00009f00, 0x0000a800, 0x00005100, 0x0000a300, 0x00004000, 0x00008f00, 0x00009200, 0x00009d00, 0x00003800, 0x0000f500, 0x0000bc00, 0x0000b600, 0x0000da00, 0x00002100, 0x00001000, 0x0000ff00, 0x0000f300, 0x0000d200, 0x0000cd00, 0x00000c00, 0x00001300, 0x0000ec00, 0x00005f00, 0x00009700, 0x00004400, 0x00001700, 0x0000c400, 0x0000a700, 0x00007e00, 0x00003d00, 0x00006400, 0x00005d00, 0x00001900, 0x00007300, 0x00006000, 0x00008100, 0x00004f00, 0x0000dc00, 0x00002200, 0x00002a00, 0x00009000, 0x00008800, 0x00004600, 0x0000ee00, 0x0000b800, 0x00001400, 0x0000de00, 0x00005e00, 0x00000b00, 0x0000db00, 0x0000e000, 0x00003200, 0x00003a00, 0x00000a00, 0x00004900, 0x00000600, 0x00002400, 
0x00005c00, 0x0000c200, 0x0000d300, 0x0000ac00, 0x00006200, 0x00009100, 0x00009500, 0x0000e400, 0x00007900, 0x0000e700, 0x0000c800, 0x00003700, 0x00006d00, 0x00008d00, 0x0000d500, 0x00004e00, 0x0000a900, 0x00006c00, 0x00005600, 0x0000f400, 0x0000ea00, 0x00006500, 0x00007a00, 0x0000ae00, 0x00000800, 0x0000ba00, 0x00007800, 0x00002500, 0x00002e00, 0x00001c00, 0x0000a600, 0x0000b400, 0x0000c600, 0x0000e800, 0x0000dd00, 0x00007400, 0x00001f00, 0x00004b00, 0x0000bd00, 0x00008b00, 0x00008a00, 0x00007000, 0x00003e00, 0x0000b500, 0x00006600, 0x00004800, 0x00000300, 0x0000f600, 0x00000e00, 0x00006100, 0x00003500, 0x00005700, 0x0000b900, 0x00008600, 0x0000c100, 0x00001d00, 0x00009e00, 0x0000e100, 0x0000f800, 0x00009800, 0x00001100, 0x00006900, 0x0000d900, 0x00008e00, 0x00009400, 0x00009b00, 0x00001e00, 0x00008700, 0x0000e900, 0x0000ce00, 0x00005500, 0x00002800, 0x0000df00, 0x00008c00, 0x0000a100, 0x00008900, 0x00000d00, 0x0000bf00, 0x0000e600, 0x00004200, 0x00006800, 0x00004100, 0x00009900, 0x00002d00, 0x00000f00, 0x0000b000, 0x00005400, 0x0000bb00, 0x00001600, }, { 0x00630000, 0x007c0000, 0x00770000, 0x007b0000, 0x00f20000, 0x006b0000, 0x006f0000, 0x00c50000, 0x00300000, 0x00010000, 0x00670000, 0x002b0000, 0x00fe0000, 0x00d70000, 0x00ab0000, 0x00760000, 0x00ca0000, 0x00820000, 0x00c90000, 0x007d0000, 0x00fa0000, 0x00590000, 0x00470000, 0x00f00000, 0x00ad0000, 0x00d40000, 0x00a20000, 0x00af0000, 0x009c0000, 0x00a40000, 0x00720000, 0x00c00000, 0x00b70000, 0x00fd0000, 0x00930000, 0x00260000, 0x00360000, 0x003f0000, 0x00f70000, 0x00cc0000, 0x00340000, 0x00a50000, 0x00e50000, 0x00f10000, 0x00710000, 0x00d80000, 0x00310000, 0x00150000, 0x00040000, 0x00c70000, 0x00230000, 0x00c30000, 0x00180000, 0x00960000, 0x00050000, 0x009a0000, 0x00070000, 0x00120000, 0x00800000, 0x00e20000, 0x00eb0000, 0x00270000, 0x00b20000, 0x00750000, 0x00090000, 0x00830000, 0x002c0000, 0x001a0000, 0x001b0000, 0x006e0000, 0x005a0000, 0x00a00000, 0x00520000, 0x003b0000, 0x00d60000, 0x00b30000, 0x00290000, 
0x00e30000, 0x002f0000, 0x00840000, 0x00530000, 0x00d10000, 0x00000000, 0x00ed0000, 0x00200000, 0x00fc0000, 0x00b10000, 0x005b0000, 0x006a0000, 0x00cb0000, 0x00be0000, 0x00390000, 0x004a0000, 0x004c0000, 0x00580000, 0x00cf0000, 0x00d00000, 0x00ef0000, 0x00aa0000, 0x00fb0000, 0x00430000, 0x004d0000, 0x00330000, 0x00850000, 0x00450000, 0x00f90000, 0x00020000, 0x007f0000, 0x00500000, 0x003c0000, 0x009f0000, 0x00a80000, 0x00510000, 0x00a30000, 0x00400000, 0x008f0000, 0x00920000, 0x009d0000, 0x00380000, 0x00f50000, 0x00bc0000, 0x00b60000, 0x00da0000, 0x00210000, 0x00100000, 0x00ff0000, 0x00f30000, 0x00d20000, 0x00cd0000, 0x000c0000, 0x00130000, 0x00ec0000, 0x005f0000, 0x00970000, 0x00440000, 0x00170000, 0x00c40000, 0x00a70000, 0x007e0000, 0x003d0000, 0x00640000, 0x005d0000, 0x00190000, 0x00730000, 0x00600000, 0x00810000, 0x004f0000, 0x00dc0000, 0x00220000, 0x002a0000, 0x00900000, 0x00880000, 0x00460000, 0x00ee0000, 0x00b80000, 0x00140000, 0x00de0000, 0x005e0000, 0x000b0000, 0x00db0000, 0x00e00000, 0x00320000, 0x003a0000, 0x000a0000, 0x00490000, 0x00060000, 0x00240000, 0x005c0000, 0x00c20000, 0x00d30000, 0x00ac0000, 0x00620000, 0x00910000, 0x00950000, 0x00e40000, 0x00790000, 0x00e70000, 0x00c80000, 0x00370000, 0x006d0000, 0x008d0000, 0x00d50000, 0x004e0000, 0x00a90000, 0x006c0000, 0x00560000, 0x00f40000, 0x00ea0000, 0x00650000, 0x007a0000, 0x00ae0000, 0x00080000, 0x00ba0000, 0x00780000, 0x00250000, 0x002e0000, 0x001c0000, 0x00a60000, 0x00b40000, 0x00c60000, 0x00e80000, 0x00dd0000, 0x00740000, 0x001f0000, 0x004b0000, 0x00bd0000, 0x008b0000, 0x008a0000, 0x00700000, 0x003e0000, 0x00b50000, 0x00660000, 0x00480000, 0x00030000, 0x00f60000, 0x000e0000, 0x00610000, 0x00350000, 0x00570000, 0x00b90000, 0x00860000, 0x00c10000, 0x001d0000, 0x009e0000, 0x00e10000, 0x00f80000, 0x00980000, 0x00110000, 0x00690000, 0x00d90000, 0x008e0000, 0x00940000, 0x009b0000, 0x001e0000, 0x00870000, 0x00e90000, 0x00ce0000, 0x00550000, 0x00280000, 0x00df0000, 0x008c0000, 0x00a10000, 0x00890000, 
0x000d0000, 0x00bf0000, 0x00e60000, 0x00420000, 0x00680000, 0x00410000, 0x00990000, 0x002d0000, 0x000f0000, 0x00b00000, 0x00540000, 0x00bb0000, 0x00160000, }, { 0x63000000, 0x7c000000, 0x77000000, 0x7b000000, 0xf2000000, 0x6b000000, 0x6f000000, 0xc5000000, 0x30000000, 0x01000000, 0x67000000, 0x2b000000, 0xfe000000, 0xd7000000, 0xab000000, 0x76000000, 0xca000000, 0x82000000, 0xc9000000, 0x7d000000, 0xfa000000, 0x59000000, 0x47000000, 0xf0000000, 0xad000000, 0xd4000000, 0xa2000000, 0xaf000000, 0x9c000000, 0xa4000000, 0x72000000, 0xc0000000, 0xb7000000, 0xfd000000, 0x93000000, 0x26000000, 0x36000000, 0x3f000000, 0xf7000000, 0xcc000000, 0x34000000, 0xa5000000, 0xe5000000, 0xf1000000, 0x71000000, 0xd8000000, 0x31000000, 0x15000000, 0x04000000, 0xc7000000, 0x23000000, 0xc3000000, 0x18000000, 0x96000000, 0x05000000, 0x9a000000, 0x07000000, 0x12000000, 0x80000000, 0xe2000000, 0xeb000000, 0x27000000, 0xb2000000, 0x75000000, 0x09000000, 0x83000000, 0x2c000000, 0x1a000000, 0x1b000000, 0x6e000000, 0x5a000000, 0xa0000000, 0x52000000, 0x3b000000, 0xd6000000, 0xb3000000, 0x29000000, 0xe3000000, 0x2f000000, 0x84000000, 0x53000000, 0xd1000000, 0x00000000, 0xed000000, 0x20000000, 0xfc000000, 0xb1000000, 0x5b000000, 0x6a000000, 0xcb000000, 0xbe000000, 0x39000000, 0x4a000000, 0x4c000000, 0x58000000, 0xcf000000, 0xd0000000, 0xef000000, 0xaa000000, 0xfb000000, 0x43000000, 0x4d000000, 0x33000000, 0x85000000, 0x45000000, 0xf9000000, 0x02000000, 0x7f000000, 0x50000000, 0x3c000000, 0x9f000000, 0xa8000000, 0x51000000, 0xa3000000, 0x40000000, 0x8f000000, 0x92000000, 0x9d000000, 0x38000000, 0xf5000000, 0xbc000000, 0xb6000000, 0xda000000, 0x21000000, 0x10000000, 0xff000000, 0xf3000000, 0xd2000000, 0xcd000000, 0x0c000000, 0x13000000, 0xec000000, 0x5f000000, 0x97000000, 0x44000000, 0x17000000, 0xc4000000, 0xa7000000, 0x7e000000, 0x3d000000, 0x64000000, 0x5d000000, 0x19000000, 0x73000000, 0x60000000, 0x81000000, 0x4f000000, 0xdc000000, 0x22000000, 0x2a000000, 0x90000000, 0x88000000, 0x46000000, 
0xee000000, 0xb8000000, 0x14000000, 0xde000000, 0x5e000000, 0x0b000000, 0xdb000000, 0xe0000000, 0x32000000, 0x3a000000, 0x0a000000, 0x49000000, 0x06000000, 0x24000000, 0x5c000000, 0xc2000000, 0xd3000000, 0xac000000, 0x62000000, 0x91000000, 0x95000000, 0xe4000000, 0x79000000, 0xe7000000, 0xc8000000, 0x37000000, 0x6d000000, 0x8d000000, 0xd5000000, 0x4e000000, 0xa9000000, 0x6c000000, 0x56000000, 0xf4000000, 0xea000000, 0x65000000, 0x7a000000, 0xae000000, 0x08000000, 0xba000000, 0x78000000, 0x25000000, 0x2e000000, 0x1c000000, 0xa6000000, 0xb4000000, 0xc6000000, 0xe8000000, 0xdd000000, 0x74000000, 0x1f000000, 0x4b000000, 0xbd000000, 0x8b000000, 0x8a000000, 0x70000000, 0x3e000000, 0xb5000000, 0x66000000, 0x48000000, 0x03000000, 0xf6000000, 0x0e000000, 0x61000000, 0x35000000, 0x57000000, 0xb9000000, 0x86000000, 0xc1000000, 0x1d000000, 0x9e000000, 0xe1000000, 0xf8000000, 0x98000000, 0x11000000, 0x69000000, 0xd9000000, 0x8e000000, 0x94000000, 0x9b000000, 0x1e000000, 0x87000000, 0xe9000000, 0xce000000, 0x55000000, 0x28000000, 0xdf000000, 0x8c000000, 0xa1000000, 0x89000000, 0x0d000000, 0xbf000000, 0xe6000000, 0x42000000, 0x68000000, 0x41000000, 0x99000000, 0x2d000000, 0x0f000000, 0xb0000000, 0x54000000, 0xbb000000, 0x16000000, } }; __visible const u32 crypto_it_tab[4][256] ____cacheline_aligned = { { 0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b, 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5, 0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, 0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b, 0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e, 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927, 0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, 0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9, 0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566, 0x0728ebb2, 
0x03c2b52f, 0x9a7bc586, 0xa50837d3, 0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, 0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4, 0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd, 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d, 0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, 0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879, 0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c, 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36, 0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, 0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c, 0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14, 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3, 0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, 0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684, 0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177, 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947, 0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, 0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f, 0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382, 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf, 0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, 0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef, 0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235, 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733, 0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, 0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546, 0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92, 0x335610e9, 
0x1347d66d, 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb, 0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, 0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478, 0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff, 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664, 0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0, }, { 0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1, 0x58faacab, 0x03e34b93, 0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525, 0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f, 0x5ab1de49, 0x1bba2567, 0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6, 0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3, 0x69e04929, 0xc8c98e44, 0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd, 0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4, 0x4adf6318, 0x311ae582, 0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994, 0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2, 0x8f1fe357, 0xab55662a, 0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5, 0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c, 0x1ccf8a2b, 0xb479a792, 0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a, 0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa, 0x9f715e06, 0x106ebd51, 0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46, 0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff, 0xfb981924, 0xe9bdd697, 0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db, 0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248, 0x70111eac, 0x725a6c4e, 0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627, 0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a, 0x670a0cb1, 0xe757930f, 0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16, 0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad, 0xa8b62db9, 0xa91e14c8, 0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd, 0x2601f79f, 
0xf5725cbc, 0x3b6644c5, 0x7efb5b34, 0x29438b76, 0xc623cbdc, 0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420, 0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 0x30b2dcf3, 0x52860dec, 0xe3c177d0, 0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722, 0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef, 0x4e4987c7, 0xd138d9c1, 0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4, 0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8, 0xf7392e5e, 0xafc382f5, 0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3, 0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b, 0x7826cd09, 0x18596ef4, 0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6, 0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31, 0x94a5c630, 0x66a235c0, 0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315, 0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f, 0xd64d768d, 0xb0ef434d, 0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f, 0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252, 0x5610e933, 0x47d66d13, 0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89, 0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c, 0xdfd29c59, 0x73f2553f, 0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886, 0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c, 0x493c288b, 0x950dff41, 0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490, 0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042, }, { 0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145, 0xfaacab58, 0xe34b9303, 0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c, 0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3, 0xb1de495a, 0xba25671b, 0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9, 0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321, 0xe0492969, 0xc98e44c8, 0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71, 0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a, 0xdf63184a, 0x1ae58231, 0x51976033, 
0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b, 0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 0x73ab23d3, 0x4b72e202, 0x1fe3578f, 0x55662aab, 0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508, 0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82, 0xcf8a2b1c, 0x79a792b4, 0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe, 0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef, 0x715e069f, 0x6ebd5110, 0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd, 0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15, 0x981924fb, 0xbdd697e9, 0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee, 0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed, 0x111eac70, 0x5a6c4e72, 0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739, 0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e, 0x0a0cb167, 0x57930fe7, 0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a, 0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7, 0xb62db9a8, 0x1e14c8a9, 0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60, 0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e, 0x438b7629, 0x23cbdcc6, 0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011, 0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330, 0x860dec52, 0xc177d0e3, 0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264, 0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90, 0x4987c74e, 0x38d9c1d1, 0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf, 0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8, 0x392e5ef7, 0xc382f5af, 0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312, 0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb, 0x26cd0978, 0x596ef418, 0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8, 0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123, 0xa5c63094, 0xa235c066, 0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8, 0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6, 0x4d768dd6, 
0xef434db0, 0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51, 0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2, 0x10e93356, 0xd66d1347, 0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c, 0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1, 0xd29c59df, 0xf2553f73, 0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db, 0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25, 0x3c288b49, 0x0dff4195, 0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1, 0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257, }, { 0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d, 0xacab58fa, 0x4b9303e3, 0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02, 0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362, 0xde495ab1, 0x25671bba, 0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3, 0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174, 0x492969e0, 0x8e44c8c9, 0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9, 0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace, 0x63184adf, 0xe582311a, 0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08, 0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b, 0xe3578f1f, 0x662aab55, 0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837, 0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216, 0x8a2b1ccf, 0xa792b479, 0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6, 0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60, 0x5e069f71, 0xbd51106e, 0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6, 0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550, 0x1924fb98, 0xd697e9bd, 0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8, 0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b, 0x1eac7011, 0x6c4e725a, 0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d, 0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36, 0x0cb1670a, 0x930fe757, 0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 0x5a694b77, 
0x1c161a12, 0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b, 0x2db9a8b6, 0x14c8a91e, 0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f, 0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb, 0x8b762943, 0xcbdcc623, 0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6, 0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2, 0x0dec5286, 0x77d0e3c1, 0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9, 0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033, 0x87c74e49, 0xd9c1d138, 0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad, 0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8, 0x2e5ef739, 0x82f5afc3, 0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225, 0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b, 0xcd097826, 0x6ef41859, 0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815, 0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f, 0xc63094a5, 0x35c066a2, 0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7, 0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691, 0x768dd64d, 0x434db0ef, 0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165, 0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db, 0xe9335610, 0x6d1347d6, 0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13, 0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147, 0x9c59dfd2, 0x553f73f2, 0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44, 0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2, 0x288b493c, 0xff41950d, 0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156, 0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8, } }; static const u32 crypto_il_tab[4][256] ____cacheline_aligned = { { 0x00000052, 0x00000009, 0x0000006a, 0x000000d5, 0x00000030, 0x00000036, 0x000000a5, 0x00000038, 0x000000bf, 0x00000040, 0x000000a3, 0x0000009e, 0x00000081, 0x000000f3, 0x000000d7, 0x000000fb, 0x0000007c, 0x000000e3, 0x00000039, 0x00000082, 0x0000009b, 0x0000002f, 0x000000ff, 
0x00000087, 0x00000034, 0x0000008e, 0x00000043, 0x00000044, 0x000000c4, 0x000000de, 0x000000e9, 0x000000cb, 0x00000054, 0x0000007b, 0x00000094, 0x00000032, 0x000000a6, 0x000000c2, 0x00000023, 0x0000003d, 0x000000ee, 0x0000004c, 0x00000095, 0x0000000b, 0x00000042, 0x000000fa, 0x000000c3, 0x0000004e, 0x00000008, 0x0000002e, 0x000000a1, 0x00000066, 0x00000028, 0x000000d9, 0x00000024, 0x000000b2, 0x00000076, 0x0000005b, 0x000000a2, 0x00000049, 0x0000006d, 0x0000008b, 0x000000d1, 0x00000025, 0x00000072, 0x000000f8, 0x000000f6, 0x00000064, 0x00000086, 0x00000068, 0x00000098, 0x00000016, 0x000000d4, 0x000000a4, 0x0000005c, 0x000000cc, 0x0000005d, 0x00000065, 0x000000b6, 0x00000092, 0x0000006c, 0x00000070, 0x00000048, 0x00000050, 0x000000fd, 0x000000ed, 0x000000b9, 0x000000da, 0x0000005e, 0x00000015, 0x00000046, 0x00000057, 0x000000a7, 0x0000008d, 0x0000009d, 0x00000084, 0x00000090, 0x000000d8, 0x000000ab, 0x00000000, 0x0000008c, 0x000000bc, 0x000000d3, 0x0000000a, 0x000000f7, 0x000000e4, 0x00000058, 0x00000005, 0x000000b8, 0x000000b3, 0x00000045, 0x00000006, 0x000000d0, 0x0000002c, 0x0000001e, 0x0000008f, 0x000000ca, 0x0000003f, 0x0000000f, 0x00000002, 0x000000c1, 0x000000af, 0x000000bd, 0x00000003, 0x00000001, 0x00000013, 0x0000008a, 0x0000006b, 0x0000003a, 0x00000091, 0x00000011, 0x00000041, 0x0000004f, 0x00000067, 0x000000dc, 0x000000ea, 0x00000097, 0x000000f2, 0x000000cf, 0x000000ce, 0x000000f0, 0x000000b4, 0x000000e6, 0x00000073, 0x00000096, 0x000000ac, 0x00000074, 0x00000022, 0x000000e7, 0x000000ad, 0x00000035, 0x00000085, 0x000000e2, 0x000000f9, 0x00000037, 0x000000e8, 0x0000001c, 0x00000075, 0x000000df, 0x0000006e, 0x00000047, 0x000000f1, 0x0000001a, 0x00000071, 0x0000001d, 0x00000029, 0x000000c5, 0x00000089, 0x0000006f, 0x000000b7, 0x00000062, 0x0000000e, 0x000000aa, 0x00000018, 0x000000be, 0x0000001b, 0x000000fc, 0x00000056, 0x0000003e, 0x0000004b, 0x000000c6, 0x000000d2, 0x00000079, 0x00000020, 0x0000009a, 0x000000db, 0x000000c0, 0x000000fe, 0x00000078, 
0x000000cd, 0x0000005a, 0x000000f4, 0x0000001f, 0x000000dd, 0x000000a8, 0x00000033, 0x00000088, 0x00000007, 0x000000c7, 0x00000031, 0x000000b1, 0x00000012, 0x00000010, 0x00000059, 0x00000027, 0x00000080, 0x000000ec, 0x0000005f, 0x00000060, 0x00000051, 0x0000007f, 0x000000a9, 0x00000019, 0x000000b5, 0x0000004a, 0x0000000d, 0x0000002d, 0x000000e5, 0x0000007a, 0x0000009f, 0x00000093, 0x000000c9, 0x0000009c, 0x000000ef, 0x000000a0, 0x000000e0, 0x0000003b, 0x0000004d, 0x000000ae, 0x0000002a, 0x000000f5, 0x000000b0, 0x000000c8, 0x000000eb, 0x000000bb, 0x0000003c, 0x00000083, 0x00000053, 0x00000099, 0x00000061, 0x00000017, 0x0000002b, 0x00000004, 0x0000007e, 0x000000ba, 0x00000077, 0x000000d6, 0x00000026, 0x000000e1, 0x00000069, 0x00000014, 0x00000063, 0x00000055, 0x00000021, 0x0000000c, 0x0000007d, }, { 0x00005200, 0x00000900, 0x00006a00, 0x0000d500, 0x00003000, 0x00003600, 0x0000a500, 0x00003800, 0x0000bf00, 0x00004000, 0x0000a300, 0x00009e00, 0x00008100, 0x0000f300, 0x0000d700, 0x0000fb00, 0x00007c00, 0x0000e300, 0x00003900, 0x00008200, 0x00009b00, 0x00002f00, 0x0000ff00, 0x00008700, 0x00003400, 0x00008e00, 0x00004300, 0x00004400, 0x0000c400, 0x0000de00, 0x0000e900, 0x0000cb00, 0x00005400, 0x00007b00, 0x00009400, 0x00003200, 0x0000a600, 0x0000c200, 0x00002300, 0x00003d00, 0x0000ee00, 0x00004c00, 0x00009500, 0x00000b00, 0x00004200, 0x0000fa00, 0x0000c300, 0x00004e00, 0x00000800, 0x00002e00, 0x0000a100, 0x00006600, 0x00002800, 0x0000d900, 0x00002400, 0x0000b200, 0x00007600, 0x00005b00, 0x0000a200, 0x00004900, 0x00006d00, 0x00008b00, 0x0000d100, 0x00002500, 0x00007200, 0x0000f800, 0x0000f600, 0x00006400, 0x00008600, 0x00006800, 0x00009800, 0x00001600, 0x0000d400, 0x0000a400, 0x00005c00, 0x0000cc00, 0x00005d00, 0x00006500, 0x0000b600, 0x00009200, 0x00006c00, 0x00007000, 0x00004800, 0x00005000, 0x0000fd00, 0x0000ed00, 0x0000b900, 0x0000da00, 0x00005e00, 0x00001500, 0x00004600, 0x00005700, 0x0000a700, 0x00008d00, 0x00009d00, 0x00008400, 0x00009000, 0x0000d800, 0x0000ab00, 
0x00000000, 0x00008c00, 0x0000bc00, 0x0000d300, 0x00000a00, 0x0000f700, 0x0000e400, 0x00005800, 0x00000500, 0x0000b800, 0x0000b300, 0x00004500, 0x00000600, 0x0000d000, 0x00002c00, 0x00001e00, 0x00008f00, 0x0000ca00, 0x00003f00, 0x00000f00, 0x00000200, 0x0000c100, 0x0000af00, 0x0000bd00, 0x00000300, 0x00000100, 0x00001300, 0x00008a00, 0x00006b00, 0x00003a00, 0x00009100, 0x00001100, 0x00004100, 0x00004f00, 0x00006700, 0x0000dc00, 0x0000ea00, 0x00009700, 0x0000f200, 0x0000cf00, 0x0000ce00, 0x0000f000, 0x0000b400, 0x0000e600, 0x00007300, 0x00009600, 0x0000ac00, 0x00007400, 0x00002200, 0x0000e700, 0x0000ad00, 0x00003500, 0x00008500, 0x0000e200, 0x0000f900, 0x00003700, 0x0000e800, 0x00001c00, 0x00007500, 0x0000df00, 0x00006e00, 0x00004700, 0x0000f100, 0x00001a00, 0x00007100, 0x00001d00, 0x00002900, 0x0000c500, 0x00008900, 0x00006f00, 0x0000b700, 0x00006200, 0x00000e00, 0x0000aa00, 0x00001800, 0x0000be00, 0x00001b00, 0x0000fc00, 0x00005600, 0x00003e00, 0x00004b00, 0x0000c600, 0x0000d200, 0x00007900, 0x00002000, 0x00009a00, 0x0000db00, 0x0000c000, 0x0000fe00, 0x00007800, 0x0000cd00, 0x00005a00, 0x0000f400, 0x00001f00, 0x0000dd00, 0x0000a800, 0x00003300, 0x00008800, 0x00000700, 0x0000c700, 0x00003100, 0x0000b100, 0x00001200, 0x00001000, 0x00005900, 0x00002700, 0x00008000, 0x0000ec00, 0x00005f00, 0x00006000, 0x00005100, 0x00007f00, 0x0000a900, 0x00001900, 0x0000b500, 0x00004a00, 0x00000d00, 0x00002d00, 0x0000e500, 0x00007a00, 0x00009f00, 0x00009300, 0x0000c900, 0x00009c00, 0x0000ef00, 0x0000a000, 0x0000e000, 0x00003b00, 0x00004d00, 0x0000ae00, 0x00002a00, 0x0000f500, 0x0000b000, 0x0000c800, 0x0000eb00, 0x0000bb00, 0x00003c00, 0x00008300, 0x00005300, 0x00009900, 0x00006100, 0x00001700, 0x00002b00, 0x00000400, 0x00007e00, 0x0000ba00, 0x00007700, 0x0000d600, 0x00002600, 0x0000e100, 0x00006900, 0x00001400, 0x00006300, 0x00005500, 0x00002100, 0x00000c00, 0x00007d00, }, { 0x00520000, 0x00090000, 0x006a0000, 0x00d50000, 0x00300000, 0x00360000, 0x00a50000, 0x00380000, 0x00bf0000, 
0x00400000, 0x00a30000, 0x009e0000, 0x00810000, 0x00f30000, 0x00d70000, 0x00fb0000, 0x007c0000, 0x00e30000, 0x00390000, 0x00820000, 0x009b0000, 0x002f0000, 0x00ff0000, 0x00870000, 0x00340000, 0x008e0000, 0x00430000, 0x00440000, 0x00c40000, 0x00de0000, 0x00e90000, 0x00cb0000, 0x00540000, 0x007b0000, 0x00940000, 0x00320000, 0x00a60000, 0x00c20000, 0x00230000, 0x003d0000, 0x00ee0000, 0x004c0000, 0x00950000, 0x000b0000, 0x00420000, 0x00fa0000, 0x00c30000, 0x004e0000, 0x00080000, 0x002e0000, 0x00a10000, 0x00660000, 0x00280000, 0x00d90000, 0x00240000, 0x00b20000, 0x00760000, 0x005b0000, 0x00a20000, 0x00490000, 0x006d0000, 0x008b0000, 0x00d10000, 0x00250000, 0x00720000, 0x00f80000, 0x00f60000, 0x00640000, 0x00860000, 0x00680000, 0x00980000, 0x00160000, 0x00d40000, 0x00a40000, 0x005c0000, 0x00cc0000, 0x005d0000, 0x00650000, 0x00b60000, 0x00920000, 0x006c0000, 0x00700000, 0x00480000, 0x00500000, 0x00fd0000, 0x00ed0000, 0x00b90000, 0x00da0000, 0x005e0000, 0x00150000, 0x00460000, 0x00570000, 0x00a70000, 0x008d0000, 0x009d0000, 0x00840000, 0x00900000, 0x00d80000, 0x00ab0000, 0x00000000, 0x008c0000, 0x00bc0000, 0x00d30000, 0x000a0000, 0x00f70000, 0x00e40000, 0x00580000, 0x00050000, 0x00b80000, 0x00b30000, 0x00450000, 0x00060000, 0x00d00000, 0x002c0000, 0x001e0000, 0x008f0000, 0x00ca0000, 0x003f0000, 0x000f0000, 0x00020000, 0x00c10000, 0x00af0000, 0x00bd0000, 0x00030000, 0x00010000, 0x00130000, 0x008a0000, 0x006b0000, 0x003a0000, 0x00910000, 0x00110000, 0x00410000, 0x004f0000, 0x00670000, 0x00dc0000, 0x00ea0000, 0x00970000, 0x00f20000, 0x00cf0000, 0x00ce0000, 0x00f00000, 0x00b40000, 0x00e60000, 0x00730000, 0x00960000, 0x00ac0000, 0x00740000, 0x00220000, 0x00e70000, 0x00ad0000, 0x00350000, 0x00850000, 0x00e20000, 0x00f90000, 0x00370000, 0x00e80000, 0x001c0000, 0x00750000, 0x00df0000, 0x006e0000, 0x00470000, 0x00f10000, 0x001a0000, 0x00710000, 0x001d0000, 0x00290000, 0x00c50000, 0x00890000, 0x006f0000, 0x00b70000, 0x00620000, 0x000e0000, 0x00aa0000, 0x00180000, 0x00be0000, 
0x001b0000, 0x00fc0000, 0x00560000, 0x003e0000, 0x004b0000, 0x00c60000, 0x00d20000, 0x00790000, 0x00200000, 0x009a0000, 0x00db0000, 0x00c00000, 0x00fe0000, 0x00780000, 0x00cd0000, 0x005a0000, 0x00f40000, 0x001f0000, 0x00dd0000, 0x00a80000, 0x00330000, 0x00880000, 0x00070000, 0x00c70000, 0x00310000, 0x00b10000, 0x00120000, 0x00100000, 0x00590000, 0x00270000, 0x00800000, 0x00ec0000, 0x005f0000, 0x00600000, 0x00510000, 0x007f0000, 0x00a90000, 0x00190000, 0x00b50000, 0x004a0000, 0x000d0000, 0x002d0000, 0x00e50000, 0x007a0000, 0x009f0000, 0x00930000, 0x00c90000, 0x009c0000, 0x00ef0000, 0x00a00000, 0x00e00000, 0x003b0000, 0x004d0000, 0x00ae0000, 0x002a0000, 0x00f50000, 0x00b00000, 0x00c80000, 0x00eb0000, 0x00bb0000, 0x003c0000, 0x00830000, 0x00530000, 0x00990000, 0x00610000, 0x00170000, 0x002b0000, 0x00040000, 0x007e0000, 0x00ba0000, 0x00770000, 0x00d60000, 0x00260000, 0x00e10000, 0x00690000, 0x00140000, 0x00630000, 0x00550000, 0x00210000, 0x000c0000, 0x007d0000, }, { 0x52000000, 0x09000000, 0x6a000000, 0xd5000000, 0x30000000, 0x36000000, 0xa5000000, 0x38000000, 0xbf000000, 0x40000000, 0xa3000000, 0x9e000000, 0x81000000, 0xf3000000, 0xd7000000, 0xfb000000, 0x7c000000, 0xe3000000, 0x39000000, 0x82000000, 0x9b000000, 0x2f000000, 0xff000000, 0x87000000, 0x34000000, 0x8e000000, 0x43000000, 0x44000000, 0xc4000000, 0xde000000, 0xe9000000, 0xcb000000, 0x54000000, 0x7b000000, 0x94000000, 0x32000000, 0xa6000000, 0xc2000000, 0x23000000, 0x3d000000, 0xee000000, 0x4c000000, 0x95000000, 0x0b000000, 0x42000000, 0xfa000000, 0xc3000000, 0x4e000000, 0x08000000, 0x2e000000, 0xa1000000, 0x66000000, 0x28000000, 0xd9000000, 0x24000000, 0xb2000000, 0x76000000, 0x5b000000, 0xa2000000, 0x49000000, 0x6d000000, 0x8b000000, 0xd1000000, 0x25000000, 0x72000000, 0xf8000000, 0xf6000000, 0x64000000, 0x86000000, 0x68000000, 0x98000000, 0x16000000, 0xd4000000, 0xa4000000, 0x5c000000, 0xcc000000, 0x5d000000, 0x65000000, 0xb6000000, 0x92000000, 0x6c000000, 0x70000000, 0x48000000, 0x50000000, 0xfd000000, 
0xed000000, 0xb9000000, 0xda000000, 0x5e000000, 0x15000000, 0x46000000, 0x57000000, 0xa7000000, 0x8d000000, 0x9d000000, 0x84000000, 0x90000000, 0xd8000000, 0xab000000, 0x00000000, 0x8c000000, 0xbc000000, 0xd3000000, 0x0a000000, 0xf7000000, 0xe4000000, 0x58000000, 0x05000000, 0xb8000000, 0xb3000000, 0x45000000, 0x06000000, 0xd0000000, 0x2c000000, 0x1e000000, 0x8f000000, 0xca000000, 0x3f000000, 0x0f000000, 0x02000000, 0xc1000000, 0xaf000000, 0xbd000000, 0x03000000, 0x01000000, 0x13000000, 0x8a000000, 0x6b000000, 0x3a000000, 0x91000000, 0x11000000, 0x41000000, 0x4f000000, 0x67000000, 0xdc000000, 0xea000000, 0x97000000, 0xf2000000, 0xcf000000, 0xce000000, 0xf0000000, 0xb4000000, 0xe6000000, 0x73000000, 0x96000000, 0xac000000, 0x74000000, 0x22000000, 0xe7000000, 0xad000000, 0x35000000, 0x85000000, 0xe2000000, 0xf9000000, 0x37000000, 0xe8000000, 0x1c000000, 0x75000000, 0xdf000000, 0x6e000000, 0x47000000, 0xf1000000, 0x1a000000, 0x71000000, 0x1d000000, 0x29000000, 0xc5000000, 0x89000000, 0x6f000000, 0xb7000000, 0x62000000, 0x0e000000, 0xaa000000, 0x18000000, 0xbe000000, 0x1b000000, 0xfc000000, 0x56000000, 0x3e000000, 0x4b000000, 0xc6000000, 0xd2000000, 0x79000000, 0x20000000, 0x9a000000, 0xdb000000, 0xc0000000, 0xfe000000, 0x78000000, 0xcd000000, 0x5a000000, 0xf4000000, 0x1f000000, 0xdd000000, 0xa8000000, 0x33000000, 0x88000000, 0x07000000, 0xc7000000, 0x31000000, 0xb1000000, 0x12000000, 0x10000000, 0x59000000, 0x27000000, 0x80000000, 0xec000000, 0x5f000000, 0x60000000, 0x51000000, 0x7f000000, 0xa9000000, 0x19000000, 0xb5000000, 0x4a000000, 0x0d000000, 0x2d000000, 0xe5000000, 0x7a000000, 0x9f000000, 0x93000000, 0xc9000000, 0x9c000000, 0xef000000, 0xa0000000, 0xe0000000, 0x3b000000, 0x4d000000, 0xae000000, 0x2a000000, 0xf5000000, 0xb0000000, 0xc8000000, 0xeb000000, 0xbb000000, 0x3c000000, 0x83000000, 0x53000000, 0x99000000, 0x61000000, 0x17000000, 0x2b000000, 0x04000000, 0x7e000000, 0xba000000, 0x77000000, 0xd6000000, 0x26000000, 0xe1000000, 0x69000000, 0x14000000, 
0x63000000, 0x55000000, 0x21000000, 0x0c000000, 0x7d000000,
} };
EXPORT_SYMBOL_GPL(crypto_ft_tab);
EXPORT_SYMBOL_GPL(crypto_it_tab);

/**
 * crypto_aes_set_key - Set the AES key.
 * @tfm:	The %crypto_tfm that is used in the context.
 * @in_key:	The input key.
 * @key_len:	The size of the key.
 *
 * This function uses aes_expandkey() to expand the key.  &crypto_aes_ctx
 * _must_ be the private data embedded in @tfm which is retrieved with
 * crypto_tfm_ctx().
 *
 * Return: 0 on success; -EINVAL on failure (only happens for bad key lengths)
 */
int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
		unsigned int key_len)
{
	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	/* the return value of aes_expandkey() is passed straight through */
	return aes_expandkey(ctx, in_key, key_len);
}
EXPORT_SYMBOL_GPL(crypto_aes_set_key);

/* encrypt a block of text */

/*
 * f_rn - compute output word @n of one normal encryption round.
 *
 * XORs four crypto_ft_tab lookups with round-key word k[n].  Lookup i uses
 * table i and byte position i of input word (n + i) & 3.
 * NOTE(review): byte() is defined outside this chunk; presumably it extracts
 * byte i of a 32-bit word - confirm.
 */
#define f_rn(bo, bi, n, k)	do {				\
	bo[n] = crypto_ft_tab[0][byte(bi[n], 0)] ^			\
		crypto_ft_tab[1][byte(bi[(n + 1) & 3], 1)] ^		\
		crypto_ft_tab[2][byte(bi[(n + 2) & 3], 2)] ^		\
		crypto_ft_tab[3][byte(bi[(n + 3) & 3], 3)] ^ *(k + n);	\
} while (0)

/*
 * f_nround - one full normal encryption round: all four output words, then
 * advance the round-key pointer @k by four words (so @k must be an lvalue).
 */
#define f_nround(bo, bi, k)	do {\
	f_rn(bo, bi, 0, k);	\
	f_rn(bo, bi, 1, k);	\
	f_rn(bo, bi, 2, k);	\
	f_rn(bo, bi, 3, k);	\
	k += 4;			\
} while (0)

/* f_rl - same shape as f_rn but looks up crypto_fl_tab (last-round table) */
#define f_rl(bo, bi, n, k)	do {				\
	bo[n] = crypto_fl_tab[0][byte(bi[n], 0)] ^			\
		crypto_fl_tab[1][byte(bi[(n + 1) & 3], 1)] ^		\
		crypto_fl_tab[2][byte(bi[(n + 2) & 3], 2)] ^		\
		crypto_fl_tab[3][byte(bi[(n + 3) & 3], 3)] ^ *(k + n);	\
} while (0)

/* f_lround - final encryption round; unlike f_nround it does not advance @k */
#define f_lround(bo, bi, k)	do {\
	f_rl(bo, bi, 0, k);	\
	f_rl(bo, bi, 1, k);	\
	f_rl(bo, bi, 2, k);	\
	f_rl(bo, bi, 3, k);	\
} while (0)

/*
 * crypto_aes_encrypt - encrypt one 16-byte block from @in into @out using the
 * encryption key schedule stored in the tfm context.
 *
 * @in is read and @out is written as four little-endian 32-bit words via the
 * unaligned accessors.  Nine normal rounds are always executed, preceded by
 * two extra rounds when key_length > 16 and two more when key_length > 24,
 * followed by one last round.
 */
static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	u32 b0[4], b1[4];
	/* words 0..3 of key_enc are consumed by the initial XOR below */
	const u32 *kp = ctx->key_enc + 4;
	const int key_len = ctx->key_length;

	b0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
	b0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
	b0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
	b0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);

	/* b0 and b1 ping-pong as round input/output; each f_nround bumps kp */
	if (key_len > 24) {
		f_nround(b1, b0, kp);
		f_nround(b0, b1, kp);
	}

	if (key_len > 16) {
		f_nround(b1, b0, kp);
		f_nround(b0, b1, kp);
	}

	f_nround(b1, b0, kp);
	f_nround(b0, b1, kp);
	f_nround(b1, b0, kp);
	f_nround(b0, b1, kp);
	f_nround(b1, b0, kp);
	f_nround(b0, b1, kp);
	f_nround(b1, b0, kp);
	f_nround(b0, b1, kp);
	f_nround(b1, b0, kp);
	f_lround(b0, b1, kp);

	put_unaligned_le32(b0[0], out);
	put_unaligned_le32(b0[1], out + 4);
	put_unaligned_le32(b0[2], out + 8);
	put_unaligned_le32(b0[3], out + 12);
}

/* decrypt a block of text */

/*
 * i_rn - compute output word @n of one normal decryption round.
 *
 * Mirror of f_rn using crypto_it_tab; the input word index rotates the other
 * way: byte positions 1, 2, 3 come from words (n + 3), (n + 2), (n + 1) & 3.
 */
#define i_rn(bo, bi, n, k)	do {				\
	bo[n] = crypto_it_tab[0][byte(bi[n], 0)] ^			\
		crypto_it_tab[1][byte(bi[(n + 3) & 3], 1)] ^		\
		crypto_it_tab[2][byte(bi[(n + 2) & 3], 2)] ^		\
		crypto_it_tab[3][byte(bi[(n + 1) & 3], 3)] ^ *(k + n);	\
} while (0)

/* i_nround - one full normal decryption round; advances @k by four words */
#define i_nround(bo, bi, k)	do {\
	i_rn(bo, bi, 0, k);	\
	i_rn(bo, bi, 1, k);	\
	i_rn(bo, bi, 2, k);	\
	i_rn(bo, bi, 3, k);	\
	k += 4;			\
} while (0)

/* i_rl - same shape as i_rn but looks up crypto_il_tab (last-round table) */
#define i_rl(bo, bi, n, k)	do {			\
	bo[n] = crypto_il_tab[0][byte(bi[n], 0)] ^		\
	crypto_il_tab[1][byte(bi[(n + 3) & 3], 1)] ^		\
	crypto_il_tab[2][byte(bi[(n + 2) & 3], 2)] ^		\
	crypto_il_tab[3][byte(bi[(n + 1) & 3], 3)] ^ *(k + n);	\
} while (0)

/* i_lround - final decryption round; does not advance @k */
#define i_lround(bo, bi, k)	do {\
	i_rl(bo, bi, 0, k);	\
	i_rl(bo, bi, 1, k);	\
	i_rl(bo, bi, 2, k);	\
	i_rl(bo, bi, 3, k);	\
} while (0)

/*
 * crypto_aes_decrypt - decrypt one 16-byte block from @in into @out using the
 * decryption key schedule (key_dec) stored in the tfm context.
 *
 * Same round structure as crypto_aes_encrypt(), built from the i_* macros.
 */
static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	u32 b0[4], b1[4];
	const int key_len = ctx->key_length;
	/* words 0..3 of key_dec are consumed by the initial XOR below */
	const u32 *kp = ctx->key_dec + 4;

	b0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
	b0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
	b0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
	b0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);

	if (key_len > 24) {
		i_nround(b1, b0, kp);
		i_nround(b0, b1, kp);
	}

	if (key_len > 16) {
		i_nround(b1, b0, kp);
		i_nround(b0, b1, kp);
	}

	i_nround(b1, b0, kp);
	i_nround(b0, b1, kp);
	i_nround(b1, b0, kp);
	i_nround(b0, b1, kp);
	i_nround(b1, b0, kp);
	i_nround(b0, b1, kp);
	i_nround(b1, b0, kp);
	i_nround(b0, b1, kp);
	i_nround(b1, b0, kp);
	i_lround(b0, b1, kp);

	put_unaligned_le32(b0[0], out);
	put_unaligned_le32(b0[1], out + 4);
	put_unaligned_le32(b0[2], out + 8);
	put_unaligned_le32(b0[3], out + 12);
}

/*
 * Algorithm descriptor registered by aes_init(): a single-block cipher named
 * "aes" (driver name "aes-generic") wired to the three functions above.
 */
static struct crypto_alg aes_alg = {
	.cra_name		=	"aes",
	.cra_driver_name	=	"aes-generic",
	.cra_priority		=	100,
	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		=	AES_BLOCK_SIZE,
	.cra_ctxsize		=	sizeof(struct crypto_aes_ctx),
	.cra_module		=	THIS_MODULE,
	.cra_u			=	{
		.cipher = {
			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
			.cia_setkey		=	crypto_aes_set_key,
			.cia_encrypt		=	crypto_aes_encrypt,
			.cia_decrypt		=	crypto_aes_decrypt
		}
	}
};

/* Register aes_alg; returns whatever crypto_register_alg() returns. */
static int __init aes_init(void)
{
	return crypto_register_alg(&aes_alg);
}

/* Unregister aes_alg on module exit. */
static void __exit aes_fini(void)
{
	crypto_unregister_alg(&aes_alg);
}

subsys_initcall(aes_init);
module_exit(aes_fini);

MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_CRYPTO("aes");
MODULE_ALIAS_CRYPTO("aes-generic");
55 7 7 7 7 14 16 7 22 9 9 7 2 9 9 14 14 14 344 9 117 14 344 346 81 4 79 127 128 320 313 16 128 124 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 // SPDX-License-Identifier: GPL-2.0 /* * Ldisc rw semaphore * * The ldisc semaphore is semantically a rw_semaphore but which enforces * an alternate policy, namely: * 1) Supports lock wait timeouts * 2) Write waiter has priority * 3) Downgrading is not supported * * Implementation 
notes:
 *   1) Upper half of semaphore count is a wait count (differs from rwsem
 *	in that rwsem normalizes the upper half to the wait bias)
 *   2) Lacks overflow checking
 *
 * The generic counting was copied and modified from include/asm-generic/rwsem.h
 * by Paul Mackerras <paulus@samba.org>.
 *
 * The scheduling policy was copied and modified from lib/rwsem.c
 * Written by David Howells (dhowells@redhat.com).
 *
 * This implementation incorporates the write lock stealing work of
 * Michel Lespinasse <walken@google.com>.
 *
 * Copyright (C) 2013 Peter Hurley <peter@hurleysoftware.com>
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/tty.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>


/* Mask selecting the 'active part' (low half) of sem->count */
#if BITS_PER_LONG == 64
# define LDSEM_ACTIVE_MASK	0xffffffffL
#else
# define LDSEM_ACTIVE_MASK	0x0000ffffL
#endif

/*
 * Count encoding: each active holder adds LDSEM_ACTIVE_BIAS to the low half;
 * each waiter adds LDSEM_WAIT_BIAS (a negative value) to the upper half.
 * A writer's optimistic attempt adds both at once (LDSEM_WRITE_BIAS).
 */
#define LDSEM_UNLOCKED		0L
#define LDSEM_ACTIVE_BIAS	1L
#define LDSEM_WAIT_BIAS		(-LDSEM_ACTIVE_MASK-1)
#define LDSEM_READ_BIAS		LDSEM_ACTIVE_BIAS
#define LDSEM_WRITE_BIAS	(LDSEM_WAIT_BIAS + LDSEM_ACTIVE_BIAS)

/*
 * On-stack wait record.  @list links it into sem->read_wait or
 * sem->write_wait; @task is the sleeping task.  For readers, @task is reset
 * to NULL by __ldsem_wake_readers() to signal that the lock was granted.
 */
struct ldsem_waiter {
	struct list_head list;
	struct task_struct *task;
};

/*
 * Initialize an ldsem:
 */
void __init_ldsem(struct ld_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	atomic_long_set(&sem->count, LDSEM_UNLOCKED);
	sem->wait_readers = 0;
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->read_wait);
	INIT_LIST_HEAD(&sem->write_wait);
}

/*
 * Grant the read lock to every queued reader, or to none of them.
 *
 * All callers in this file invoke it with sem->wait_lock held.
 */
static void __ldsem_wake_readers(struct ld_semaphore *sem)
{
	struct ldsem_waiter *waiter, *next;
	struct task_struct *tsk;
	long adjust, count;

	/*
	 * Try to grant read locks to all readers on the read wait list.
	 * Note the 'active part' of the count is incremented by
	 * the number of readers before waking any processes up.
	 */
	/* per reader: drop its wait bias and add an active bias */
	adjust = sem->wait_readers * (LDSEM_ACTIVE_BIAS - LDSEM_WAIT_BIAS);
	count = atomic_long_add_return(adjust, &sem->count);
	do {
		if (count > 0)
			break;
		/* count is not positive: roll the grant back, wake nobody */
		if (atomic_long_try_cmpxchg(&sem->count, &count, count - adjust))
			return;
	} while (1);

	list_for_each_entry_safe(waiter, next, &sem->read_wait, list) {
		tsk = waiter->task;
		/* NULL task is what the sleeper in down_read_failed() polls for */
		smp_store_release(&waiter->task, NULL);
		wake_up_process(tsk);
		/* drops the reference taken by get_task_struct() when queued */
		put_task_struct(tsk);
	}
	INIT_LIST_HEAD(&sem->read_wait);
	sem->wait_readers = 0;
}

/*
 * Try to take the write lock on behalf of an already-queued writer.
 *
 * Returns 1 if the active part of the count became exactly 1 (lock taken),
 * 0 once the optimistic active bias has been retracted again.
 */
static inline int writer_trylock(struct ld_semaphore *sem)
{
	/*
	 * Only wake this writer if the active part of the count can be
	 * transitioned from 0 -> 1
	 */
	long count = atomic_long_add_return(LDSEM_ACTIVE_BIAS, &sem->count);
	do {
		if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS)
			return 1;
		/* a failed cmpxchg refreshes count, so the check above reruns */
		if (atomic_long_try_cmpxchg(&sem->count, &count, count - LDSEM_ACTIVE_BIAS))
			return 0;
	} while (1);
}

/*
 * Wake the first writer on the write wait list.  The waiter is neither
 * dequeued nor handed the lock here; down_write_failed() retries
 * writer_trylock() itself after waking.
 */
static void __ldsem_wake_writer(struct ld_semaphore *sem)
{
	struct ldsem_waiter *waiter;

	waiter = list_entry(sem->write_wait.next, struct ldsem_waiter, list);
	wake_up_process(waiter->task);
}

/*
 * handle the lock release when processes blocked on it that can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 *
 * A queued writer takes precedence: readers are only woken when the write
 * wait list is empty.
 */
static void __ldsem_wake(struct ld_semaphore *sem)
{
	if (!list_empty(&sem->write_wait))
		__ldsem_wake_writer(sem);
	else if (!list_empty(&sem->read_wait))
		__ldsem_wake_readers(sem);
}

/* Locked wrapper: run __ldsem_wake() under sem->wait_lock with IRQs saved. */
static void ldsem_wake(struct ld_semaphore *sem)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&sem->wait_lock, flags);
	__ldsem_wake(sem);
	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}

/*
 * wait for the read lock to be granted
 *
 * @count is the value returned by the caller's optimistic
 * atomic_long_add_return(); @timeout is in the units schedule_timeout()
 * takes, and a value of 0 means do not sleep at all.
 *
 * Returns @sem if the read lock is held on return, NULL on timeout.
 */
static struct ld_semaphore __sched *
down_read_failed(struct ld_semaphore *sem, long count, long timeout)
{
	struct ldsem_waiter waiter;
	/* turns this reader's optimistic active bias into a wait bias */
	long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS;

	/* set up my own style of waitqueue */
	raw_spin_lock_irq(&sem->wait_lock);

	/*
	 * Try to reverse the lock attempt but if the count has changed
	 * so that reversing fails, check if there are no waiters,
	 * and early-out if not
	 */
	do {
		if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) {
			count += adjust;
			break;
		}
		/* count went positive meanwhile: our read bias now stands */
		if (count > 0) {
			raw_spin_unlock_irq(&sem->wait_lock);
			return sem;
		}
	} while (1);

	list_add_tail(&waiter.list, &sem->read_wait);
	sem->wait_readers++;

	waiter.task = current;
	/* reference is dropped by the waker, or below on timeout */
	get_task_struct(current);

	/* if there are no active locks, wake the new lock owner(s) */
	if ((count & LDSEM_ACTIVE_MASK) == 0)
		__ldsem_wake(sem);

	raw_spin_unlock_irq(&sem->wait_lock);

	/* wait to be given the lock */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		/* pairs with smp_store_release() in __ldsem_wake_readers() */
		if (!smp_load_acquire(&waiter.task))
			break;
		if (!timeout)
			break;
		timeout = schedule_timeout(timeout);
	}

	__set_current_state(TASK_RUNNING);

	if (!timeout) {
		/*
		 * Lock timed out but check if this task was just
		 * granted lock ownership - if so, pretend there
		 * was no timeout; otherwise, cleanup lock wait.
		 */
		raw_spin_lock_irq(&sem->wait_lock);
		if (waiter.task) {
			atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count);
			sem->wait_readers--;
			list_del(&waiter.list);
			raw_spin_unlock_irq(&sem->wait_lock);
			put_task_struct(waiter.task);
			return NULL;
		}
		raw_spin_unlock_irq(&sem->wait_lock);
	}

	return sem;
}

/*
 * wait for the write lock to be granted
 *
 * @count is the value returned by the caller's optimistic
 * atomic_long_add_return(); a @timeout of 0 means do not sleep at all.
 *
 * Returns @sem if the write lock is held on return, NULL on timeout.
 */
static struct ld_semaphore __sched *
down_write_failed(struct ld_semaphore *sem, long count, long timeout)
{
	struct ldsem_waiter waiter;
	/* retract only the active bias; the wait bias stays while queued */
	long adjust = -LDSEM_ACTIVE_BIAS;
	int locked = 0;

	/* set up my own style of waitqueue */
	raw_spin_lock_irq(&sem->wait_lock);

	/*
	 * Try to reverse the lock attempt but if the count has changed
	 * so that reversing fails, check if the lock is now owned,
	 * and early-out if so.
	 */
	do {
		if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust))
			break;
		if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) {
			raw_spin_unlock_irq(&sem->wait_lock);
			return sem;
		}
	} while (1);

	list_add_tail(&waiter.list, &sem->write_wait);

	waiter.task = current;

	set_current_state(TASK_UNINTERRUPTIBLE);
	for (;;) {
		if (!timeout)
			break;
		/* sleep without the lock, retake it before retrying */
		raw_spin_unlock_irq(&sem->wait_lock);
		timeout = schedule_timeout(timeout);
		raw_spin_lock_irq(&sem->wait_lock);
		set_current_state(TASK_UNINTERRUPTIBLE);
		locked = writer_trylock(sem);
		if (locked)
			break;
	}

	/* on failure give back the wait bias; on success it is kept */
	if (!locked)
		atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count);
	list_del(&waiter.list);

	/*
	 * In case of timeout, wake up every reader who gave the right of way
	 * to writer. Prevent separating readers into two groups:
	 * one that holds the semaphore and another that sleeps.
	 * (in case of no contention with a writer)
	 */
	if (!locked && list_empty(&sem->write_wait))
		__ldsem_wake_readers(sem);

	raw_spin_unlock_irq(&sem->wait_lock);

	__set_current_state(TASK_RUNNING);

	/* lock wait may have timed out */
	if (!locked)
		return NULL;
	return sem;
}



/*
 * Common read-lock path: optimistic add of LDSEM_READ_BIAS, falling back to
 * down_read_failed() when the resulting count is not positive.
 *
 * Returns 1 if the lock was acquired, 0 if the wait timed out.
 */
static int __ldsem_down_read_nested(struct ld_semaphore *sem,
				    int subclass, long timeout)
{
	long count;

	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);

	count = atomic_long_add_return(LDSEM_READ_BIAS, &sem->count);
	if (count <= 0) {
		lock_contended(&sem->dep_map, _RET_IP_);
		if (!down_read_failed(sem, count, timeout)) {
			/* undo the lockdep acquire recorded above */
			rwsem_release(&sem->dep_map, _RET_IP_);
			return 0;
		}
	}
	lock_acquired(&sem->dep_map, _RET_IP_);
	return 1;
}

/*
 * Common write-lock path: optimistic add of LDSEM_WRITE_BIAS, falling back to
 * down_write_failed() unless the active part of the count is exactly 1.
 *
 * Returns 1 if the lock was acquired, 0 if the wait timed out.
 */
static int __ldsem_down_write_nested(struct ld_semaphore *sem,
				     int subclass, long timeout)
{
	long count;

	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);

	count = atomic_long_add_return(LDSEM_WRITE_BIAS, &sem->count);
	if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) {
		lock_contended(&sem->dep_map, _RET_IP_);
		if (!down_write_failed(sem, count, timeout)) {
			/* undo the lockdep acquire recorded above */
			rwsem_release(&sem->dep_map, _RET_IP_);
			return 0;
		}
	}
	lock_acquired(&sem->dep_map, _RET_IP_);
	return 1;
}


/*
lock for reading -- returns 1 if successful, 0 if timed out */ int __sched ldsem_down_read(struct ld_semaphore *sem, long timeout) { might_sleep(); return __ldsem_down_read_nested(sem, 0, timeout); } /* * trylock for reading -- returns 1 if successful, 0 if contention */ int ldsem_down_read_trylock(struct ld_semaphore *sem) { long count = atomic_long_read(&sem->count); while (count >= 0) { if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_READ_BIAS)) { rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); lock_acquired(&sem->dep_map, _RET_IP_); return 1; } } return 0; } /* * lock for writing -- returns 1 if successful, 0 if timed out */ int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout) { might_sleep(); return __ldsem_down_write_nested(sem, 0, timeout); } /* * trylock for writing -- returns 1 if successful, 0 if contention */ int ldsem_down_write_trylock(struct ld_semaphore *sem) { long count = atomic_long_read(&sem->count); while ((count & LDSEM_ACTIVE_MASK) == 0) { if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_WRITE_BIAS)) { rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); lock_acquired(&sem->dep_map, _RET_IP_); return 1; } } return 0; } /* * release a read lock */ void ldsem_up_read(struct ld_semaphore *sem) { long count; rwsem_release(&sem->dep_map, _RET_IP_); count = atomic_long_add_return(-LDSEM_READ_BIAS, &sem->count); if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0) ldsem_wake(sem); } /* * release a write lock */ void ldsem_up_write(struct ld_semaphore *sem) { long count; rwsem_release(&sem->dep_map, _RET_IP_); count = atomic_long_add_return(-LDSEM_WRITE_BIAS, &sem->count); if (count < 0) ldsem_wake(sem); } #ifdef CONFIG_DEBUG_LOCK_ALLOC int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, long timeout) { might_sleep(); return __ldsem_down_read_nested(sem, subclass, timeout); } int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, long timeout) { might_sleep(); return 
__ldsem_down_write_nested(sem, subclass, timeout); } #endif
8 8 8 8 8 8 29 21 8 8 8 6921 6927 6923 6917 6932 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 // SPDX-License-Identifier: GPL-2.0-only /* * AppArmor security module * * This file contains AppArmor mediation of files * * Copyright (C) 1998-2008 Novell/SUSE * Copyright 2009-2010 Canonical Ltd. */ #include <linux/tty.h> #include <linux/fdtable.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/mount.h> #include "include/apparmor.h" #include "include/audit.h" #include "include/cred.h" #include "include/file.h" #include "include/match.h" #include "include/net.h" #include "include/path.h" #include "include/policy.h" #include "include/label.h" static u32 map_mask_to_chr_mask(u32 mask) { u32 m = mask & PERMS_CHRS_MASK; if (mask & AA_MAY_GETATTR) m |= MAY_READ; if (mask & (AA_MAY_SETATTR | AA_MAY_CHMOD | AA_MAY_CHOWN)) m |= MAY_WRITE; return m; } /** * file_audit_cb - call back for file specific audit fields * @ab: audit_buffer (NOT NULL) * @va: audit struct to audit values of (NOT NULL) */ static void file_audit_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; struct apparmor_audit_data *ad = aad(sa); kuid_t fsuid = ad->subj_cred ? 
ad->subj_cred->fsuid : current_fsuid();
	char str[10];

	if (ad->request & AA_AUDIT_FILE_MASK) {
		aa_perm_mask_to_str(str, sizeof(str), aa_file_perm_chrs,
				    map_mask_to_chr_mask(ad->request));
		audit_log_format(ab, " requested_mask=\"%s\"", str);
	}
	if (ad->denied & AA_AUDIT_FILE_MASK) {
		aa_perm_mask_to_str(str, sizeof(str), aa_file_perm_chrs,
				    map_mask_to_chr_mask(ad->denied));
		audit_log_format(ab, " denied_mask=\"%s\"", str);
	}
	if (ad->request & AA_AUDIT_FILE_MASK) {
		audit_log_format(ab, " fsuid=%d",
				 from_kuid(&init_user_ns, fsuid));
		audit_log_format(ab, " ouid=%d",
				 from_kuid(&init_user_ns, ad->fs.ouid));
	}

	if (ad->peer) {
		audit_log_format(ab, " target=");
		aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer,
				FLAG_VIEW_SUBNS, GFP_KERNEL);
	} else if (ad->fs.target) {
		audit_log_format(ab, " target=");
		audit_log_untrustedstring(ab, ad->fs.target);
	}
}

/**
 * aa_audit_file - handle the auditing of file operations
 * @subj_cred: cred of the subject
 * @profile: the profile being enforced  (NOT NULL)
 * @perms: the permissions computed for the request (NOT NULL)
 * @op: operation being mediated
 * @request: permissions requested
 * @name: name of object being mediated (MAYBE NULL)
 * @target: name of target (MAYBE NULL)
 * @tlabel: target label (MAY BE NULL)
 * @ouid: object uid
 * @info: extra information message (MAYBE NULL)
 * @error: 0 if operation allowed else failure error code
 *
 * When @error is 0 only the requested permissions that are force audited
 * are reported, and 0 is returned without auditing if none remain.  When
 * @error is set only the denied permissions are reported, and @error is
 * returned without auditing if all of them are quieted.
 *
 * Returns: %0 or error on failure
 */
int aa_audit_file(const struct cred *subj_cred,
		  struct aa_profile *profile, struct aa_perms *perms,
		  const char *op, u32 request, const char *name,
		  const char *target, struct aa_label *tlabel,
		  kuid_t ouid, const char *info, int error)
{
	int type = AUDIT_APPARMOR_AUTO;
	DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_TASK, AA_CLASS_FILE, op);

	ad.subj_cred = subj_cred;
	ad.request = request;
	ad.name = name;
	ad.fs.target = target;
	ad.peer = tlabel;
	ad.fs.ouid = ouid;
	ad.info = info;
	ad.error = error;
	ad.common.u.tsk = NULL;

	if (likely(!ad.error)) {
		u32 mask = perms->audit;

		/* AUDIT_ALL mode overrides the per-rule audit mask */
		if (unlikely(AUDIT_MODE(profile) == AUDIT_ALL))
			mask = 0xffff;

		/* mask off perms that are not being force audited */
		ad.request &= mask;

		if (likely(!ad.request))
			return 0;
		type = AUDIT_APPARMOR_AUDIT;
	} else {
		/* only report permissions that were denied */
		ad.request = ad.request & ~perms->allow;
		AA_BUG(!ad.request);

		if (ad.request & perms->kill)
			type = AUDIT_APPARMOR_KILL;

		/* quiet known rejects, assumes quiet and kill do not overlap */
		if ((ad.request & perms->quiet) &&
		    AUDIT_MODE(profile) != AUDIT_NOQUIET &&
		    AUDIT_MODE(profile) != AUDIT_ALL)
			ad.request &= ~perms->quiet;

		/* everything denied was quieted: fail silently */
		if (!ad.request)
			return ad.error;
	}

	ad.denied = ad.request & ~perms->allow;
	return aa_audit(type, profile, &ad, file_audit_cb);
}

/**
 * is_deleted - test if a file has been completely unlinked
 * @dentry: dentry of file to test for deletion  (NOT NULL)
 *
 * Returns: true if deleted else false
 */
static inline bool is_deleted(struct dentry *dentry)
{
	/* unlinked dentry AND no remaining hard links on the backing inode */
	if (d_unlinked(dentry) && d_backing_inode(dentry)->i_nlink == 0)
		return true;
	return false;
}

/*
 * path_name - resolve @path into @buffer and audit a lookup failure
 *
 * Wraps aa_path_name().  On failure the error is audited once for every
 * confined profile in @label (using nullperms) and then returned; on success
 * *@name holds the result of aa_path_name() and 0 is returned.
 */
static int path_name(const char *op, const struct cred *subj_cred,
		     struct aa_label *label,
		     const struct path *path, int flags, char *buffer,
		     const char **name, struct path_cond *cond, u32 request)
{
	struct aa_profile *profile;
	const char *info = NULL;
	int error;

	error = aa_path_name(path, flags, buffer, name, &info,
			     labels_profile(label)->disconnected);
	if (error) {
		fn_for_each_confined(label, profile,
			aa_audit_file(subj_cred,
				      profile, &nullperms, op, request, *name,
				      NULL, NULL, cond->uid, info, error));
		return error;
	}

	return 0;
}

/* all-zero permission set returned when a policydb has no perms table */
struct aa_perms default_perms = {};
/**
 * aa_lookup_fperms - convert dfa compressed perms to internal perms
 * @file_rules: the aa_policydb to lookup perms for  (NOT NULL)
 * @state: state in dfa
 * @cond:  conditions to consider  (NOT NULL)
 *
 * TODO: convert from dfa + state to permission entry
 *
 * Returns: a pointer to a file permission set
 */
struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules,
				  aa_state_t state, struct path_cond *cond)
{
	unsigned int index = ACCEPT_TABLE(file_rules->dfa)[state];

	if (!(file_rules->perms))
		return &default_perms;

	/* entry 'index' applies when the current fsuid matches cond->uid */
	if (uid_eq(current_fsuid(), cond->uid))
		return &(file_rules->perms[index]);

	/* otherwise the entry that immediately follows it is used */
	return &(file_rules->perms[index + 1]);
}

/**
 * aa_str_perms - find permission that match @name
 * @file_rules: the aa_policydb to match against  (NOT NULL)
 * @start: state to start matching in
 * @name: string to match against dfa  (NOT NULL)
 * @cond: conditions to consider for permission set computation  (NOT NULL)
 * @perms: Returns - the permissions found when matching @name
 *
 * Returns: the final state in @dfa when beginning @start and walking @name
 */
aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start,
			const char *name, struct path_cond *cond,
			struct aa_perms *perms)
{
	aa_state_t state;
	state = aa_dfa_match(file_rules->dfa, start, name);
	/* copied by value: callers get their own aa_perms to modify */
	*perms = *(aa_lookup_fperms(file_rules, state, cond));

	return state;
}

/*
 * __aa_path_perm - check @request against @profile's file rules for @name
 *
 * Unconfined profiles pass immediately.  Otherwise @perms is filled from the
 * profile's first ruleset and the outcome (-EACCES if any requested bit is
 * not allowed, else 0) is handed to aa_audit_file(), whose result is
 * returned.  @flags is not used here.
 */
static int __aa_path_perm(const char *op, const struct cred *subj_cred,
			  struct aa_profile *profile, const char *name,
			  u32 request, struct path_cond *cond, int flags,
			  struct aa_perms *perms)
{
	struct aa_ruleset *rules = list_first_entry(&profile->rules,
						    typeof(*rules), list);
	int e = 0;

	if (profile_unconfined(profile))
		return 0;
	aa_str_perms(rules->file, rules->file->start[AA_CLASS_FILE],
		     name, cond, perms);
	if (request & ~perms->allow)
		e = -EACCES;
	return aa_audit_file(subj_cred,
			     profile, perms, op, request, name, NULL, NULL,
			     cond->uid, NULL, e);
}


/*
 * profile_path_perm - resolve @path for @profile, then check @request
 *
 * Unconfined profiles pass immediately.  The name lookup ORs the profile's
 * own path_flags into @flags; a lookup failure is returned as-is (it has
 * already been audited by path_name()).
 */
static int profile_path_perm(const char *op, const struct cred *subj_cred,
			     struct aa_profile *profile,
			     const struct path *path, char *buffer, u32 request,
			     struct path_cond *cond, int flags,
			     struct aa_perms *perms)
{
	const char *name;
	int error;

	if (profile_unconfined(profile))
		return 0;

	error = path_name(op, subj_cred, &profile->label, path,
			  flags | profile->path_flags, buffer, &name, cond,
			  request);
	if (error)
		return error;
	return __aa_path_perm(op, subj_cred, profile, name, request, cond,
			      flags, perms);
}

/**
 * aa_path_perm - do permissions check & audit for @path
 * @op: operation being checked
 * @subj_cred: subject cred
 * @label: profile being enforced  (NOT NULL)
 * @path: path to check permissions of  (NOT NULL)
 * @flags: any additional path flags beyond what the profile specifies
 * @request: requested permissions
 * @cond: conditional info for this request  (NOT NULL)
 *
 * Returns: %0 else error if access denied or other error
 */
int aa_path_perm(const char *op, const struct cred *subj_cred,
		 struct aa_label *label,
		 const struct path *path, int flags, u32 request,
		 struct path_cond *cond)
{
	struct aa_perms perms = {};
	struct aa_profile *profile;
	char *buffer = NULL;
	int error;

	flags |= PATH_DELEGATE_DELETED | (S_ISDIR(cond->mode) ? PATH_IS_DIR :
								0);
	buffer = aa_get_buffer(false);
	if (!buffer)
		return -ENOMEM;
	/* one scratch buffer is shared by every confined profile in @label */
	error = fn_for_each_confined(label, profile,
			profile_path_perm(op, subj_cred, profile, path, buffer,
					  request, cond, flags, &perms));

	aa_put_buffer(buffer);

	return error;
}

/**
 * xindex_is_subset - helper for aa_path_link
 * @link: link permission set
 * @target: target permission set
 *
 * test that link x permissions are equal to OR a subset of target x
 * permissions; this is done as part of the subset test, where a hardlink
 * must have a subset of permissions that the target has.
*
 * Returns: true if subset else false
 */
static inline bool xindex_is_subset(u32 link, u32 target)
{
	/*
	 * Fail if anything other than the AA_X_UNSAFE bit differs, or if
	 * link carries AA_X_UNSAFE while target does not.
	 */
	if (((link & ~AA_X_UNSAFE) != (target & ~AA_X_UNSAFE)) ||
	    ((link & AA_X_UNSAFE) && !(target & AA_X_UNSAFE)))
		return false;

	return true;
}

/*
 * profile_path_link - check one profile's permission to hard link
 *
 * Resolves the names of @link (into @buffer) and @target (into @buffer2),
 * matches the link name, continues the match from that state across a null
 * transition with the target name, and optionally applies the subset test.
 * Every exit goes through aa_audit_file(), whose result is returned.
 */
static int profile_path_link(const struct cred *subj_cred,
			     struct aa_profile *profile,
			     const struct path *link, char *buffer,
			     const struct path *target, char *buffer2,
			     struct path_cond *cond)
{
	struct aa_ruleset *rules = list_first_entry(&profile->rules,
						    typeof(*rules), list);
	const char *lname, *tname = NULL;
	struct aa_perms lperms = {}, perms;
	const char *info = NULL;
	u32 request = AA_MAY_LINK;
	aa_state_t state;
	int error;

	error = path_name(OP_LINK, subj_cred, &profile->label, link,
			  profile->path_flags,
			  buffer, &lname, cond, AA_MAY_LINK);
	if (error)
		goto audit;

	/* buffer2 freed below, tname is pointer in buffer2 */
	error = path_name(OP_LINK, subj_cred, &profile->label, target,
			  profile->path_flags,
			  buffer2, &tname, cond, AA_MAY_LINK);
	if (error)
		goto audit;

	/* from here on every 'goto audit' is a denial unless cleared below */
	error = -EACCES;
	/* aa_str_perms - handles the case of the dfa being NULL */
	state = aa_str_perms(rules->file,
			     rules->file->start[AA_CLASS_FILE], lname,
			     cond, &lperms);

	if (!(lperms.allow & AA_MAY_LINK))
		goto audit;

	/* test to see if target can be paired with link */
	state = aa_dfa_null_transition(rules->file->dfa, state);
	aa_str_perms(rules->file, state, tname, cond, &perms);

	/* force audit/quiet masks for link are stored in the second entry
	 * in the link pair.
	 */
	lperms.audit = perms.audit;
	lperms.quiet = perms.quiet;
	lperms.kill = perms.kill;

	if (!(perms.allow & AA_MAY_LINK)) {
		info = "target restricted";
		lperms = perms;
		goto audit;
	}

	/* done if link subset test is not required */
	if (!(perms.allow & AA_LINK_SUBSET))
		goto done_tests;

	/* Do link perm subset test requiring allowed permission on link are
	 * a subset of the allowed permissions on target.
	 */
	/* re-match the target name from the start state, not the link state */
	aa_str_perms(rules->file, rules->file->start[AA_CLASS_FILE],
		     tname, cond, &perms);

	/* AA_MAY_LINK is not considered in the subset test */
	request = lperms.allow & ~AA_MAY_LINK;
	lperms.allow &= perms.allow | AA_MAY_LINK;

	request |= AA_AUDIT_FILE_MASK & (lperms.allow & ~perms.allow);
	if (request & ~lperms.allow) {
		goto audit;
	} else if ((lperms.allow & MAY_EXEC) &&
		   !xindex_is_subset(lperms.xindex, perms.xindex)) {
		/* report the exec permission as the one being denied */
		lperms.allow &= ~MAY_EXEC;
		request |= MAY_EXEC;
		info = "link not subset of target";
		goto audit;
	}

done_tests:
	error = 0;

audit:
	return aa_audit_file(subj_cred,
			     profile, &lperms, OP_LINK, request, lname, tname,
			     NULL, cond->uid, info, error);
}

/**
 * aa_path_link - Handle hard link permission check
 * @subj_cred: subject cred
 * @label: the label being enforced  (NOT NULL)
 * @old_dentry: the target dentry  (NOT NULL)
 * @new_dir: directory the new link will be created in  (NOT NULL)
 * @new_dentry: the link being created  (NOT NULL)
 *
 * Handle the permission test for a link & target pair.  Permission
 * is encoded as a pair where the link permission is determined
 * first, and if allowed, the target is tested.  The target test
 * is done from the point of the link match (not start of DFA)
 * making the target permission dependent on the link permission match.
 *
 * The subset test if required forces that permissions granted
 * on link are a subset of the permission granted to target.
*
 * Returns: %0 if allowed else error
 */
int aa_path_link(const struct cred *subj_cred,
		 struct aa_label *label, struct dentry *old_dentry,
		 const struct path *new_dir, struct dentry *new_dentry)
{
	/* Both paths use the mount of the directory receiving the new link. */
	struct path link = { .mnt = new_dir->mnt, .dentry = new_dentry };
	struct path target = { .mnt = new_dir->mnt, .dentry = old_dentry };
	/* Ownership/mode conditions come from the existing (target) inode. */
	struct path_cond cond = {
		d_backing_inode(old_dentry)->i_uid,
		d_backing_inode(old_dentry)->i_mode
	};
	char *buffer = NULL, *buffer2 = NULL;
	struct aa_profile *profile;
	int error;

	/* buffer freed below, lname is pointer in buffer */
	buffer = aa_get_buffer(false);
	buffer2 = aa_get_buffer(false);
	/* Preset the error so a failed allocation of either buffer bails out. */
	error = -ENOMEM;
	if (!buffer || !buffer2)
		goto out;

	/*
	 * Run the link check once per profile selected by
	 * fn_for_each_confined(); one buffer is handed over for the link
	 * name and the other for the target name.
	 */
	error = fn_for_each_confined(label, profile,
			profile_path_link(subj_cred, profile, &link, buffer,
					  &target, buffer2, &cond));
out:
	/* Reached with NULL buffers on the allocation-failure path as well. */
	aa_put_buffer(buffer);
	aa_put_buffer(buffer2);
	return error;
}

/*
 * update_file_ctx - fold @label and @request into the file's cached context
 * @fctx: file context whose cached label/allow mask is updated
 * @label: label to merge into the cached label
 * @request: permission bits to add to the cached allow mask
 *
 * Runs entirely under @fctx->lock.  If aa_label_merge() returns NULL nothing
 * is changed (neither the label nor the allow mask).
 */
static void update_file_ctx(struct aa_file_ctx *fctx, struct aa_label *label,
			    u32 request)
{
	struct aa_label *l, *old;

	/* update caching of label on file_ctx */
	spin_lock(&fctx->lock);
	old = rcu_dereference_protected(fctx->label,
					lockdep_is_held(&fctx->lock));
	/* GFP_ATOMIC: the allocation happens with the spinlock held */
	l = aa_label_merge(old, label, GFP_ATOMIC);
	if (l) {
		if (l != old) {
			/* publish the merged label and drop the old one */
			rcu_assign_pointer(fctx->label, l);
			aa_put_label(old);
		} else
			/* merge handed back the cached label; put it again */
			aa_put_label(l);
		fctx->allow |= request;
	}
	spin_unlock(&fctx->lock);
}

/*
 * __file_path_perm - revalidate @request on a path based file
 * @op: operation being checked
 * @subj_cred: subject cred
 * @label: label being enforced
 * @flabel: label cached on the file
 * @file: file being revalidated
 * @request: requested permissions
 * @denied: requested permissions not covered by the cached allow mask
 * @in_atomic: passed to aa_get_buffer() when obtaining the name buffer
 *
 * Returns: 0 if nothing needs checking or every check passed, -ENOMEM if no
 * buffer could be obtained, else the error from the profile checks.  On
 * success the file context cache is updated with @label and @request.
 */
static int __file_path_perm(const char *op, const struct cred *subj_cred,
			    struct aa_label *label,
			    struct aa_label *flabel, struct file *file,
			    u32 request, u32 denied, bool in_atomic)
{
	struct aa_profile *profile;
	struct aa_perms perms = {};
	vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(file),
					    file_inode(file));
	struct path_cond cond = {
		.uid = vfsuid_into_kuid(vfsuid),
		.mode = file_inode(file)->i_mode
	};
	char *buffer;
	int flags, error;

	/* revalidation due to label out of date. No revocation at this time */
	if (!denied && aa_label_is_subset(flabel, label))
		/* TODO: check for revocation on stale profiles */
		return 0;

	flags = PATH_DELEGATE_DELETED | (S_ISDIR(cond.mode) ? PATH_IS_DIR : 0);
	buffer = aa_get_buffer(in_atomic);
	if (!buffer)
		return -ENOMEM;

	/* check every profile in task label not in current cache */
	error = fn_for_each_not_in_set(flabel, label, profile,
			profile_path_perm(op, subj_cred, profile,
					  &file->f_path, buffer,
					  request, &cond, flags, &perms));
	if (denied && !error) {
		/*
		 * check every profile in file label that was not tested
		 * in the initial check above.
		 *
		 * TODO: cache full perms so this only happens because of
		 * conditionals
		 * TODO: don't audit here
		 */
		if (label == flabel)
			error = fn_for_each(label, profile,
				profile_path_perm(op, subj_cred, profile,
						  &file->f_path, buffer,
						  request, &cond, flags,
						  &perms));
		else
			error = fn_for_each_not_in_set(label, flabel, profile,
				profile_path_perm(op, subj_cred, profile,
						  &file->f_path, buffer,
						  request, &cond, flags,
						  &perms));
	}
	/* only cache the result when every check above passed */
	if (!error)
		update_file_ctx(file_ctx(file), label, request);

	aa_put_buffer(buffer);

	return error;
}

/*
 * __file_sock_perm - revalidate @request on a socket file
 * @op: operation being checked
 * @subj_cred: subject cred
 * @label: label being enforced
 * @flabel: label cached on the file
 * @file: file being revalidated; its private_data is used as the socket
 * @request: requested permissions
 * @denied: requested permissions not covered by the cached allow mask
 *
 * Returns: 0 if nothing needs checking or the checks passed, else error.
 * On success the file context cache is updated with @label and @request.
 */
static int __file_sock_perm(const char *op, const struct cred *subj_cred,
			    struct aa_label *label,
			    struct aa_label *flabel, struct file *file,
			    u32 request, u32 denied)
{
	struct socket *sock = (struct socket *) file->private_data;
	int error;

	AA_BUG(!sock);

	/* revalidation due to label out of date. No revocation at this time */
	if (!denied && aa_label_is_subset(flabel, label))
		return 0;

	/* TODO: improve to skip profiles cached in flabel */
	error = aa_sock_file_perm(subj_cred, label, op, request, sock);
	if (denied) {
		/* TODO: improve to skip profiles checked above */
		/* also check the label that is cached on the file */
		last_error(error, aa_sock_file_perm(subj_cred, flabel, op,
						    request, sock));
	}
	if (!error)
		update_file_ctx(file_ctx(file), label, request);

	return error;
}

/**
 * aa_file_perm - do permission revalidation check & audit for @file
 * @op: operation being checked
 * @subj_cred: subject cred
 * @label: label being enforced   (NOT NULL)
 * @file: file to revalidate access permissions on  (NOT NULL)
 * @request: requested permissions
 * @in_atomic: whether allocations need to be done in atomic context
 *
 * Files that are neither on a mediated filesystem nor sockets are not
 * checked here and yield 0.
 *
 * Returns: %0 if access allowed else error
 */
int aa_file_perm(const char *op, const struct cred *subj_cred,
		 struct aa_label *label, struct file *file,
		 u32 request, bool in_atomic)
{
	struct aa_file_ctx *fctx;
	struct aa_label *flabel;
	u32 denied;
	int error = 0;

	AA_BUG(!label);
	AA_BUG(!file);

	fctx = file_ctx(file);

	/* the cached label is only dereferenced inside the RCU read section */
	rcu_read_lock();
	flabel = rcu_dereference(fctx->label);
	AA_BUG(!flabel);

	/* revalidate access, if task is unconfined, or the cached cred
	 * doesn't match or if the request is for more permissions than
	 * was granted.
	 *
	 * Note: the test for !unconfined(flabel) is to handle file
	 * delegation from unconfined tasks
	 */
	denied = request & ~fctx->allow;
	if (unconfined(label) || unconfined(flabel) ||
	    (!denied && aa_label_is_subset(flabel, label))) {
		rcu_read_unlock();
		goto done;
	}

	/* take a reference before leaving the RCU read section */
	flabel = aa_get_newest_label(flabel);
	rcu_read_unlock();
	/* TODO: label cross check */

	if (file->f_path.mnt && path_mediated_fs(file->f_path.dentry))
		error = __file_path_perm(op, subj_cred, label, flabel, file,
					 request, denied, in_atomic);

	else if (S_ISSOCK(file_inode(file)->i_mode))
		error = __file_sock_perm(op, subj_cred, label, flabel, file,
					 request, denied);
	aa_put_label(flabel);

done:
	return error;
}

/*
 * revalidate_tty - recheck read/write access to the current task's tty
 * @subj_cred: subject cred
 * @label: label being enforced
 *
 * Only the first entry of the tty's file list is checked.  If that check
 * fails, no_tty() is called after the lock and tty reference are released.
 */
static void revalidate_tty(const struct cred *subj_cred, struct aa_label *label)
{
	struct tty_struct *tty;
	int drop_tty = 0;

	tty = get_current_tty();
	if (!tty)
		return;

	spin_lock(&tty->files_lock);
	if (!list_empty(&tty->tty_files)) {
		struct tty_file_private *file_priv;
		struct file *file;
		/* TODO: Revalidate access to controlling tty. */
		file_priv = list_first_entry(&tty->tty_files,
					     struct tty_file_private, list);
		file = file_priv->file;

		/* called with files_lock held, hence IN_ATOMIC */
		if (aa_file_perm(OP_INHERIT, subj_cred, label, file,
				 MAY_READ | MAY_WRITE, IN_ATOMIC))
			drop_tty = 1;
	}
	spin_unlock(&tty->files_lock);
	tty_kref_put(tty);

	if (drop_tty)
		no_tty();
}

/* Argument bundle handed to match_file() through iterate_fd(). */
struct cred_label {
	const struct cred *cred;
	struct aa_label *label;
};

/*
 * match_file - iterate_fd() callback flagging files that fail revalidation
 * @p: pointer to a struct cred_label
 * @file: open file being visited
 * @fd: descriptor number of @file
 *
 * Returns: @fd + 1 when aa_file_perm() reports an error for @file (so that
 * descriptor 0 still yields a non-zero value), else 0.
 */
static int match_file(const void *p, struct file *file, unsigned int fd)
{
	struct cred_label *cl = (struct cred_label *)p;

	if (aa_file_perm(OP_INHERIT, cl->cred, cl->label, file,
			 aa_map_file_to_perms(file), IN_ATOMIC))
		return fd + 1;
	return 0;
}

/* based on selinux's flush_unauthorized_files */
/*
 * aa_inherit_files - revalidate inherited files against the cred's label
 * @cred: cred whose newest label is enforced
 * @files: file table to scan
 *
 * Every descriptor for which match_file() reports a failure is replaced
 * with a file opened on aa_null; if that open fails, replace_fd() is called
 * with a NULL file instead.
 */
void aa_inherit_files(const struct cred *cred, struct files_struct *files)
{
	struct aa_label *label = aa_get_newest_cred_label(cred);
	struct cred_label cl = {
		.cred = cred,
		.label = label,
	};
	struct file *devnull = NULL;
	unsigned int n;

	revalidate_tty(cred, label);

	/* Revalidate access to inherited open files. */
	n = iterate_fd(files, 0, match_file, &cl);
	if (!n) /* none found? */
		goto out;

	devnull = dentry_open(&aa_null, O_RDWR, cred);
	if (IS_ERR(devnull))
		devnull = NULL;
	/* replace all the matching ones with this */
	do {
		/* n is the failing fd + 1, see match_file() */
		replace_fd(n - 1, devnull, 0);
	} while ((n = iterate_fd(files, n, match_file, &cl)) != 0);
	if (devnull)
		fput(devnull);
out:
	aa_put_label(label);
}
7 10 9 2 8 8 8 8 7 7 4 1 5 1 1 8 8 5 1 4 4 1 4 6 2 1 2 1 11 1 4 4 3 3 3 4 2 1 2 1 2 1 4 3 1 3 1 68 1 6 1 14 1 2 5 1 1 1 21 22 3 2 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 
495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 // SPDX-License-Identifier: GPL-2.0-only /* * vhost transport for vsock * * Copyright (C) 2013-2015 Red Hat, Inc. 
* Author: Asias He <asias@redhat.com> * Stefan Hajnoczi <stefanha@redhat.com> */ #include <linux/miscdevice.h> #include <linux/atomic.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/vmalloc.h> #include <net/sock.h> #include <linux/virtio_vsock.h> #include <linux/vhost.h> #include <linux/hashtable.h> #include <net/af_vsock.h> #include "vhost.h" #define VHOST_VSOCK_DEFAULT_HOST_CID 2 /* Max number of bytes transferred before requeueing the job. * Using this limit prevents one virtqueue from starving others. */ #define VHOST_VSOCK_WEIGHT 0x80000 /* Max number of packets transferred before requeueing the job. * Using this limit prevents one virtqueue from starving others with * small pkts. */ #define VHOST_VSOCK_PKT_WEIGHT 256 enum { VHOST_VSOCK_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_F_ACCESS_PLATFORM) | (1ULL << VIRTIO_VSOCK_F_SEQPACKET) }; enum { VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) }; /* Used to track all the vhost_vsock instances on the system. */ static DEFINE_MUTEX(vhost_vsock_mutex); static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8); struct vhost_vsock { struct vhost_dev dev; struct vhost_virtqueue vqs[2]; /* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */ struct hlist_node hash; struct vhost_work send_pkt_work; struct sk_buff_head send_pkt_queue; /* host->guest pending packets */ atomic_t queued_replies; u32 guest_cid; bool seqpacket_allow; }; static u32 vhost_transport_get_local_cid(void) { return VHOST_VSOCK_DEFAULT_HOST_CID; } /* Callers that dereference the return value must hold vhost_vsock_mutex or the * RCU read lock. 
*/ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) { struct vhost_vsock *vsock; hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) { u32 other_cid = vsock->guest_cid; /* Skip instances that have no CID yet */ if (other_cid == 0) continue; if (other_cid == guest_cid) return vsock; } return NULL; } static void vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct vhost_virtqueue *vq) { struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; int pkts = 0, total_len = 0; bool added = false; bool restart_tx = false; mutex_lock(&vq->mutex); if (!vhost_vq_get_backend(vq)) goto out; if (!vq_meta_prefetch(vq)) goto out; /* Avoid further vmexits, we're already processing the virtqueue */ vhost_disable_notify(&vsock->dev, vq); do { struct virtio_vsock_hdr *hdr; size_t iov_len, payload_len; struct iov_iter iov_iter; u32 flags_to_restore = 0; struct sk_buff *skb; unsigned out, in; size_t nbytes; u32 offset; int head; skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue); if (!skb) { vhost_enable_notify(&vsock->dev, vq); break; } head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), &out, &in, NULL, NULL); if (head < 0) { virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb); break; } if (head == vq->num) { virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb); /* We cannot finish yet if more buffers snuck in while * re-enabling notify. 
*/ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { vhost_disable_notify(&vsock->dev, vq); continue; } break; } if (out) { kfree_skb(skb); vq_err(vq, "Expected 0 output buffers, got %u\n", out); break; } iov_len = iov_length(&vq->iov[out], in); if (iov_len < sizeof(*hdr)) { kfree_skb(skb); vq_err(vq, "Buffer len [%zu] too small\n", iov_len); break; } iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[out], in, iov_len); offset = VIRTIO_VSOCK_SKB_CB(skb)->offset; payload_len = skb->len - offset; hdr = virtio_vsock_hdr(skb); /* If the packet is greater than the space available in the * buffer, we split it using multiple buffers. */ if (payload_len > iov_len - sizeof(*hdr)) { payload_len = iov_len - sizeof(*hdr); /* As we are copying pieces of large packet's buffer to * small rx buffers, headers of packets in rx queue are * created dynamically and are initialized with header * of current packet(except length). But in case of * SOCK_SEQPACKET, we also must clear message delimeter * bit (VIRTIO_VSOCK_SEQ_EOM) and MSG_EOR bit * (VIRTIO_VSOCK_SEQ_EOR) if set. Otherwise, * there will be sequence of packets with these * bits set. After initialized header will be copied to * rx buffer, these required bits will be restored. */ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) { hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM; if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) { hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR; } } } /* Set the correct length in the header */ hdr->len = cpu_to_le32(payload_len); nbytes = copy_to_iter(hdr, sizeof(*hdr), &iov_iter); if (nbytes != sizeof(*hdr)) { kfree_skb(skb); vq_err(vq, "Faulted on copying pkt hdr\n"); break; } if (skb_copy_datagram_iter(skb, offset, &iov_iter, payload_len)) { kfree_skb(skb); vq_err(vq, "Faulted on copying pkt buf\n"); break; } /* Deliver to monitoring devices all packets that we * will transmit. 
*/ virtio_transport_deliver_tap_pkt(skb); vhost_add_used(vq, head, sizeof(*hdr) + payload_len); added = true; VIRTIO_VSOCK_SKB_CB(skb)->offset += payload_len; total_len += payload_len; /* If we didn't send all the payload we can requeue the packet * to send it with the next available buffer. */ if (VIRTIO_VSOCK_SKB_CB(skb)->offset < skb->len) { hdr->flags |= cpu_to_le32(flags_to_restore); /* We are queueing the same skb to handle * the remaining bytes, and we want to deliver it * to monitoring devices in the next iteration. */ virtio_vsock_skb_clear_tap_delivered(skb); virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb); } else { if (virtio_vsock_skb_reply(skb)) { int val; val = atomic_dec_return(&vsock->queued_replies); /* Do we have resources to resume tx * processing? */ if (val + 1 == tx_vq->num) restart_tx = true; } consume_skb(skb); } } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); if (added) vhost_signal(&vsock->dev, vq); out: mutex_unlock(&vq->mutex); if (restart_tx) vhost_poll_queue(&tx_vq->poll); } static void vhost_transport_send_pkt_work(struct vhost_work *work) { struct vhost_virtqueue *vq; struct vhost_vsock *vsock; vsock = container_of(work, struct vhost_vsock, send_pkt_work); vq = &vsock->vqs[VSOCK_VQ_RX]; vhost_transport_do_send_pkt(vsock, vq); } static int vhost_transport_send_pkt(struct sk_buff *skb) { struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); struct vhost_vsock *vsock; int len = skb->len; rcu_read_lock(); /* Find the vhost_vsock according to guest context id */ vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid)); if (!vsock) { rcu_read_unlock(); kfree_skb(skb); return -ENODEV; } if (virtio_vsock_skb_reply(skb)) atomic_inc(&vsock->queued_replies); virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb); vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work); rcu_read_unlock(); return len; } static int vhost_transport_cancel_pkt(struct vsock_sock *vsk) { struct vhost_vsock *vsock; int cnt = 0; int ret 
= -ENODEV; rcu_read_lock(); /* Find the vhost_vsock according to guest context id */ vsock = vhost_vsock_get(vsk->remote_addr.svm_cid); if (!vsock) goto out; cnt = virtio_transport_purge_skbs(vsk, &vsock->send_pkt_queue); if (cnt) { struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; int new_cnt; new_cnt = atomic_sub_return(cnt, &vsock->queued_replies); if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num) vhost_poll_queue(&tx_vq->poll); } ret = 0; out: rcu_read_unlock(); return ret; } static struct sk_buff * vhost_vsock_alloc_skb(struct vhost_virtqueue *vq, unsigned int out, unsigned int in) { struct virtio_vsock_hdr *hdr; struct iov_iter iov_iter; struct sk_buff *skb; size_t payload_len; size_t nbytes; size_t len; if (in != 0) { vq_err(vq, "Expected 0 input buffers, got %u\n", in); return NULL; } len = iov_length(vq->iov, out); /* len contains both payload and hdr */ skb = virtio_vsock_alloc_skb(len, GFP_KERNEL); if (!skb) return NULL; iov_iter_init(&iov_iter, ITER_SOURCE, vq->iov, out, len); hdr = virtio_vsock_hdr(skb); nbytes = copy_from_iter(hdr, sizeof(*hdr), &iov_iter); if (nbytes != sizeof(*hdr)) { vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n", sizeof(*hdr), nbytes); kfree_skb(skb); return NULL; } payload_len = le32_to_cpu(hdr->len); /* No payload */ if (!payload_len) return skb; /* The pkt is too big or the length in the header is invalid */ if (payload_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || payload_len + sizeof(*hdr) > len) { kfree_skb(skb); return NULL; } virtio_vsock_skb_rx_put(skb); nbytes = copy_from_iter(skb->data, payload_len, &iov_iter); if (nbytes != payload_len) { vq_err(vq, "Expected %zu byte payload, got %zu bytes\n", payload_len, nbytes); kfree_skb(skb); return NULL; } return skb; } /* Is there space left for replies to rx packets? 
*/ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock) { struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX]; int val; smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ val = atomic_read(&vsock->queued_replies); return val < vq->num; } static bool vhost_transport_msgzerocopy_allow(void) { return true; } static bool vhost_transport_seqpacket_allow(u32 remote_cid); static struct virtio_transport vhost_transport = { .transport = { .module = THIS_MODULE, .get_local_cid = vhost_transport_get_local_cid, .init = virtio_transport_do_socket_init, .destruct = virtio_transport_destruct, .release = virtio_transport_release, .connect = virtio_transport_connect, .shutdown = virtio_transport_shutdown, .cancel_pkt = vhost_transport_cancel_pkt, .dgram_enqueue = virtio_transport_dgram_enqueue, .dgram_dequeue = virtio_transport_dgram_dequeue, .dgram_bind = virtio_transport_dgram_bind, .dgram_allow = virtio_transport_dgram_allow, .stream_enqueue = virtio_transport_stream_enqueue, .stream_dequeue = virtio_transport_stream_dequeue, .stream_has_data = virtio_transport_stream_has_data, .stream_has_space = virtio_transport_stream_has_space, .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, .stream_is_active = virtio_transport_stream_is_active, .stream_allow = virtio_transport_stream_allow, .seqpacket_dequeue = virtio_transport_seqpacket_dequeue, .seqpacket_enqueue = virtio_transport_seqpacket_enqueue, .seqpacket_allow = vhost_transport_seqpacket_allow, .seqpacket_has_data = virtio_transport_seqpacket_has_data, .msgzerocopy_allow = vhost_transport_msgzerocopy_allow, .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, .notify_send_init = 
virtio_transport_notify_send_init, .notify_send_pre_block = virtio_transport_notify_send_pre_block, .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, .notify_buffer_size = virtio_transport_notify_buffer_size, .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat, .read_skb = virtio_transport_read_skb, }, .send_pkt = vhost_transport_send_pkt, }; static bool vhost_transport_seqpacket_allow(u32 remote_cid) { struct vhost_vsock *vsock; bool seqpacket_allow = false; rcu_read_lock(); vsock = vhost_vsock_get(remote_cid); if (vsock) seqpacket_allow = vsock->seqpacket_allow; rcu_read_unlock(); return seqpacket_allow; } static void vhost_vsock_handle_tx_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, poll.work); struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, dev); int head, pkts = 0, total_len = 0; unsigned int out, in; struct sk_buff *skb; bool added = false; mutex_lock(&vq->mutex); if (!vhost_vq_get_backend(vq)) goto out; if (!vq_meta_prefetch(vq)) goto out; vhost_disable_notify(&vsock->dev, vq); do { struct virtio_vsock_hdr *hdr; if (!vhost_vsock_more_replies(vsock)) { /* Stop tx until the device processes already * pending replies. Leave tx virtqueue * callbacks disabled. 
*/ goto no_more_replies; } head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), &out, &in, NULL, NULL); if (head < 0) break; if (head == vq->num) { if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { vhost_disable_notify(&vsock->dev, vq); continue; } break; } skb = vhost_vsock_alloc_skb(vq, out, in); if (!skb) { vq_err(vq, "Faulted on pkt\n"); continue; } total_len += sizeof(*hdr) + skb->len; /* Deliver to monitoring devices all received packets */ virtio_transport_deliver_tap_pkt(skb); hdr = virtio_vsock_hdr(skb); /* Only accept correctly addressed packets */ if (le64_to_cpu(hdr->src_cid) == vsock->guest_cid && le64_to_cpu(hdr->dst_cid) == vhost_transport_get_local_cid()) virtio_transport_recv_pkt(&vhost_transport, skb); else kfree_skb(skb); vhost_add_used(vq, head, 0); added = true; } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); no_more_replies: if (added) vhost_signal(&vsock->dev, vq); out: mutex_unlock(&vq->mutex); } static void vhost_vsock_handle_rx_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, poll.work); struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, dev); vhost_transport_do_send_pkt(vsock, vq); } static int vhost_vsock_start(struct vhost_vsock *vsock) { struct vhost_virtqueue *vq; size_t i; int ret; mutex_lock(&vsock->dev.mutex); ret = vhost_dev_check_owner(&vsock->dev); if (ret) goto err; for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { vq = &vsock->vqs[i]; mutex_lock(&vq->mutex); if (!vhost_vq_access_ok(vq)) { ret = -EFAULT; goto err_vq; } if (!vhost_vq_get_backend(vq)) { vhost_vq_set_backend(vq, vsock); ret = vhost_vq_init_access(vq); if (ret) goto err_vq; } mutex_unlock(&vq->mutex); } /* Some packets may have been queued before the device was started, * let's kick the send worker to send them. 
*/ vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work); mutex_unlock(&vsock->dev.mutex); return 0; err_vq: vhost_vq_set_backend(vq, NULL); mutex_unlock(&vq->mutex); for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { vq = &vsock->vqs[i]; mutex_lock(&vq->mutex); vhost_vq_set_backend(vq, NULL); mutex_unlock(&vq->mutex); } err: mutex_unlock(&vsock->dev.mutex); return ret; } static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner) { size_t i; int ret = 0; mutex_lock(&vsock->dev.mutex); if (check_owner) { ret = vhost_dev_check_owner(&vsock->dev); if (ret) goto err; } for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { struct vhost_virtqueue *vq = &vsock->vqs[i]; mutex_lock(&vq->mutex); vhost_vq_set_backend(vq, NULL); mutex_unlock(&vq->mutex); } err: mutex_unlock(&vsock->dev.mutex); return ret; } static void vhost_vsock_free(struct vhost_vsock *vsock) { kvfree(vsock); } static int vhost_vsock_dev_open(struct inode *inode, struct file *file) { struct vhost_virtqueue **vqs; struct vhost_vsock *vsock; int ret; /* This struct is large and allocation could fail, fall back to vmalloc * if there is no other way. 
*/ vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!vsock) return -ENOMEM; vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL); if (!vqs) { ret = -ENOMEM; goto out; } vsock->guest_cid = 0; /* no CID assigned yet */ atomic_set(&vsock->queued_replies, 0); vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX]; vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX]; vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT, VHOST_VSOCK_WEIGHT, true, NULL); file->private_data = vsock; skb_queue_head_init(&vsock->send_pkt_queue); vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); return 0; out: vhost_vsock_free(vsock); return ret; } static void vhost_vsock_flush(struct vhost_vsock *vsock) { vhost_dev_flush(&vsock->dev); } static void vhost_vsock_reset_orphans(struct sock *sk) { struct vsock_sock *vsk = vsock_sk(sk); /* vmci_transport.c doesn't take sk_lock here either. At least we're * under vsock_table_lock so the sock cannot disappear while we're * executing. */ /* If the peer is still valid, no need to reset connection */ if (vhost_vsock_get(vsk->remote_addr.svm_cid)) return; /* If the close timeout is pending, let it expire. This avoids races * with the timeout callback. */ if (vsk->close_work_scheduled) return; sock_set_flag(sk, SOCK_DONE); vsk->peer_shutdown = SHUTDOWN_MASK; sk->sk_state = SS_UNCONNECTED; sk->sk_err = ECONNRESET; sk_error_report(sk); } static int vhost_vsock_dev_release(struct inode *inode, struct file *file) { struct vhost_vsock *vsock = file->private_data; mutex_lock(&vhost_vsock_mutex); if (vsock->guest_cid) hash_del_rcu(&vsock->hash); mutex_unlock(&vhost_vsock_mutex); /* Wait for other CPUs to finish using vsock */ synchronize_rcu(); /* Iterating over all connections for all CIDs to find orphans is * inefficient. Room for improvement here. 
*/ vsock_for_each_connected_socket(&vhost_transport.transport, vhost_vsock_reset_orphans); /* Don't check the owner, because we are in the release path, so we * need to stop the vsock device in any case. * vhost_vsock_stop() can not fail in this case, so we don't need to * check the return code. */ vhost_vsock_stop(vsock, false); vhost_vsock_flush(vsock); vhost_dev_stop(&vsock->dev); virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue); vhost_dev_cleanup(&vsock->dev); kfree(vsock->dev.vqs); vhost_vsock_free(vsock); return 0; } static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) { struct vhost_vsock *other; /* Refuse reserved CIDs */ if (guest_cid <= VMADDR_CID_HOST || guest_cid == U32_MAX) return -EINVAL; /* 64-bit CIDs are not yet supported */ if (guest_cid > U32_MAX) return -EINVAL; /* Refuse if CID is assigned to the guest->host transport (i.e. nested * VM), to make the loopback work. */ if (vsock_find_cid(guest_cid)) return -EADDRINUSE; /* Refuse if CID is already in use */ mutex_lock(&vhost_vsock_mutex); other = vhost_vsock_get(guest_cid); if (other && other != vsock) { mutex_unlock(&vhost_vsock_mutex); return -EADDRINUSE; } if (vsock->guest_cid) hash_del_rcu(&vsock->hash); vsock->guest_cid = guest_cid; hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid); mutex_unlock(&vhost_vsock_mutex); return 0; } static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) { struct vhost_virtqueue *vq; int i; if (features & ~VHOST_VSOCK_FEATURES) return -EOPNOTSUPP; mutex_lock(&vsock->dev.mutex); if ((features & (1 << VHOST_F_LOG_ALL)) && !vhost_log_access_ok(&vsock->dev)) { goto err; } if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) { if (vhost_init_device_iotlb(&vsock->dev)) goto err; } if (features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET)) vsock->seqpacket_allow = true; for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { vq = &vsock->vqs[i]; mutex_lock(&vq->mutex); vq->acked_features = features; mutex_unlock(&vq->mutex); } 
mutex_unlock(&vsock->dev.mutex); return 0; err: mutex_unlock(&vsock->dev.mutex); return -EFAULT; } static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, unsigned long arg) { struct vhost_vsock *vsock = f->private_data; void __user *argp = (void __user *)arg; u64 guest_cid; u64 features; int start; int r; switch (ioctl) { case VHOST_VSOCK_SET_GUEST_CID: if (copy_from_user(&guest_cid, argp, sizeof(guest_cid))) return -EFAULT; return vhost_vsock_set_cid(vsock, guest_cid); case VHOST_VSOCK_SET_RUNNING: if (copy_from_user(&start, argp, sizeof(start))) return -EFAULT; if (start) return vhost_vsock_start(vsock); else return vhost_vsock_stop(vsock, true); case VHOST_GET_FEATURES: features = VHOST_VSOCK_FEATURES; if (copy_to_user(argp, &features, sizeof(features))) return -EFAULT; return 0; case VHOST_SET_FEATURES: if (copy_from_user(&features, argp, sizeof(features))) return -EFAULT; return vhost_vsock_set_features(vsock, features); case VHOST_GET_BACKEND_FEATURES: features = VHOST_VSOCK_BACKEND_FEATURES; if (copy_to_user(argp, &features, sizeof(features))) return -EFAULT; return 0; case VHOST_SET_BACKEND_FEATURES: if (copy_from_user(&features, argp, sizeof(features))) return -EFAULT; if (features & ~VHOST_VSOCK_BACKEND_FEATURES) return -EOPNOTSUPP; vhost_set_backend_features(&vsock->dev, features); return 0; default: mutex_lock(&vsock->dev.mutex); r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); if (r == -ENOIOCTLCMD) r = vhost_vring_ioctl(&vsock->dev, ioctl, argp); else vhost_vsock_flush(vsock); mutex_unlock(&vsock->dev.mutex); return r; } } static ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct vhost_vsock *vsock = file->private_data; struct vhost_dev *dev = &vsock->dev; int noblock = file->f_flags & O_NONBLOCK; return vhost_chr_read_iter(dev, to, noblock); } static ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct 
vhost_vsock *vsock = file->private_data; struct vhost_dev *dev = &vsock->dev; return vhost_chr_write_iter(dev, from); } static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait) { struct vhost_vsock *vsock = file->private_data; struct vhost_dev *dev = &vsock->dev; return vhost_chr_poll(file, dev, wait); } static const struct file_operations vhost_vsock_fops = { .owner = THIS_MODULE, .open = vhost_vsock_dev_open, .release = vhost_vsock_dev_release, .llseek = noop_llseek, .unlocked_ioctl = vhost_vsock_dev_ioctl, .compat_ioctl = compat_ptr_ioctl, .read_iter = vhost_vsock_chr_read_iter, .write_iter = vhost_vsock_chr_write_iter, .poll = vhost_vsock_chr_poll, }; static struct miscdevice vhost_vsock_misc = { .minor = VHOST_VSOCK_MINOR, .name = "vhost-vsock", .fops = &vhost_vsock_fops, }; static int __init vhost_vsock_init(void) { int ret; ret = vsock_core_register(&vhost_transport.transport, VSOCK_TRANSPORT_F_H2G); if (ret < 0) return ret; ret = misc_register(&vhost_vsock_misc); if (ret) { vsock_core_unregister(&vhost_transport.transport); return ret; } return 0; }; static void __exit vhost_vsock_exit(void) { misc_deregister(&vhost_vsock_misc); vsock_core_unregister(&vhost_transport.transport); }; module_init(vhost_vsock_init); module_exit(vhost_vsock_exit); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Asias He"); MODULE_DESCRIPTION("vhost transport for vsock "); MODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR); MODULE_ALIAS("devname:vhost-vsock");
4 34 63 12 20 20 14 32 15 2 10 3 1 1 2 9 9 2 10 2 2 1 11 32 27 17 32 17 20 20 2 14 1 9 26 9 24 2 24 24 24 2 22 32 32 31 32 51 1 1 49 1 1 9 1 2 1 5 6 32 2 31 12 52 1 19 29 35 1 2 31 7 27 28 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 
480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 // SPDX-License-Identifier: LGPL-2.1 /* * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd. * Written by Takashi Sato <t-sato@yk.jp.nec.com> * Akira Fujita <a-fujita@rs.jp.nec.com> */ #include <linux/fs.h> #include <linux/quotaops.h> #include <linux/slab.h> #include <linux/sched/mm.h> #include "ext4_jbd2.h" #include "ext4.h" #include "ext4_extents.h" /** * get_ext_path() - Find an extent path for designated logical block number. * @inode: inode to be searched * @lblock: logical block number to find an extent path * @ppath: pointer to an extent path pointer (for output) * * ext4_find_extent wrapper. Return 0 on success, or a negative error value * on failure. 
*/ static inline int get_ext_path(struct inode *inode, ext4_lblk_t lblock, struct ext4_ext_path **ppath) { struct ext4_ext_path *path; path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); if (path[ext_depth(inode)].p_ext == NULL) { ext4_free_ext_path(path); *ppath = NULL; return -ENODATA; } *ppath = path; return 0; } /** * ext4_double_down_write_data_sem() - write lock two inodes's i_data_sem * @first: inode to be locked * @second: inode to be locked * * Acquire write lock of i_data_sem of the two inodes */ void ext4_double_down_write_data_sem(struct inode *first, struct inode *second) { if (first < second) { down_write(&EXT4_I(first)->i_data_sem); down_write_nested(&EXT4_I(second)->i_data_sem, I_DATA_SEM_OTHER); } else { down_write(&EXT4_I(second)->i_data_sem); down_write_nested(&EXT4_I(first)->i_data_sem, I_DATA_SEM_OTHER); } } /** * ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem * * @orig_inode: original inode structure to be released its lock first * @donor_inode: donor inode structure to be released its lock second * Release write lock of i_data_sem of two inodes (orig and donor). */ void ext4_double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) { up_write(&EXT4_I(orig_inode)->i_data_sem); up_write(&EXT4_I(donor_inode)->i_data_sem); } /** * mext_check_coverage - Check that all extents in range has the same type * * @inode: inode in question * @from: block offset of inode * @count: block count to be checked * @unwritten: extents expected to be unwritten * @err: pointer to save error value * * Return 1 if all extents in range has expected type, and zero otherwise. 
*/ static int mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, int unwritten, int *err) { struct ext4_ext_path *path = NULL; struct ext4_extent *ext; int ret = 0; ext4_lblk_t last = from + count; while (from < last) { *err = get_ext_path(inode, from, &path); if (*err) goto out; ext = path[ext_depth(inode)].p_ext; if (unwritten != ext4_ext_is_unwritten(ext)) goto out; from += ext4_ext_get_actual_len(ext); } ret = 1; out: ext4_free_ext_path(path); return ret; } /** * mext_folio_double_lock - Grab and lock folio on both @inode1 and @inode2 * * @inode1: the inode structure * @inode2: the inode structure * @index1: folio index * @index2: folio index * @folio: result folio vector * * Grab two locked folio for inode's by inode order */ static int mext_folio_double_lock(struct inode *inode1, struct inode *inode2, pgoff_t index1, pgoff_t index2, struct folio *folio[2]) { struct address_space *mapping[2]; unsigned int flags; BUG_ON(!inode1 || !inode2); if (inode1 < inode2) { mapping[0] = inode1->i_mapping; mapping[1] = inode2->i_mapping; } else { swap(index1, index2); mapping[0] = inode2->i_mapping; mapping[1] = inode1->i_mapping; } flags = memalloc_nofs_save(); folio[0] = __filemap_get_folio(mapping[0], index1, FGP_WRITEBEGIN, mapping_gfp_mask(mapping[0])); if (IS_ERR(folio[0])) { memalloc_nofs_restore(flags); return PTR_ERR(folio[0]); } folio[1] = __filemap_get_folio(mapping[1], index2, FGP_WRITEBEGIN, mapping_gfp_mask(mapping[1])); memalloc_nofs_restore(flags); if (IS_ERR(folio[1])) { folio_unlock(folio[0]); folio_put(folio[0]); return PTR_ERR(folio[1]); } /* * __filemap_get_folio() may not wait on folio's writeback if * BDI not demand that. 
But it is reasonable to be very conservative * here and explicitly wait on folio's writeback */ folio_wait_writeback(folio[0]); folio_wait_writeback(folio[1]); if (inode1 > inode2) swap(folio[0], folio[1]); return 0; } /* Force page buffers uptodate w/o dropping page's lock */ static int mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to) { struct inode *inode = folio->mapping->host; sector_t block; struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; unsigned int blocksize, block_start, block_end; int i, err, nr = 0, partial = 0; BUG_ON(!folio_test_locked(folio)); BUG_ON(folio_test_writeback(folio)); if (folio_test_uptodate(folio)) return 0; blocksize = i_blocksize(inode); head = folio_buffers(folio); if (!head) head = create_empty_buffers(folio, blocksize, 0); block = (sector_t)folio->index << (PAGE_SHIFT - inode->i_blkbits); for (bh = head, block_start = 0; bh != head || !block_start; block++, block_start = block_end, bh = bh->b_this_page) { block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { if (!buffer_uptodate(bh)) partial = 1; continue; } if (buffer_uptodate(bh)) continue; if (!buffer_mapped(bh)) { err = ext4_get_block(inode, block, bh, 0); if (err) { folio_set_error(folio); return err; } if (!buffer_mapped(bh)) { folio_zero_range(folio, block_start, blocksize); set_buffer_uptodate(bh); continue; } } BUG_ON(nr >= MAX_BUF_PER_PAGE); arr[nr++] = bh; } /* No io required */ if (!nr) goto out; for (i = 0; i < nr; i++) { bh = arr[i]; if (!bh_uptodate_or_lock(bh)) { err = ext4_read_bh(bh, 0, NULL); if (err) return err; } } out: if (!partial) folio_mark_uptodate(folio); return 0; } /** * move_extent_per_page - Move extent data per page * * @o_filp: file structure of original file * @donor_inode: donor inode * @orig_page_offset: page index on original file * @donor_page_offset: page index on donor file * @data_offset_in_page: block index where data swapping starts * @block_len_in_page: the number of blocks to be 
swapped * @unwritten: orig extent is unwritten or not * @err: pointer to save return value * * Save the data in original inode blocks and replace original inode extents * with donor inode extents by calling ext4_swap_extents(). * Finally, write out the saved data in new original inode blocks. Return * replaced block count. */ static int move_extent_per_page(struct file *o_filp, struct inode *donor_inode, pgoff_t orig_page_offset, pgoff_t donor_page_offset, int data_offset_in_page, int block_len_in_page, int unwritten, int *err) { struct inode *orig_inode = file_inode(o_filp); struct folio *folio[2] = {NULL, NULL}; handle_t *handle; ext4_lblk_t orig_blk_offset, donor_blk_offset; unsigned long blocksize = orig_inode->i_sb->s_blocksize; unsigned int tmp_data_size, data_size, replaced_size; int i, err2, jblocks, retries = 0; int replaced_count = 0; int from = data_offset_in_page << orig_inode->i_blkbits; int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits; struct super_block *sb = orig_inode->i_sb; struct buffer_head *bh = NULL; /* * It needs twice the amount of ordinary journal buffers because * inode and donor_inode may change each different metadata blocks. */ again: *err = 0; jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks); if (IS_ERR(handle)) { *err = PTR_ERR(handle); return 0; } orig_blk_offset = orig_page_offset * blocks_per_page + data_offset_in_page; donor_blk_offset = donor_page_offset * blocks_per_page + data_offset_in_page; /* Calculate data_size */ if ((orig_blk_offset + block_len_in_page - 1) == ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { /* Replace the last block */ tmp_data_size = orig_inode->i_size & (blocksize - 1); /* * If data_size equal zero, it shows data_size is multiples of * blocksize. So we set appropriate value. 
*/ if (tmp_data_size == 0) tmp_data_size = blocksize; data_size = tmp_data_size + ((block_len_in_page - 1) << orig_inode->i_blkbits); } else data_size = block_len_in_page << orig_inode->i_blkbits; replaced_size = data_size; *err = mext_folio_double_lock(orig_inode, donor_inode, orig_page_offset, donor_page_offset, folio); if (unlikely(*err < 0)) goto stop_journal; /* * If orig extent was unwritten it can become initialized * at any time after i_data_sem was dropped, in order to * serialize with delalloc we have recheck extent while we * hold page's lock, if it is still the case data copy is not * necessary, just swap data blocks between orig and donor. */ VM_BUG_ON_FOLIO(folio_test_large(folio[0]), folio[0]); VM_BUG_ON_FOLIO(folio_test_large(folio[1]), folio[1]); VM_BUG_ON_FOLIO(folio_nr_pages(folio[0]) != folio_nr_pages(folio[1]), folio[1]); if (unwritten) { ext4_double_down_write_data_sem(orig_inode, donor_inode); /* If any of extents in range became initialized we have to * fallback to data copying */ unwritten = mext_check_coverage(orig_inode, orig_blk_offset, block_len_in_page, 1, err); if (*err) goto drop_data_sem; unwritten &= mext_check_coverage(donor_inode, donor_blk_offset, block_len_in_page, 1, err); if (*err) goto drop_data_sem; if (!unwritten) { ext4_double_up_write_data_sem(orig_inode, donor_inode); goto data_copy; } if (!filemap_release_folio(folio[0], 0) || !filemap_release_folio(folio[1], 0)) { *err = -EBUSY; goto drop_data_sem; } replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode, orig_blk_offset, donor_blk_offset, block_len_in_page, 1, err); drop_data_sem: ext4_double_up_write_data_sem(orig_inode, donor_inode); goto unlock_folios; } data_copy: *err = mext_page_mkuptodate(folio[0], from, from + replaced_size); if (*err) goto unlock_folios; /* At this point all buffers in range are uptodate, old mapping layout * is no longer required, try to drop it now. 
*/ if (!filemap_release_folio(folio[0], 0) || !filemap_release_folio(folio[1], 0)) { *err = -EBUSY; goto unlock_folios; } ext4_double_down_write_data_sem(orig_inode, donor_inode); replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode, orig_blk_offset, donor_blk_offset, block_len_in_page, 1, err); ext4_double_up_write_data_sem(orig_inode, donor_inode); if (*err) { if (replaced_count) { block_len_in_page = replaced_count; replaced_size = block_len_in_page << orig_inode->i_blkbits; } else goto unlock_folios; } /* Perform all necessary steps similar write_begin()/write_end() * but keeping in mind that i_size will not change */ bh = folio_buffers(folio[0]); if (!bh) bh = create_empty_buffers(folio[0], 1 << orig_inode->i_blkbits, 0); for (i = 0; i < data_offset_in_page; i++) bh = bh->b_this_page; for (i = 0; i < block_len_in_page; i++) { *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0); if (*err < 0) goto repair_branches; bh = bh->b_this_page; } block_commit_write(&folio[0]->page, from, from + replaced_size); /* Even in case of data=writeback it is reasonable to pin * inode to transaction, to prevent unexpected data loss */ *err = ext4_jbd2_inode_add_write(handle, orig_inode, (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size); unlock_folios: folio_unlock(folio[0]); folio_put(folio[0]); folio_unlock(folio[1]); folio_put(folio[1]); stop_journal: ext4_journal_stop(handle); if (*err == -ENOSPC && ext4_should_retry_alloc(sb, &retries)) goto again; /* Buffer was busy because probably is pinned to journal transaction, * force transaction commit may help to free it. */ if (*err == -EBUSY && retries++ < 4 && EXT4_SB(sb)->s_journal && jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal)) goto again; return replaced_count; repair_branches: /* * This should never ever happen! * Extents are swapped already, but we are not able to copy data. 
* Try to swap extents to it's original places */ ext4_double_down_write_data_sem(orig_inode, donor_inode); replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode, orig_blk_offset, donor_blk_offset, block_len_in_page, 0, &err2); ext4_double_up_write_data_sem(orig_inode, donor_inode); if (replaced_count != block_len_in_page) { ext4_error_inode_block(orig_inode, (sector_t)(orig_blk_offset), EIO, "Unable to copy data block," " data will be lost."); *err = -EIO; } replaced_count = 0; goto unlock_folios; } /** * mext_check_arguments - Check whether move extent can be done * * @orig_inode: original inode * @donor_inode: donor inode * @orig_start: logical start offset in block for orig * @donor_start: logical start offset in block for donor * @len: the number of blocks to be moved * * Check the arguments of ext4_move_extents() whether the files can be * exchanged with each other. * Return 0 on success, or a negative error value on failure. */ static int mext_check_arguments(struct inode *orig_inode, struct inode *donor_inode, __u64 orig_start, __u64 donor_start, __u64 *len) { __u64 orig_eof, donor_eof; unsigned int blkbits = orig_inode->i_blkbits; unsigned int blocksize = 1 << blkbits; orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits; donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits; if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { ext4_debug("ext4 move extent: suid or sgid is set" " to donor file [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode)) return -EPERM; /* Ext4 move extent does not support swap files */ if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { ext4_debug("ext4 move extent: The argument files should not be swap files [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -ETXTBSY; } if (ext4_is_quota_file(orig_inode) && ext4_is_quota_file(donor_inode)) { ext4_debug("ext4 move extent: The 
argument files should not be quota files [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EOPNOTSUPP; } /* Ext4 move extent supports only extent based file */ if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { ext4_debug("ext4 move extent: orig file is not extents " "based file [ino:orig %lu]\n", orig_inode->i_ino); return -EOPNOTSUPP; } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) { ext4_debug("ext4 move extent: donor file is not extents " "based file [ino:donor %lu]\n", donor_inode->i_ino); return -EOPNOTSUPP; } if ((!orig_inode->i_size) || (!donor_inode->i_size)) { ext4_debug("ext4 move extent: File size is 0 byte\n"); return -EINVAL; } /* Start offset should be same */ if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) != (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) { ext4_debug("ext4 move extent: orig and donor's start " "offsets are not aligned [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } if ((orig_start >= EXT_MAX_BLOCKS) || (donor_start >= EXT_MAX_BLOCKS) || (*len > EXT_MAX_BLOCKS) || (donor_start + *len >= EXT_MAX_BLOCKS) || (orig_start + *len >= EXT_MAX_BLOCKS)) { ext4_debug("ext4 move extent: Can't handle over [%u] blocks " "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } if (orig_eof <= orig_start) *len = 0; else if (orig_eof < orig_start + *len - 1) *len = orig_eof - orig_start; if (donor_eof <= donor_start) *len = 0; else if (donor_eof < donor_start + *len - 1) *len = donor_eof - donor_start; if (!*len) { ext4_debug("ext4 move extent: len should not be 0 " "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } return 0; } /** * ext4_move_extents - Exchange the specified range of a file * * @o_filp: file structure of the original file * @d_filp: file structure of the donor file * @orig_blk: start offset in block for orig * @donor_blk: start 
offset in block for donor * @len: the number of blocks to be moved * @moved_len: moved block length * * This function returns 0 and moved block length is set in moved_len * if succeed, otherwise returns error value. * */ int ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, __u64 donor_blk, __u64 len, __u64 *moved_len) { struct inode *orig_inode = file_inode(o_filp); struct inode *donor_inode = file_inode(d_filp); struct ext4_ext_path *path = NULL; int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits; ext4_lblk_t o_end, o_start = orig_blk; ext4_lblk_t d_start = donor_blk; int ret; if (orig_inode->i_sb != donor_inode->i_sb) { ext4_debug("ext4 move extent: The argument files " "should be in same FS [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } /* orig and donor should be different inodes */ if (orig_inode == donor_inode) { ext4_debug("ext4 move extent: The argument files should not " "be same inode [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } /* Regular file check */ if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { ext4_debug("ext4 move extent: The argument files should be " "regular file [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } /* TODO: it's not obvious how to swap blocks for inodes with full journaling enabled */ if (ext4_should_journal_data(orig_inode) || ext4_should_journal_data(donor_inode)) { ext4_msg(orig_inode->i_sb, KERN_ERR, "Online defrag not supported with data journaling"); return -EOPNOTSUPP; } if (IS_ENCRYPTED(orig_inode) || IS_ENCRYPTED(donor_inode)) { ext4_msg(orig_inode->i_sb, KERN_ERR, "Online defrag not supported for encrypted files"); return -EOPNOTSUPP; } /* Protect orig and donor inodes against a truncate */ lock_two_nondirectories(orig_inode, donor_inode); /* Wait for all existing dio workers */ inode_dio_wait(orig_inode); inode_dio_wait(donor_inode); /* Protect 
extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(orig_inode, donor_inode); /* Check the filesystem environment whether move_extent can be done */ ret = mext_check_arguments(orig_inode, donor_inode, orig_blk, donor_blk, &len); if (ret) goto out; o_end = o_start + len; *moved_len = 0; while (o_start < o_end) { struct ext4_extent *ex; ext4_lblk_t cur_blk, next_blk; pgoff_t orig_page_index, donor_page_index; int offset_in_page; int unwritten, cur_len; ret = get_ext_path(orig_inode, o_start, &path); if (ret) goto out; ex = path[path->p_depth].p_ext; cur_blk = le32_to_cpu(ex->ee_block); cur_len = ext4_ext_get_actual_len(ex); /* Check hole before the start pos */ if (cur_blk + cur_len - 1 < o_start) { next_blk = ext4_ext_next_allocated_block(path); if (next_blk == EXT_MAX_BLOCKS) { ret = -ENODATA; goto out; } d_start += next_blk - o_start; o_start = next_blk; continue; /* Check hole after the start pos */ } else if (cur_blk > o_start) { /* Skip hole */ d_start += cur_blk - o_start; o_start = cur_blk; /* Extent inside requested range ?*/ if (cur_blk >= o_end) goto out; } else { /* in_range(o_start, o_blk, o_len) */ cur_len += cur_blk - o_start; } unwritten = ext4_ext_is_unwritten(ex); if (o_end - o_start < cur_len) cur_len = o_end - o_start; orig_page_index = o_start >> (PAGE_SHIFT - orig_inode->i_blkbits); donor_page_index = d_start >> (PAGE_SHIFT - donor_inode->i_blkbits); offset_in_page = o_start % blocks_per_page; if (cur_len > blocks_per_page - offset_in_page) cur_len = blocks_per_page - offset_in_page; /* * Up semaphore to avoid following problems: * a. transaction deadlock among ext4_journal_start, * ->write_begin via pagefault, and jbd2_journal_commit * b. 
racing with ->read_folio, ->write_begin, and * ext4_get_block in move_extent_per_page */ ext4_double_up_write_data_sem(orig_inode, donor_inode); /* Swap original branches with new branches */ *moved_len += move_extent_per_page(o_filp, donor_inode, orig_page_index, donor_page_index, offset_in_page, cur_len, unwritten, &ret); ext4_double_down_write_data_sem(orig_inode, donor_inode); if (ret < 0) break; o_start += cur_len; d_start += cur_len; } out: if (*moved_len) { ext4_discard_preallocations(orig_inode); ext4_discard_preallocations(donor_inode); } ext4_free_ext_path(path); ext4_double_up_write_data_sem(orig_inode, donor_inode); unlock_two_nondirectories(orig_inode, donor_inode); return ret; }
14 95 741 741 742 741 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 // SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API. * * SHA1 Secure Hash Algorithm. * * Derived from cryptoapi implementation, adapted for in-place * scatterlist interface. * * Copyright (c) Alan Smithee. * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> */ #include <crypto/internal/hash.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> #include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <asm/byteorder.h> const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE] = { 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, 0xaf, 0xd8, 0x07, 0x09 }; EXPORT_SYMBOL_GPL(sha1_zero_message_hash); static void sha1_generic_block_fn(struct sha1_state *sst, u8 const *src, int blocks) { u32 temp[SHA1_WORKSPACE_WORDS]; while (blocks--) { sha1_transform(sst->state, src, temp); src += SHA1_BLOCK_SIZE; } memzero_explicit(temp, sizeof(temp)); } int crypto_sha1_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha1_base_do_update(desc, data, len, sha1_generic_block_fn); } EXPORT_SYMBOL(crypto_sha1_update); static int sha1_final(struct shash_desc *desc, u8 *out) { sha1_base_do_finalize(desc, sha1_generic_block_fn); return sha1_base_finish(desc, out); } int crypto_sha1_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { sha1_base_do_update(desc, data, len, sha1_generic_block_fn); return sha1_final(desc, out); } EXPORT_SYMBOL(crypto_sha1_finup); static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_base_init, .update = crypto_sha1_update, .final = 
sha1_final, .finup = crypto_sha1_finup, .descsize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", .cra_driver_name= "sha1-generic", .cra_priority = 100, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static int __init sha1_generic_mod_init(void) { return crypto_register_shash(&alg); } static void __exit sha1_generic_mod_fini(void) { crypto_unregister_shash(&alg); } subsys_initcall(sha1_generic_mod_init); module_exit(sha1_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); MODULE_ALIAS_CRYPTO("sha1"); MODULE_ALIAS_CRYPTO("sha1-generic");
17 17 76 76 76 9 9 82 82 81 81 76 9 82 15 12 3 12 3 87 87 87 87 87 3 86 9 86 85 87 4 15 1 2 1 16 16 87 15 19 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 /* * linux/drivers/video/console/bitblit.c -- BitBlitting Operation * * Originally from the 'accel_*' routines in drivers/video/console/fbcon.c * * Copyright (C) 2004 Antonino Daplas <adaplas @pol.net> * * This file is subject to the terms and conditions of the GNU General Public * License. 
See the file COPYING in the main directory of this archive for * more details. */ #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/fb.h> #include <linux/vt_kern.h> #include <linux/console.h> #include <asm/types.h> #include "fbcon.h" /* * Accelerated handlers. */ static void update_attr(u8 *dst, u8 *src, int attribute, struct vc_data *vc) { int i, offset = (vc->vc_font.height < 10) ? 1 : 2; int width = DIV_ROUND_UP(vc->vc_font.width, 8); unsigned int cellsize = vc->vc_font.height * width; u8 c; offset = cellsize - (offset * width); for (i = 0; i < cellsize; i++) { c = src[i]; if (attribute & FBCON_ATTRIBUTE_UNDERLINE && i >= offset) c = 0xff; if (attribute & FBCON_ATTRIBUTE_BOLD) c |= c >> 1; if (attribute & FBCON_ATTRIBUTE_REVERSE) c = ~c; dst[i] = c; } } static void bit_bmove(struct vc_data *vc, struct fb_info *info, int sy, int sx, int dy, int dx, int height, int width) { struct fb_copyarea area; area.sx = sx * vc->vc_font.width; area.sy = sy * vc->vc_font.height; area.dx = dx * vc->vc_font.width; area.dy = dy * vc->vc_font.height; area.height = height * vc->vc_font.height; area.width = width * vc->vc_font.width; info->fbops->fb_copyarea(info, &area); } static void bit_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width) { int bgshift = (vc->vc_hi_font_mask) ? 13 : 12; struct fb_fillrect region; region.color = attr_bgcol_ec(bgshift, vc, info); region.dx = sx * vc->vc_font.width; region.dy = sy * vc->vc_font.height; region.width = width * vc->vc_font.width; region.height = height * vc->vc_font.height; region.rop = ROP_COPY; info->fbops->fb_fillrect(info, &region); } static inline void bit_putcs_aligned(struct vc_data *vc, struct fb_info *info, const u16 *s, u32 attr, u32 cnt, u32 d_pitch, u32 s_pitch, u32 cellsize, struct fb_image *image, u8 *buf, u8 *dst) { u16 charmask = vc->vc_hi_font_mask ? 
0x1ff : 0xff; u32 idx = vc->vc_font.width >> 3; u8 *src; while (cnt--) { src = vc->vc_font.data + (scr_readw(s++)& charmask)*cellsize; if (attr) { update_attr(buf, src, attr, vc); src = buf; } if (likely(idx == 1)) __fb_pad_aligned_buffer(dst, d_pitch, src, idx, image->height); else fb_pad_aligned_buffer(dst, d_pitch, src, idx, image->height); dst += s_pitch; } info->fbops->fb_imageblit(info, image); } static inline void bit_putcs_unaligned(struct vc_data *vc, struct fb_info *info, const u16 *s, u32 attr, u32 cnt, u32 d_pitch, u32 s_pitch, u32 cellsize, struct fb_image *image, u8 *buf, u8 *dst) { u16 charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff; u32 shift_low = 0, mod = vc->vc_font.width % 8; u32 shift_high = 8; u32 idx = vc->vc_font.width >> 3; u8 *src; while (cnt--) { src = vc->vc_font.data + (scr_readw(s++)& charmask)*cellsize; if (attr) { update_attr(buf, src, attr, vc); src = buf; } fb_pad_unaligned_buffer(dst, d_pitch, src, idx, image->height, shift_high, shift_low, mod); shift_low += mod; dst += (shift_low >= 8) ? 
s_pitch : s_pitch - 1; shift_low &= 7; shift_high = 8 - shift_low; } info->fbops->fb_imageblit(info, image); } static void bit_putcs(struct vc_data *vc, struct fb_info *info, const unsigned short *s, int count, int yy, int xx, int fg, int bg) { struct fb_image image; u32 width = DIV_ROUND_UP(vc->vc_font.width, 8); u32 cellsize = width * vc->vc_font.height; u32 maxcnt = info->pixmap.size/cellsize; u32 scan_align = info->pixmap.scan_align - 1; u32 buf_align = info->pixmap.buf_align - 1; u32 mod = vc->vc_font.width % 8, cnt, pitch, size; u32 attribute = get_attribute(info, scr_readw(s)); u8 *dst, *buf = NULL; image.fg_color = fg; image.bg_color = bg; image.dx = xx * vc->vc_font.width; image.dy = yy * vc->vc_font.height; image.height = vc->vc_font.height; image.depth = 1; if (attribute) { buf = kmalloc(cellsize, GFP_ATOMIC); if (!buf) return; } while (count) { if (count > maxcnt) cnt = maxcnt; else cnt = count; image.width = vc->vc_font.width * cnt; pitch = DIV_ROUND_UP(image.width, 8) + scan_align; pitch &= ~scan_align; size = pitch * image.height + buf_align; size &= ~buf_align; dst = fb_get_buffer_offset(info, &info->pixmap, size); image.data = dst; if (!mod) bit_putcs_aligned(vc, info, s, attribute, cnt, pitch, width, cellsize, &image, buf, dst); else bit_putcs_unaligned(vc, info, s, attribute, cnt, pitch, width, cellsize, &image, buf, dst); image.dx += cnt * vc->vc_font.width; count -= cnt; s += cnt; } /* buf is always NULL except when in monochrome mode, so in this case it's a gain to check buf against NULL even though kfree() handles NULL pointers just fine */ if (unlikely(buf)) kfree(buf); } static void bit_clear_margins(struct vc_data *vc, struct fb_info *info, int color, int bottom_only) { unsigned int cw = vc->vc_font.width; unsigned int ch = vc->vc_font.height; unsigned int rw = info->var.xres - (vc->vc_cols*cw); unsigned int bh = info->var.yres - (vc->vc_rows*ch); unsigned int rs = info->var.xres - rw; unsigned int bs = info->var.yres - bh; struct 
fb_fillrect region; region.color = color; region.rop = ROP_COPY; if ((int) rw > 0 && !bottom_only) { region.dx = info->var.xoffset + rs; region.dy = 0; region.width = rw; region.height = info->var.yres_virtual; info->fbops->fb_fillrect(info, &region); } if ((int) bh > 0) { region.dx = info->var.xoffset; region.dy = info->var.yoffset + bs; region.width = rs; region.height = bh; info->fbops->fb_fillrect(info, &region); } } static void bit_cursor(struct vc_data *vc, struct fb_info *info, int mode, int fg, int bg) { struct fb_cursor cursor; struct fbcon_ops *ops = info->fbcon_par; unsigned short charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff; int w = DIV_ROUND_UP(vc->vc_font.width, 8), c; int y = real_y(ops->p, vc->state.y); int attribute, use_sw = vc->vc_cursor_type & CUR_SW; int err = 1; char *src; cursor.set = 0; if (!vc->vc_font.data) return; c = scr_readw((u16 *) vc->vc_pos); attribute = get_attribute(info, c); src = vc->vc_font.data + ((c & charmask) * (w * vc->vc_font.height)); if (ops->cursor_state.image.data != src || ops->cursor_reset) { ops->cursor_state.image.data = src; cursor.set |= FB_CUR_SETIMAGE; } if (attribute) { u8 *dst; dst = kmalloc_array(w, vc->vc_font.height, GFP_ATOMIC); if (!dst) return; kfree(ops->cursor_data); ops->cursor_data = dst; update_attr(dst, src, attribute, vc); src = dst; } if (ops->cursor_state.image.fg_color != fg || ops->cursor_state.image.bg_color != bg || ops->cursor_reset) { ops->cursor_state.image.fg_color = fg; ops->cursor_state.image.bg_color = bg; cursor.set |= FB_CUR_SETCMAP; } if ((ops->cursor_state.image.dx != (vc->vc_font.width * vc->state.x)) || (ops->cursor_state.image.dy != (vc->vc_font.height * y)) || ops->cursor_reset) { ops->cursor_state.image.dx = vc->vc_font.width * vc->state.x; ops->cursor_state.image.dy = vc->vc_font.height * y; cursor.set |= FB_CUR_SETPOS; } if (ops->cursor_state.image.height != vc->vc_font.height || ops->cursor_state.image.width != vc->vc_font.width || ops->cursor_reset) { 
ops->cursor_state.image.height = vc->vc_font.height; ops->cursor_state.image.width = vc->vc_font.width; cursor.set |= FB_CUR_SETSIZE; } if (ops->cursor_state.hot.x || ops->cursor_state.hot.y || ops->cursor_reset) { ops->cursor_state.hot.x = cursor.hot.y = 0; cursor.set |= FB_CUR_SETHOT; } if (cursor.set & FB_CUR_SETSIZE || vc->vc_cursor_type != ops->p->cursor_shape || ops->cursor_state.mask == NULL || ops->cursor_reset) { char *mask = kmalloc_array(w, vc->vc_font.height, GFP_ATOMIC); int cur_height, size, i = 0; u8 msk = 0xff; if (!mask) return; kfree(ops->cursor_state.mask); ops->cursor_state.mask = mask; ops->p->cursor_shape = vc->vc_cursor_type; cursor.set |= FB_CUR_SETSHAPE; switch (CUR_SIZE(ops->p->cursor_shape)) { case CUR_NONE: cur_height = 0; break; case CUR_UNDERLINE: cur_height = (vc->vc_font.height < 10) ? 1 : 2; break; case CUR_LOWER_THIRD: cur_height = vc->vc_font.height/3; break; case CUR_LOWER_HALF: cur_height = vc->vc_font.height >> 1; break; case CUR_TWO_THIRDS: cur_height = (vc->vc_font.height << 1)/3; break; case CUR_BLOCK: default: cur_height = vc->vc_font.height; break; } size = (vc->vc_font.height - cur_height) * w; while (size--) mask[i++] = ~msk; size = cur_height * w; while (size--) mask[i++] = msk; } switch (mode) { case CM_ERASE: ops->cursor_state.enable = 0; break; case CM_DRAW: case CM_MOVE: default: ops->cursor_state.enable = (use_sw) ? 
0 : 1; break; } cursor.image.data = src; cursor.image.fg_color = ops->cursor_state.image.fg_color; cursor.image.bg_color = ops->cursor_state.image.bg_color; cursor.image.dx = ops->cursor_state.image.dx; cursor.image.dy = ops->cursor_state.image.dy; cursor.image.height = ops->cursor_state.image.height; cursor.image.width = ops->cursor_state.image.width; cursor.hot.x = ops->cursor_state.hot.x; cursor.hot.y = ops->cursor_state.hot.y; cursor.mask = ops->cursor_state.mask; cursor.enable = ops->cursor_state.enable; cursor.image.depth = 1; cursor.rop = ROP_XOR; if (info->fbops->fb_cursor) err = info->fbops->fb_cursor(info, &cursor); if (err) soft_cursor(info, &cursor); ops->cursor_reset = 0; } static int bit_update_start(struct fb_info *info) { struct fbcon_ops *ops = info->fbcon_par; int err; err = fb_pan_display(info, &ops->var); ops->var.xoffset = info->var.xoffset; ops->var.yoffset = info->var.yoffset; ops->var.vmode = info->var.vmode; return err; } void fbcon_set_bitops(struct fbcon_ops *ops) { ops->bmove = bit_bmove; ops->clear = bit_clear; ops->putcs = bit_putcs; ops->clear_margins = bit_clear_margins; ops->cursor = bit_cursor; ops->update_start = bit_update_start; ops->rotate_font = NULL; if (ops->rotate) fbcon_set_rotate(ops); }
7 15 6 13 26 26 2 17 10 4 4 1680 1662 35 7 6 5 3 7 7 7 3 1 16 8 1 7 4 5 7 7 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 // SPDX-License-Identifier: GPL-2.0-or-later /* * Directory notifications for Linux. 
*
 * Copyright (C) 2000,2001,2002 Stephen Rothwell
 *
 * Copyright (C) 2009 Eric Paris <Red Hat Inc>
 * dnotify was largely rewritten to use the new fsnotify infrastructure
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/dnotify.h>
#include <linux/init.h>
#include <linux/security.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>

/* Global on/off switch; fcntl_dirnotify() returns -EINVAL while it is 0. */
static int dir_notify_enable __read_mostly = 1;
#ifdef CONFIG_SYSCTL
/* Exposes dir_notify_enable as the "dir-notify-enable" entry under "fs". */
static struct ctl_table dnotify_sysctls[] = {
	{
		.procname	= "dir-notify-enable",
		.data		= &dir_notify_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
};
/* Register the sysctl table above; called from dnotify_init(). */
static void __init dnotify_sysctl_init(void)
{
	register_sysctl_init("fs", dnotify_sysctls);
}
#else
/* Without CONFIG_SYSCTL the registration is a no-op. */
#define dnotify_sysctl_init() do { } while (0)
#endif

/* Slab caches for struct dnotify_struct and struct dnotify_mark. */
static struct kmem_cache *dnotify_struct_cache __ro_after_init;
static struct kmem_cache *dnotify_mark_cache __ro_after_init;
/* The single fsnotify group all dnotify marks belong to. */
static struct fsnotify_group *dnotify_group __ro_after_init;

/*
 * dnotify will attach one of these to each inode (i_fsnotify_marks) which
 * is being watched by dnotify.  If multiple userspace applications are watching
 * the same directory with dnotify their information is chained in dn
 */
struct dnotify_mark {
	struct fsnotify_mark fsn_mark;
	struct dnotify_struct *dn;
};

/*
 * When a process starts or stops watching an inode the set of events which
 * dnotify cares about for that inode may change.  This function runs the
 * list of everything receiving dnotify events about this directory and calculates
 * the set of all those events.  After it updates what dnotify is interested in
 * it calls the fsnotify function so it can update the set of all events relevant
 * to this inode.
*/ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) { __u32 new_mask = 0; struct dnotify_struct *dn; struct dnotify_mark *dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); assert_spin_locked(&fsn_mark->lock); for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next) new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT); if (fsn_mark->mask == new_mask) return; fsn_mark->mask = new_mask; fsnotify_recalc_mask(fsn_mark->connector); } /* * Mains fsnotify call where events are delivered to dnotify. * Find the dnotify mark on the relevant inode, run the list of dnotify structs * on that mark and determine which of them has expressed interest in receiving * events of this type. When found send the correct process and signal and * destroy the dnotify struct if it was not registered to receive multiple * events. */ static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask, struct inode *inode, struct inode *dir, const struct qstr *name, u32 cookie) { struct dnotify_mark *dn_mark; struct dnotify_struct *dn; struct dnotify_struct **prev; struct fown_struct *fown; __u32 test_mask = mask & ~FS_EVENT_ON_CHILD; /* not a dir, dnotify doesn't care */ if (!dir && !(mask & FS_ISDIR)) return 0; dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark); spin_lock(&inode_mark->lock); prev = &dn_mark->dn; while ((dn = *prev) != NULL) { if ((dn->dn_mask & test_mask) == 0) { prev = &dn->dn_next; continue; } fown = &dn->dn_filp->f_owner; send_sigio(fown, dn->dn_fd, POLL_MSG); if (dn->dn_mask & FS_DN_MULTISHOT) prev = &dn->dn_next; else { *prev = dn->dn_next; kmem_cache_free(dnotify_struct_cache, dn); dnotify_recalc_inode_mask(inode_mark); } } spin_unlock(&inode_mark->lock); return 0; } static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) { struct dnotify_mark *dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); BUG_ON(dn_mark->dn); kmem_cache_free(dnotify_mark_cache, dn_mark); } static const struct fsnotify_ops 
dnotify_fsnotify_ops = { .handle_inode_event = dnotify_handle_event, .free_mark = dnotify_free_mark, }; /* * Called every time a file is closed. Looks first for a dnotify mark on the * inode. If one is found run all of the ->dn structures attached to that * mark for one relevant to this process closing the file and remove that * dnotify_struct. If that was the last dnotify_struct also remove the * fsnotify_mark. */ void dnotify_flush(struct file *filp, fl_owner_t id) { struct fsnotify_mark *fsn_mark; struct dnotify_mark *dn_mark; struct dnotify_struct *dn; struct dnotify_struct **prev; struct inode *inode; bool free = false; inode = file_inode(filp); if (!S_ISDIR(inode->i_mode)) return; fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); if (!fsn_mark) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); fsnotify_group_lock(dnotify_group); spin_lock(&fsn_mark->lock); prev = &dn_mark->dn; while ((dn = *prev) != NULL) { if ((dn->dn_owner == id) && (dn->dn_filp == filp)) { *prev = dn->dn_next; kmem_cache_free(dnotify_struct_cache, dn); dnotify_recalc_inode_mask(fsn_mark); break; } prev = &dn->dn_next; } spin_unlock(&fsn_mark->lock); /* nothing else could have found us thanks to the dnotify_groups mark_mutex */ if (dn_mark->dn == NULL) { fsnotify_detach_mark(fsn_mark); free = true; } fsnotify_group_unlock(dnotify_group); if (free) fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); } /* this conversion is done only at watch creation */ static __u32 convert_arg(unsigned int arg) { __u32 new_mask = FS_EVENT_ON_CHILD; if (arg & DN_MULTISHOT) new_mask |= FS_DN_MULTISHOT; if (arg & DN_DELETE) new_mask |= (FS_DELETE | FS_MOVED_FROM); if (arg & DN_MODIFY) new_mask |= FS_MODIFY; if (arg & DN_ACCESS) new_mask |= FS_ACCESS; if (arg & DN_ATTRIB) new_mask |= FS_ATTRIB; if (arg & DN_RENAME) new_mask |= FS_RENAME; if (arg & DN_CREATE) new_mask |= (FS_CREATE | FS_MOVED_TO); return new_mask; } /* * If multiple processes watch the same 
inode with dnotify there is only one
 * dnotify mark in inode->i_fsnotify_marks but we chain a dnotify_struct
 * onto that mark.  This function either attaches the new dnotify_struct onto
 * that list, or it |= the mask onto an existing dnotify_struct.
 *
 * Returns 0 when @dn was linked onto @dn_mark (the caller must not free it)
 * and -EEXIST when an existing entry for (@id, @filp) was updated instead
 * (the caller still owns @dn).
 */
static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
		     fl_owner_t id, int fd, struct file *filp, __u32 mask)
{
	struct dnotify_struct *odn;

	odn = dn_mark->dn;
	while (odn != NULL) {
		/* adding more events to existing dnotify_struct? */
		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
			odn->dn_fd = fd;
			odn->dn_mask |= mask;
			return -EEXIST;
		}
		odn = odn->dn_next;
	}

	/* no existing watcher for this owner/file: push @dn on the list head */
	dn->dn_mask = mask;
	dn->dn_fd = fd;
	dn->dn_filp = filp;
	dn->dn_owner = id;
	dn->dn_next = dn_mark->dn;
	dn_mark->dn = dn;

	return 0;
}

/*
 * When a process calls fcntl to attach a dnotify watch to a directory it ends
 * up here.  Allocate both a mark for fsnotify to add and a dnotify_struct to be
 * attached to the fsnotify_mark.
 *
 * @fd:   descriptor the watch is requested on
 * @filp: open file for @fd as seen by the caller
 * @arg:  DN_* flags; a value with no bits other than DN_MULTISHOT removes
 *        the watch
 *
 * Returns 0 on success, -EINVAL when dnotify is disabled, -ENOTDIR when
 * @filp is not a directory, -ENOMEM on allocation failure, or the error
 * from security_path_notify() / fsnotify_add_inode_mark_locked().
 */
int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
{
	struct dnotify_mark *new_dn_mark, *dn_mark;
	struct fsnotify_mark *new_fsn_mark, *fsn_mark;
	struct dnotify_struct *dn;
	struct inode *inode;
	fl_owner_t id = current->files;
	struct file *f = NULL;
	int destroy = 0, error = 0;
	__u32 mask;

	/* we use these to tell if we need to kfree */
	new_fsn_mark = NULL;
	dn = NULL;

	if (!dir_notify_enable) {
		error = -EINVAL;
		goto out_err;
	}

	/* a 0 mask means we are explicitly removing the watch */
	if ((arg & ~DN_MULTISHOT) == 0) {
		dnotify_flush(filp, id);
		error = 0;
		goto out_err;
	}

	/* dnotify only works on directories */
	inode = file_inode(filp);
	if (!S_ISDIR(inode->i_mode)) {
		error = -ENOTDIR;
		goto out_err;
	}

	/*
	 * convert the userspace DN_* "arg" to the internal FS_*
	 * defined in fsnotify
	 */
	mask = convert_arg(arg);

	error = security_path_notify(&filp->f_path, mask,
			FSNOTIFY_OBJ_TYPE_INODE);
	if (error)
		goto out_err;

	/* expect most fcntl to add new rather than augment old */
	dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
	if (!dn) {
		error = -ENOMEM;
		goto out_err;
	}

	/* new fsnotify mark, we expect most fcntl calls to add a new mark */
	new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
	if (!new_dn_mark) {
		error = -ENOMEM;
		goto out_err;
	}

	/* set up the new_fsn_mark and new_dn_mark */
	new_fsn_mark = &new_dn_mark->fsn_mark;
	fsnotify_init_mark(new_fsn_mark, dnotify_group);
	new_fsn_mark->mask = mask;
	new_dn_mark->dn = NULL;

	/* this is needed to prevent the fcntl/close race described below */
	fsnotify_group_lock(dnotify_group);

	/* add the new_fsn_mark or find an old one. */
	fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group);
	if (fsn_mark) {
		dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
		spin_lock(&fsn_mark->lock);
	} else {
		error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0);
		if (error) {
			fsnotify_group_unlock(dnotify_group);
			goto out_err;
		}
		spin_lock(&new_fsn_mark->lock);
		fsn_mark = new_fsn_mark;
		dn_mark = new_dn_mark;
		/* we used new_fsn_mark, so don't free it */
		new_fsn_mark = NULL;
	}

	/* re-resolve fd so a concurrent close can be detected just below */
	rcu_read_lock();
	f = lookup_fdget_rcu(fd);
	rcu_read_unlock();

	/* if (f != filp) means that we lost a race and another task/thread
	 * actually closed the fd we are still playing with before we grabbed
	 * the dnotify_groups mark_mutex and fsn_mark->lock.  Since closing the
	 * fd is the only time we clean up the marks we need to get our mark
	 * off the list. */
	if (f != filp) {
		/* if we added ourselves, shoot ourselves, it's possible that
		 * the flush actually did shoot this fsn_mark.  That's fine too
		 * since multiple calls to destroy_mark is perfectly safe, if
		 * we found a dn_mark already attached to the inode, just sod
		 * off silently as the flush at close time dealt with it.
		 */
		if (dn_mark == new_dn_mark)
			destroy = 1;
		error = 0;
		goto out;
	}

	__f_setown(filp, task_pid(current), PIDTYPE_TGID, 0);

	error = attach_dn(dn, dn_mark, id, fd, filp, mask);
	/* !error means that we attached the dn to the dn_mark, so don't free it */
	if (!error)
		dn = NULL;
	/* -EEXIST means that we didn't add this new dn and used an old one.
	 * that isn't an error (and the unused dn should be freed) */
	else if (error == -EEXIST)
		error = 0;

	dnotify_recalc_inode_mask(fsn_mark);
out:
	spin_unlock(&fsn_mark->lock);

	if (destroy)
		fsnotify_detach_mark(fsn_mark);
	fsnotify_group_unlock(dnotify_group);
	if (destroy)
		fsnotify_free_mark(fsn_mark);
	fsnotify_put_mark(fsn_mark);
out_err:
	/* release whatever was allocated above but never handed over */
	if (new_fsn_mark)
		fsnotify_put_mark(new_fsn_mark);
	if (dn)
		kmem_cache_free(dnotify_struct_cache, dn);
	if (f)
		fput(f);
	return error;
}

/*
 * Boot-time setup: create the two slab caches and the dnotify fsnotify
 * group, then register the sysctl.  Panics if the group cannot be
 * allocated; the caches are created with SLAB_PANIC.
 */
static int __init dnotify_init(void)
{
	dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
					  SLAB_PANIC|SLAB_ACCOUNT);
	dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);

	dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops,
					     FSNOTIFY_GROUP_NOFS);
	if (IS_ERR(dnotify_group))
		panic("unable to allocate fsnotify group for dnotify\n");
	dnotify_sysctl_init();
	return 0;
}

module_init(dnotify_init)
1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 // SPDX-License-Identifier: GPL-2.0 /****************************************************************************** * rtl8712_xmit.c * * Copyright(c) 2007 - 2010 Realtek Corporation. All rights reserved. * Linux device driver for RTL8192SU * * Modifications for inclusion into the Linux staging tree are * Copyright(c) 2010 Larry Finger. All rights reserved. 
* * Contact information: * WLAN FAE <wlanfae@realtek.com> * Larry Finger <Larry.Finger@lwfinger.net> * ******************************************************************************/ #define _RTL8712_XMIT_C_ #include "osdep_service.h" #include "drv_types.h" #include "wifi.h" #include "osdep_intf.h" #include "usb_ops.h" static void dump_xframe(struct _adapter *padapter, struct xmit_frame *pxmitframe); static void update_txdesc(struct xmit_frame *pxmitframe, uint *pmem, int sz); sint _r8712_init_hw_txqueue(struct hw_txqueue *phw_txqueue, u8 ac_tag) { phw_txqueue->ac_tag = ac_tag; switch (ac_tag) { case BE_QUEUE_INX: phw_txqueue->ff_hwaddr = RTL8712_DMA_BEQ; break; case BK_QUEUE_INX: phw_txqueue->ff_hwaddr = RTL8712_DMA_BKQ; break; case VI_QUEUE_INX: phw_txqueue->ff_hwaddr = RTL8712_DMA_VIQ; break; case VO_QUEUE_INX: phw_txqueue->ff_hwaddr = RTL8712_DMA_VOQ; break; case BMC_QUEUE_INX: phw_txqueue->ff_hwaddr = RTL8712_DMA_BEQ; break; } return _SUCCESS; } int r8712_txframes_sta_ac_pending(struct _adapter *padapter, struct pkt_attrib *pattrib) { struct sta_info *psta; struct tx_servq *ptxservq; int priority = pattrib->priority; psta = pattrib->psta; switch (priority) { case 1: case 2: ptxservq = &psta->sta_xmitpriv.bk_q; break; case 4: case 5: ptxservq = &psta->sta_xmitpriv.vi_q; break; case 6: case 7: ptxservq = &psta->sta_xmitpriv.vo_q; break; case 0: case 3: default: ptxservq = &psta->sta_xmitpriv.be_q; break; } return ptxservq->qcnt; } static u32 get_ff_hwaddr(struct xmit_frame *pxmitframe) { u32 addr = 0; struct pkt_attrib *pattrib = &pxmitframe->attrib; struct _adapter *padapter = pxmitframe->padapter; struct dvobj_priv *pdvobj = &padapter->dvobjpriv; if (pxmitframe->frame_tag == TXAGG_FRAMETAG) { addr = RTL8712_DMA_H2CCMD; } else if (pxmitframe->frame_tag == MGNT_FRAMETAG) { addr = RTL8712_DMA_MGTQ; } else if (pdvobj->nr_endpoint == 6) { switch (pattrib->priority) { case 0: case 3: addr = RTL8712_DMA_BEQ; break; case 1: case 2: addr = RTL8712_DMA_BKQ; break; case 
4: case 5: addr = RTL8712_DMA_VIQ; break; case 6: case 7: addr = RTL8712_DMA_VOQ; break; case 0x10: case 0x11: case 0x12: case 0x13: addr = RTL8712_DMA_H2CCMD; break; default: addr = RTL8712_DMA_BEQ; break; } } else if (pdvobj->nr_endpoint == 4) { switch (pattrib->qsel) { case 0: case 3: case 1: case 2: addr = RTL8712_DMA_BEQ;/*RTL8712_EP_LO;*/ break; case 4: case 5: case 6: case 7: addr = RTL8712_DMA_VOQ;/*RTL8712_EP_HI;*/ break; case 0x10: case 0x11: case 0x12: case 0x13: addr = RTL8712_DMA_H2CCMD; break; default: addr = RTL8712_DMA_BEQ;/*RTL8712_EP_LO;*/ break; } } return addr; } static struct xmit_frame *dequeue_one_xmitframe(struct xmit_priv *pxmitpriv, struct hw_xmit *phwxmit, struct tx_servq *ptxservq, struct __queue *pframe_queue) { struct list_head *xmitframe_plist, *xmitframe_phead; struct xmit_frame *pxmitframe = NULL; xmitframe_phead = &pframe_queue->queue; xmitframe_plist = xmitframe_phead->next; if (!end_of_queue_search(xmitframe_phead, xmitframe_plist)) { pxmitframe = container_of(xmitframe_plist, struct xmit_frame, list); list_del_init(&pxmitframe->list); ptxservq->qcnt--; phwxmit->txcmdcnt++; } return pxmitframe; } static struct xmit_frame *dequeue_xframe_ex(struct xmit_priv *pxmitpriv, struct hw_xmit *phwxmit_i, sint entry) { unsigned long irqL0; struct list_head *sta_plist, *sta_phead; struct hw_xmit *phwxmit; struct tx_servq *ptxservq = NULL; struct __queue *pframe_queue = NULL; struct xmit_frame *pxmitframe = NULL; int i, inx[4]; int j, acirp_cnt[4]; /*entry indx: 0->vo, 1->vi, 2->be, 3->bk.*/ inx[0] = 0; acirp_cnt[0] = pxmitpriv->voq_cnt; inx[1] = 1; acirp_cnt[1] = pxmitpriv->viq_cnt; inx[2] = 2; acirp_cnt[2] = pxmitpriv->beq_cnt; inx[3] = 3; acirp_cnt[3] = pxmitpriv->bkq_cnt; for (i = 0; i < 4; i++) { for (j = i + 1; j < 4; j++) { if (acirp_cnt[j] < acirp_cnt[i]) { swap(acirp_cnt[i], acirp_cnt[j]); swap(inx[i], inx[j]); } } } spin_lock_irqsave(&pxmitpriv->lock, irqL0); for (i = 0; i < entry; i++) { phwxmit = phwxmit_i + inx[i]; sta_phead = 
&phwxmit->sta_queue->queue; sta_plist = sta_phead->next; while (!end_of_queue_search(sta_phead, sta_plist)) { ptxservq = container_of(sta_plist, struct tx_servq, tx_pending); pframe_queue = &ptxservq->sta_pending; pxmitframe = dequeue_one_xmitframe(pxmitpriv, phwxmit, ptxservq, pframe_queue); if (pxmitframe) { phwxmit->accnt--; goto exit_dequeue_xframe_ex; } sta_plist = sta_plist->next; /*Remove sta node when there are no pending packets.*/ if (list_empty(&pframe_queue->queue)) { /* must be done after sta_plist->next * and before break */ list_del_init(&ptxservq->tx_pending); } } } exit_dequeue_xframe_ex: spin_unlock_irqrestore(&pxmitpriv->lock, irqL0); return pxmitframe; } void r8712_do_queue_select(struct _adapter *padapter, struct pkt_attrib *pattrib) { unsigned int qsel = 0; struct dvobj_priv *pdvobj = &padapter->dvobjpriv; if (pdvobj->nr_endpoint == 6) { qsel = (unsigned int)pattrib->priority; } else if (pdvobj->nr_endpoint == 4) { qsel = (unsigned int)pattrib->priority; if (qsel == 0 || qsel == 3) qsel = 3; else if (qsel == 1 || qsel == 2) qsel = 1; else if (qsel == 4 || qsel == 5) qsel = 5; else if (qsel == 6 || qsel == 7) qsel = 7; else qsel = 3; } pattrib->qsel = qsel; } #ifdef CONFIG_R8712_TX_AGGR void r8712_construct_txaggr_cmd_desc(struct xmit_buf *pxmitbuf) { struct tx_desc *ptx_desc = (struct tx_desc *)pxmitbuf->pbuf; /* Fill up TxCmd Descriptor according as USB FW Tx Aaggregation info.*/ /* dw0 */ ptx_desc->txdw0 = cpu_to_le32(CMD_HDR_SZ & 0xffff); ptx_desc->txdw0 |= cpu_to_le32(((TXDESC_SIZE + OFFSET_SZ) << OFFSET_SHT) & 0x00ff0000); ptx_desc->txdw0 |= cpu_to_le32(OWN | FSG | LSG); /* dw1 */ ptx_desc->txdw1 |= cpu_to_le32((0x13 << QSEL_SHT) & 0x00001f00); } void r8712_construct_txaggr_cmd_hdr(struct xmit_buf *pxmitbuf) { struct xmit_frame *pxmitframe = (struct xmit_frame *) pxmitbuf->priv_data; struct _adapter *padapter = pxmitframe->padapter; struct cmd_priv *pcmdpriv = &padapter->cmdpriv; struct cmd_hdr *pcmd_hdr = (struct cmd_hdr *) 
(pxmitbuf->pbuf + TXDESC_SIZE); /* Fill up Cmd Header for USB FW Tx Aggregation.*/ /* dw0 */ pcmd_hdr->cmd_dw0 = cpu_to_le32((GEN_CMD_CODE(_AMSDU_TO_AMPDU) << 16) | (pcmdpriv->cmd_seq << 24)); pcmdpriv->cmd_seq++; } void r8712_append_mpdu_unit(struct xmit_buf *pxmitbuf, struct xmit_frame *pxmitframe) { struct _adapter *padapter = pxmitframe->padapter; struct tx_desc *ptx_desc = (struct tx_desc *)pxmitbuf->pbuf; int last_txcmdsz = 0; int padding_sz = 0; /* 802.3->802.11 converter */ r8712_xmitframe_coalesce(padapter, pxmitframe->pkt, pxmitframe); /* free skb struct */ r8712_xmit_complete(padapter, pxmitframe); if (pxmitframe->attrib.ether_type != 0x0806) { if ((pxmitframe->attrib.ether_type != 0x888e) && (pxmitframe->attrib.dhcp_pkt != 1)) { r8712_issue_addbareq_cmd(padapter, pxmitframe->attrib.priority); } } pxmitframe->last[0] = 1; update_txdesc(pxmitframe, (uint *)(pxmitframe->buf_addr), pxmitframe->attrib.last_txcmdsz); /*padding zero */ last_txcmdsz = pxmitframe->attrib.last_txcmdsz; padding_sz = (8 - (last_txcmdsz % 8)); if ((last_txcmdsz % 8) != 0) { int i; for (i = 0; i < padding_sz; i++) *(pxmitframe->buf_addr + TXDESC_SIZE + last_txcmdsz + i) = 0; } /* Add the new mpdu's length */ ptx_desc->txdw0 = cpu_to_le32((ptx_desc->txdw0 & 0xffff0000) | ((ptx_desc->txdw0 & 0x0000ffff) + ((TXDESC_SIZE + last_txcmdsz + padding_sz) & 0x0000ffff))); } void r8712_xmitframe_aggr_1st(struct xmit_buf *pxmitbuf, struct xmit_frame *pxmitframe) { /* linux complete context doesn't need to protect */ pxmitframe->pxmitbuf = pxmitbuf; pxmitbuf->priv_data = pxmitframe; pxmitframe->pxmit_urb[0] = pxmitbuf->pxmit_urb[0]; /* buffer addr assoc */ pxmitframe->buf_addr = pxmitbuf->pbuf + TXDESC_SIZE + CMD_HDR_SZ; /*RTL8712_DMA_H2CCMD */ r8712_construct_txaggr_cmd_desc(pxmitbuf); r8712_construct_txaggr_cmd_hdr(pxmitbuf); r8712_append_mpdu_unit(pxmitbuf, pxmitframe); pxmitbuf->aggr_nr = 1; } u16 r8712_xmitframe_aggr_next(struct xmit_buf *pxmitbuf, struct xmit_frame *pxmitframe) { 
pxmitframe->pxmitbuf = pxmitbuf; pxmitbuf->priv_data = pxmitframe; pxmitframe->pxmit_urb[0] = pxmitbuf->pxmit_urb[0]; /* buffer addr assoc */ pxmitframe->buf_addr = pxmitbuf->pbuf + TXDESC_SIZE + (((struct tx_desc *)pxmitbuf->pbuf)->txdw0 & 0x0000ffff); r8712_append_mpdu_unit(pxmitbuf, pxmitframe); r8712_free_xmitframe_ex(&pxmitframe->padapter->xmitpriv, pxmitframe); pxmitbuf->aggr_nr++; return TXDESC_SIZE + (((struct tx_desc *)pxmitbuf->pbuf)->txdw0 & 0x0000ffff); } void r8712_dump_aggr_xframe(struct xmit_buf *pxmitbuf, struct xmit_frame *pxmitframe) { struct _adapter *padapter = pxmitframe->padapter; struct dvobj_priv *pdvobj = &padapter->dvobjpriv; struct tx_desc *ptxdesc = pxmitbuf->pbuf; struct cmd_hdr *pcmd_hdr = (struct cmd_hdr *) (pxmitbuf->pbuf + TXDESC_SIZE); u16 total_length = (u16)(ptxdesc->txdw0 & 0xffff); /* use 1st xmitframe as media */ xmitframe_xmitbuf_attach(pxmitframe, pxmitbuf); pcmd_hdr->cmd_dw0 = cpu_to_le32(((total_length - CMD_HDR_SZ) & 0x0000ffff) | (pcmd_hdr->cmd_dw0 & 0xffff0000)); /* urb length in cmd_dw1 */ pcmd_hdr->cmd_dw1 = cpu_to_le32((pxmitbuf->aggr_nr & 0xff) | ((total_length + TXDESC_SIZE) << 16)); pxmitframe->last[0] = 1; pxmitframe->bpending[0] = false; pxmitframe->mem_addr = pxmitbuf->pbuf; if ((pdvobj->ishighspeed && ((total_length + TXDESC_SIZE) % 0x200) == 0) || ((!pdvobj->ishighspeed && ((total_length + TXDESC_SIZE) % 0x40) == 0))) { ptxdesc->txdw0 |= cpu_to_le32 (((TXDESC_SIZE + OFFSET_SZ + 8) << OFFSET_SHT) & 0x00ff0000); /*32 bytes for TX Desc + 8 bytes pending*/ } else { ptxdesc->txdw0 |= cpu_to_le32 (((TXDESC_SIZE + OFFSET_SZ) << OFFSET_SHT) & 0x00ff0000); /*default = 32 bytes for TX Desc*/ } r8712_write_port(pxmitframe->padapter, RTL8712_DMA_H2CCMD, total_length + TXDESC_SIZE, (u8 *)pxmitframe); } #endif static void update_txdesc(struct xmit_frame *pxmitframe, uint *pmem, int sz) { uint qsel; struct _adapter *padapter = pxmitframe->padapter; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct qos_priv *pqospriv 
= &pmlmepriv->qospriv;
	struct security_priv *psecuritypriv = &padapter->securitypriv;
	struct pkt_attrib *pattrib = &pxmitframe->attrib;
	struct tx_desc *ptxdesc = (struct tx_desc *)pmem;
	struct dvobj_priv *pdvobj = &padapter->dvobjpriv;
#ifdef CONFIG_R8712_TX_AGGR
	struct cmd_priv *pcmdpriv = &padapter->cmdpriv;
#endif
	u8 blnSetTxDescOffset;
	bool bmcst = is_multicast_ether_addr(pattrib->ra);
	struct ht_priv *phtpriv = &pmlmepriv->htpriv;
	struct tx_desc txdesc_mp;

	/*
	 * Keep a copy of whatever the caller left in the descriptor before
	 * it is cleared; dw2/dw4/dw5 are restored from it further down when
	 * pattrib->pctrl == 1.
	 */
	memcpy(&txdesc_mp, ptxdesc, sizeof(struct tx_desc));
	memset(ptxdesc, 0, sizeof(struct tx_desc));
	/* offset 0 */
	ptxdesc->txdw0 |= cpu_to_le32(sz & 0x0000ffff);
	/*
	 * Flag transfers whose total size is an exact multiple of 512
	 * (high speed) or 64 (otherwise); those get 8 extra bytes of
	 * descriptor offset below.
	 */
	if (pdvobj->ishighspeed) {
		if (((sz + TXDESC_SIZE) % 512) == 0)
			blnSetTxDescOffset = 1;
		else
			blnSetTxDescOffset = 0;
	} else {
		if (((sz + TXDESC_SIZE) % 64) == 0)
			blnSetTxDescOffset = 1;
		else
			blnSetTxDescOffset = 0;
	}
	if (blnSetTxDescOffset) {
		/* 32 bytes for TX Desc + 8 bytes pending */
		ptxdesc->txdw0 |= cpu_to_le32(((TXDESC_SIZE + OFFSET_SZ + 8) <<
			      OFFSET_SHT) & 0x00ff0000);
	} else {
		/* default = 32 bytes for TX Desc */
		ptxdesc->txdw0 |= cpu_to_le32(((TXDESC_SIZE + OFFSET_SZ) <<
				  OFFSET_SHT) & 0x00ff0000);
	}
	ptxdesc->txdw0 |= cpu_to_le32(OWN | FSG | LSG);
	if (pxmitframe->frame_tag == DATA_FRAMETAG) {
		/* offset 4 */
		ptxdesc->txdw1 |= cpu_to_le32((pattrib->mac_id) & 0x1f);

#ifdef CONFIG_R8712_TX_AGGR
		/* dirty workaround, need to check if it is aggr cmd. */
		/*
		 * pmem differs from the start of the xmit buffer when this
		 * descriptor is written for a frame inside an aggregate.
		 */
		if ((u8 *)pmem != (u8 *)pxmitframe->pxmitbuf->pbuf) {
			ptxdesc->txdw0 |= cpu_to_le32
				((0x3 << TYPE_SHT) & TYPE_MSK);
			qsel = (uint)(pattrib->qsel & 0x0000001f);
			if (qsel == 2)
				qsel = 0;
			ptxdesc->txdw1 |= cpu_to_le32
				((qsel << QSEL_SHT) & 0x00001f00);
			ptxdesc->txdw2 = cpu_to_le32
				((qsel << RTS_RC_SHT) & 0x001f0000);
			ptxdesc->txdw6 |= cpu_to_le32
				((0x5 << RSVD6_SHT) & RSVD6_MSK);
		} else {
			ptxdesc->txdw0 |= cpu_to_le32
				((0x3 << TYPE_SHT) & TYPE_MSK);
			ptxdesc->txdw1 |= cpu_to_le32
				((0x13 << QSEL_SHT) & 0x00001f00);
			qsel = (uint)(pattrib->qsel & 0x0000001f);
			if (qsel == 2)
				qsel = 0;
			/*
			 * NOTE(review): the branch above masks the same
			 * shifted value with 0x001f0000; the 0x0001f000 used
			 * here looks like a transposed mask -- confirm
			 * against the value of RTS_RC_SHT.
			 */
			ptxdesc->txdw2 = cpu_to_le32
				((qsel << RTS_RC_SHT) & 0x0001f000);
			ptxdesc->txdw7 |= cpu_to_le32
				(pcmdpriv->cmd_seq << 24);
			pcmdpriv->cmd_seq++;
		}
		pattrib->qsel = 0x13;
#else
		qsel = (uint)(pattrib->qsel & 0x0000001f);
		ptxdesc->txdw1 |= cpu_to_le32((qsel << QSEL_SHT) & 0x00001f00);
#endif
		if (!pqospriv->qos_option)
			ptxdesc->txdw1 |= cpu_to_le32(BIT(16));/*Non-QoS*/
		/* Security type is only written when encryption is not done in software. */
		if ((pattrib->encrypt > 0) && !pattrib->bswenc) {
			switch (pattrib->encrypt) {	/*SEC_TYPE*/
			case _WEP40_:
			case _WEP104_:
				ptxdesc->txdw1 |= cpu_to_le32((0x01 << 22) &
						  0x00c00000);
				/*KEY_ID when WEP is used;*/
				ptxdesc->txdw1 |=
					cpu_to_le32((psecuritypriv->PrivacyKeyIndex << 17) &
						    0x00060000);
				break;
			case _TKIP_:
			case _TKIP_WTMIC_:
				ptxdesc->txdw1 |= cpu_to_le32((0x02 << 22) &
						  0x00c00000);
				break;
			case _AES_:
				ptxdesc->txdw1 |= cpu_to_le32((0x03 << 22) &
						  0x00c00000);
				break;
			case _NO_PRIVACY_:
			default:
				break;
			}
		}
		/*offset 8*/
		if (bmcst)
			ptxdesc->txdw2 |= cpu_to_le32(BMC);

		/*offset 12*/
		/* f/w will increase the seqnum by itself, driver pass the
		 * correct priority to fw.
		 * fw will check the correct priority for increasing the
		 * seqnum per tid. about usb using 4-endpoint, qsel points out
		 * the correct mapping between AC&Endpoint,
		 * the purpose is that correct mapping lets the MAC release
		 * the AC Queue list correctly.
		 */
		ptxdesc->txdw3 = cpu_to_le32((pattrib->priority << SEQ_SHT) &
				 0x0fff0000);
		if ((pattrib->ether_type != 0x888e) &&
		    (pattrib->ether_type != 0x0806) &&
		    (pattrib->dhcp_pkt != 1)) {
			/*Not EAP & ARP type data packet*/
			if (phtpriv->ht_option == 1) { /*B/G/N Mode*/
				if (!phtpriv->ampdu_enable)
					ptxdesc->txdw2 |= cpu_to_le32(BK);
			}
		} else {
			/* EAP data packet and ARP packet.
			 * Use the 1M data rate to send the EAP/ARP packet.
			 * This will maybe make the handshake smooth.
			 */
			/*driver uses data rate*/
			ptxdesc->txdw4 = cpu_to_le32(0x80000000);
			ptxdesc->txdw5 = cpu_to_le32(0x001f8000);/*1M*/
		}
		if (pattrib->pctrl == 1) { /* mp tx packets */
			struct tx_desc *ptxdesc_mp;

			/* restore dw2/dw4/dw5 from the copy saved before the memset */
			ptxdesc_mp = &txdesc_mp;
			/* offset 8 */
			ptxdesc->txdw2 = ptxdesc_mp->txdw2;
			if (bmcst)
				ptxdesc->txdw2 |= cpu_to_le32(BMC);
			ptxdesc->txdw2 |= cpu_to_le32(BK);
			/* offset 16 */
			ptxdesc->txdw4 = ptxdesc_mp->txdw4;
			/* offset 20 */
			ptxdesc->txdw5 = ptxdesc_mp->txdw5;
			pattrib->pctrl = 0;/* reset to zero; */
		}
	} else if (pxmitframe->frame_tag == MGNT_FRAMETAG) {
		/* offset 4 */
		/* CAM_ID(MAC_ID), default=5; */
		ptxdesc->txdw1 |= cpu_to_le32((0x05) & 0x1f);
		qsel = (uint)(pattrib->qsel & 0x0000001f);
		ptxdesc->txdw1 |= cpu_to_le32((qsel << QSEL_SHT) & 0x00001f00);
		ptxdesc->txdw1 |= cpu_to_le32(BIT(16));/* Non-QoS */
		/* offset 8 */
		if (bmcst)
			ptxdesc->txdw2 |= cpu_to_le32(BMC);
		/* offset 12 */
		/* f/w will increase the seqnum by itself, driver pass the
		 * correct priority to fw.
		 * fw will check the correct priority for increasing the seqnum
		 * per tid. about usb using 4-endpoint, qsel points out the
		 * correct mapping between AC&Endpoint,
		 * the purpose is that correct mapping let the MAC releases
		 * the AC Queue list correctly.
*/ ptxdesc->txdw3 = cpu_to_le32((pattrib->priority << SEQ_SHT) & 0x0fff0000); /* offset 16 */ ptxdesc->txdw4 = cpu_to_le32(0x80002040);/*gtest*/ /* offset 20 */ ptxdesc->txdw5 = cpu_to_le32(0x001f8000);/* gtest 1M */ } else if (pxmitframe->frame_tag == TXAGG_FRAMETAG) { /* offset 4 */ qsel = 0x13; ptxdesc->txdw1 |= cpu_to_le32((qsel << QSEL_SHT) & 0x00001f00); } else { /* offset 4 */ qsel = (uint)(pattrib->priority & 0x0000001f); ptxdesc->txdw1 |= cpu_to_le32((qsel << QSEL_SHT) & 0x00001f00); /*offset 8*/ /*offset 12*/ ptxdesc->txdw3 = cpu_to_le32((pattrib->seqnum << SEQ_SHT) & 0x0fff0000); /*offset 16*/ ptxdesc->txdw4 = cpu_to_le32(0x80002040);/*gtest*/ /*offset 20*/ ptxdesc->txdw5 = cpu_to_le32(0x001f9600);/*gtest*/ } } int r8712_xmitframe_complete(struct _adapter *padapter, struct xmit_priv *pxmitpriv, struct xmit_buf *pxmitbuf) { struct hw_xmit *phwxmits; sint hwentry; struct xmit_frame *pxmitframe = NULL; #ifdef CONFIG_R8712_TX_AGGR struct xmit_frame *p2ndxmitframe = NULL; #else int res = _SUCCESS; #endif phwxmits = pxmitpriv->hwxmits; hwentry = pxmitpriv->hwxmit_entry; if (!pxmitbuf) { pxmitbuf = r8712_alloc_xmitbuf(pxmitpriv); if (!pxmitbuf) return false; #ifdef CONFIG_R8712_TX_AGGR pxmitbuf->aggr_nr = 0; #endif } /* 1st frame dequeued */ pxmitframe = dequeue_xframe_ex(pxmitpriv, phwxmits, hwentry); /* need to remember the 1st frame */ if (pxmitframe) { #ifdef CONFIG_R8712_TX_AGGR /* 1. dequeue 2nd frame * 2. 
aggr if 2nd xframe is dequeued, else dump directly */ if (AGGR_NR_HIGH_BOUND > 1) p2ndxmitframe = dequeue_xframe_ex(pxmitpriv, phwxmits, hwentry); if (pxmitframe->frame_tag != DATA_FRAMETAG) { r8712_free_xmitbuf(pxmitpriv, pxmitbuf); return false; } if (p2ndxmitframe) if (p2ndxmitframe->frame_tag != DATA_FRAMETAG) { r8712_free_xmitbuf(pxmitpriv, pxmitbuf); return false; } r8712_xmitframe_aggr_1st(pxmitbuf, pxmitframe); if (p2ndxmitframe) { u16 total_length; total_length = r8712_xmitframe_aggr_next(pxmitbuf, p2ndxmitframe); do { p2ndxmitframe = dequeue_xframe_ex(pxmitpriv, phwxmits, hwentry); if (p2ndxmitframe) total_length = r8712_xmitframe_aggr_next(pxmitbuf, p2ndxmitframe); else break; } while (total_length <= 0x1800 && pxmitbuf->aggr_nr <= AGGR_NR_HIGH_BOUND); } if (pxmitbuf->aggr_nr > 0) r8712_dump_aggr_xframe(pxmitbuf, pxmitframe); #else xmitframe_xmitbuf_attach(pxmitframe, pxmitbuf); if (pxmitframe->frame_tag == DATA_FRAMETAG) { if (pxmitframe->attrib.priority <= 15) res = r8712_xmitframe_coalesce(padapter, pxmitframe->pkt, pxmitframe); /* always return ndis_packet after * r8712_xmitframe_coalesce */ r8712_xmit_complete(padapter, pxmitframe); } if (res == _SUCCESS) dump_xframe(padapter, pxmitframe); else r8712_free_xmitframe_ex(pxmitpriv, pxmitframe); #endif } else { /* pxmitframe == NULL && p2ndxmitframe == NULL */ r8712_free_xmitbuf(pxmitpriv, pxmitbuf); return false; } return true; } static void dump_xframe(struct _adapter *padapter, struct xmit_frame *pxmitframe) { int t, sz, w_sz; u8 *mem_addr; u32 ff_hwaddr; struct pkt_attrib *pattrib = &pxmitframe->attrib; struct xmit_priv *pxmitpriv = &padapter->xmitpriv; struct security_priv *psecuritypriv = &padapter->securitypriv; if (pxmitframe->attrib.ether_type != 0x0806) { if (pxmitframe->attrib.ether_type != 0x888e) r8712_issue_addbareq_cmd(padapter, pattrib->priority); } mem_addr = pxmitframe->buf_addr; for (t = 0; t < pattrib->nr_frags; t++) { if (t != (pattrib->nr_frags - 1)) { sz = pxmitpriv->frag_len; sz 
= sz - 4 - (psecuritypriv->sw_encrypt ? 0 : pattrib->icv_len); pxmitframe->last[t] = 0; } else { sz = pattrib->last_txcmdsz; pxmitframe->last[t] = 1; } update_txdesc(pxmitframe, (uint *)mem_addr, sz); w_sz = sz + TXDESC_SIZE; pxmitframe->mem_addr = mem_addr; pxmitframe->bpending[t] = false; ff_hwaddr = get_ff_hwaddr(pxmitframe); #ifdef CONFIG_R8712_TX_AGGR r8712_write_port(padapter, RTL8712_DMA_H2CCMD, w_sz, (unsigned char *)pxmitframe); #else r8712_write_port(padapter, ff_hwaddr, w_sz, (unsigned char *)pxmitframe); #endif mem_addr += w_sz; mem_addr = (u8 *)RND4(((addr_t)(mem_addr))); } } void r8712_xmit_direct(struct _adapter *padapter, struct xmit_frame *pxmitframe) { int res; res = r8712_xmitframe_coalesce(padapter, pxmitframe->pkt, pxmitframe); pxmitframe->pkt = NULL; if (res == _SUCCESS) dump_xframe(padapter, pxmitframe); } int r8712_xmit_enqueue(struct _adapter *padapter, struct xmit_frame *pxmitframe) { if (r8712_xmit_classifier(padapter, pxmitframe)) { pxmitframe->pkt = NULL; return _FAIL; } return _SUCCESS; }
4 4 12 6 9 25 1 2 2 2 4 3 5 22 20 20 4 1 1 1 9 9 3 9 12 14 14 1 1 2 2 1 2 1 1 3 3 2 7 1 4 2 2 6 3 8 29 2 1 1 1 6 5 4 7 1 2 2 2 2 4 3 11 1 1 1 1 2 3 2 3 1 7 1 1 3 1 2 3 3 3 1 3 2 2 9 9 2 8 4 4 4 4 26 2 21 19 4 9 9 9 1 3 2 13 9 34 2 32 2 1 2 16 11 4 13 27 1 1 1 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 
462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 
962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2010-2011 EIA Electronics, // Pieter Beyens <pieter.beyens@eia.be> // Copyright (c) 2010-2011 
//                         EIA Electronics,
//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
// Copyright (c) 2018 Protonic,
//                         Robin van der Gracht <robin@protonic.nl>
// Copyright (c) 2017-2019 Pengutronix,
//                         Marc Kleine-Budde <kernel@pengutronix.de>
// Copyright (c) 2017-2019 Pengutronix,
//                         Oleksij Rempel <kernel@pengutronix.de>

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/can/can-ml.h>
#include <linux/can/core.h>
#include <linux/can/skb.h>
#include <linux/errqueue.h>
#include <linux/if_arp.h>

#include "j1939-priv.h"

/* Minimum sockaddr length accepted/returned by this file: everything up to
 * and including the can_addr.j1939 member of struct sockaddr_can.
 */
#define J1939_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.j1939)

/* Conversion between struct sock::sk_priority from linux and the
 * j1939 priority field: the input is clamped to at most 7 and then
 * mirrored (result = 7 - clamped value).
 */
static inline priority_t j1939_prio(u32 sk_priority)
{
	sk_priority = min(sk_priority, 7U);

	return 7 - sk_priority;
}

/* Inverse mapping of j1939_prio() for values in the range 0..7. */
static inline u32 j1939_to_sk_priority(priority_t prio)
{
	return 7 - prio;
}

/* function to see if pgn is to be evaluated */
static inline bool j1939_pgn_is_valid(pgn_t pgn)
{
	return pgn <= J1939_PGN_MAX;
}

/* test function to avoid non-zero DA placeholder for pdu1 pgn's:
 * a PDU1 pgn is "clean" only when its low byte is zero; every other
 * pgn is accepted as-is.
 */
static inline bool j1939_pgn_is_clean_pdu(pgn_t pgn)
{
	if (j1939_pgn_is_pdu1(pgn))
		return !(pgn & 0xff);
	else
		return true;
}

/* Account for one more pending skb on this socket. */
static inline void j1939_sock_pending_add(struct sock *sk)
{
	struct j1939_sock *jsk = j1939_sk(sk);

	atomic_inc(&jsk->skb_pending);
}

/* Read the current number of pending skbs of this socket. */
static int j1939_sock_pending_get(struct sock *sk)
{
	struct j1939_sock *jsk = j1939_sk(sk);

	return atomic_read(&jsk->skb_pending);
}

/* Drop one pending skb; wake up waiters on jsk->waitq when the counter
 * reaches zero.
 */
void j1939_sock_pending_del(struct sock *sk)
{
	struct j1939_sock *jsk = j1939_sk(sk);

	/* atomic_dec_return returns the new value */
	if (!atomic_dec_return(&jsk->skb_pending))
		wake_up(&jsk->waitq);	/* no pending SKB's */
}

/* Mark the socket as bound, take a reference on @priv and append the socket
 * to priv->j1939_socks under the write lock.
 */
static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
{
	jsk->state |= J1939_SOCK_BOUND;
	j1939_priv_get(priv);

	write_lock_bh(&priv->j1939_socks_lock);
	list_add_tail(&jsk->list, &priv->j1939_socks);
	write_unlock_bh(&priv->j1939_socks_lock);
}

/* Reverse of j1939_jsk_add(): unlink the socket under the write lock, drop
 * the reference on @priv and clear the bound flag.
 */
static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
{
	write_lock_bh(&priv->j1939_socks_lock);
	list_del_init(&jsk->list);
	write_unlock_bh(&priv->j1939_socks_lock);

	j1939_priv_put(priv);
	jsk->state &= ~J1939_SOCK_BOUND;
}

/* Append @session to the session queue of its socket, taking a session
 * reference and bumping the socket's pending counter.
 *
 * Returns true when the queue was empty before the session was added.
 */
static bool j1939_sk_queue_session(struct j1939_session *session)
{
	struct j1939_sock *jsk = j1939_sk(session->sk);
	bool empty;

	spin_lock_bh(&jsk->sk_session_queue_lock);
	empty = list_empty(&jsk->sk_session_queue);
	j1939_session_get(session);
	list_add_tail(&session->sk_session_queue_entry, &jsk->sk_session_queue);
	spin_unlock_bh(&jsk->sk_session_queue_lock);
	j1939_sock_pending_add(&jsk->sk);

	return empty;
}

/* Return the last queued session of @jsk with a reference held, but only if
 * its total_queued_size differs from its total_message_size; NULL otherwise
 * (including when the queue is empty).
 */
static struct
j1939_session *j1939_sk_get_incomplete_session(struct j1939_sock *jsk)
{
	struct j1939_session *session = NULL;

	spin_lock_bh(&jsk->sk_session_queue_lock);
	if (!list_empty(&jsk->sk_session_queue)) {
		session = list_last_entry(&jsk->sk_session_queue,
					  struct j1939_session,
					  sk_session_queue_entry);
		if (session->total_queued_size == session->total_message_size)
			session = NULL;
		else
			j1939_session_get(session);
	}
	spin_unlock_bh(&jsk->sk_session_queue_lock);

	return session;
}

/* Unlink every session queued on @jsk, store @err in it and drop the
 * reference that the queue was holding.
 */
static void j1939_sk_queue_drop_all(struct j1939_priv *priv,
				    struct j1939_sock *jsk, int err)
{
	struct j1939_session *session, *tmp;

	netdev_dbg(priv->ndev, "%s: err: %i\n", __func__, err);
	spin_lock_bh(&jsk->sk_session_queue_lock);
	list_for_each_entry_safe(session, tmp, &jsk->sk_session_queue,
				 sk_session_queue_entry) {
		list_del_init(&session->sk_session_queue_entry);
		session->err = err;
		j1939_session_put(session);
	}
	spin_unlock_bh(&jsk->sk_session_queue_lock);
}

/* Remove @session from the head of its socket's queue and activate the next
 * queued session.  Must be called with sk_session_queue_lock held (asserted
 * via lockdep).  Does nothing when @session has no socket or is not the
 * first queue entry.
 */
static void j1939_sk_queue_activate_next_locked(struct j1939_session *session)
{
	struct j1939_sock *jsk;
	struct j1939_session *first;
	int err;

	/* RX-Session don't have a socket (yet) */
	if (!session->sk)
		return;

	jsk = j1939_sk(session->sk);
	lockdep_assert_held(&jsk->sk_session_queue_lock);

	/* remembered so that a failed session delays its successor below */
	err = session->err;

	first = list_first_entry_or_null(&jsk->sk_session_queue,
					 struct j1939_session,
					 sk_session_queue_entry);

	/* Some else has already activated the next session */
	if (first != session)
		return;

activate_next:
	list_del_init(&first->sk_session_queue_entry);
	j1939_session_put(first);
	first = list_first_entry_or_null(&jsk->sk_session_queue,
					 struct j1939_session,
					 sk_session_queue_entry);
	if (!first)
		return;

	if (j1939_session_activate(first)) {
		/* activation refused: mark the session busy and try the
		 * one queued after it
		 */
		netdev_warn_once(first->priv->ndev,
				 "%s: 0x%p: Identical session is already activated.\n",
				 __func__, first);
		first->err = -EBUSY;
		goto activate_next;
	} else {
		/* Give receiver some time (arbitrary chosen) to recover */
		int time_ms = 0;

		if (err)
			time_ms = 10 + get_random_u32_below(16);

		j1939_tp_schedule_txtimer(first, time_ms);
	}
}

/* Locking wrapper around j1939_sk_queue_activate_next_locked(). */
void j1939_sk_queue_activate_next(struct j1939_session *session)
{
	struct j1939_sock *jsk;

	if (!session->sk)
		return;

	jsk = j1939_sk(session->sk);

	spin_lock_bh(&jsk->sk_session_queue_lock);
	j1939_sk_queue_activate_next_locked(session);
	spin_unlock_bh(&jsk->sk_session_queue_lock);
}

/* Decide whether the addressing information in @skcb matches socket @jsk.
 * Promiscuous sockets match everything; otherwise the destination, the
 * (optional) connected peer and the (optional) receive PGN filter are
 * checked in that order.
 */
static bool j1939_sk_match_dst(struct j1939_sock *jsk,
			       const struct j1939_sk_buff_cb *skcb)
{
	if ((jsk->state & J1939_SOCK_PROMISC))
		return true;

	/* Destination address filter */
	if (jsk->addr.src_name && skcb->addr.dst_name) {
		if (jsk->addr.src_name != skcb->addr.dst_name)
			return false;
	} else {
		/* receive (all sockets) if
		 * - all packages that match our bind() address
		 * - all broadcast on a socket if SO_BROADCAST
		 *   is set
		 */
		if (j1939_address_is_unicast(skcb->addr.da)) {
			if (jsk->addr.sa != skcb->addr.da)
				return false;
		} else if (!sock_flag(&jsk->sk, SOCK_BROADCAST)) {
			/* receiving broadcast without SO_BROADCAST
			 * flag is not allowed
			 */
			return false;
		}
	}

	/* Source address filter */
	if (jsk->state & J1939_SOCK_CONNECTED) {
		/* receive (all sockets) if
		 * - all packages that match our connect() name or address
		 */
		if (jsk->addr.dst_name && skcb->addr.src_name) {
			if (jsk->addr.dst_name != skcb->addr.src_name)
				return false;
		} else {
			if (jsk->addr.da != skcb->addr.sa)
				return false;
		}
	}

	/* PGN filter */
	if (j1939_pgn_is_valid(jsk->pgn_rx_filter) &&
	    jsk->pgn_rx_filter != skcb->addr.pgn)
		return false;

	return true;
}

/* matches skb control buffer (addr) with a j1939 filter
 *
 * Returns true when no filters are installed or when at least one filter
 * matches pgn, source address and source name (each after masking).
 */
static bool j1939_sk_match_filter(struct j1939_sock *jsk,
				  const struct j1939_sk_buff_cb *skcb)
{
	const struct j1939_filter *f;
	int nfilter;

	spin_lock_bh(&jsk->filters_lock);

	f = jsk->filters;
	nfilter = jsk->nfilters;

	if (!nfilter)
		/* receive all when no filters are assigned */
		goto filter_match_found;

	for (; nfilter; ++f, --nfilter) {
		if ((skcb->addr.pgn & f->pgn_mask) != f->pgn)
			continue;
		if ((skcb->addr.sa & f->addr_mask) != f->addr)
			continue;
		if ((skcb->addr.src_name & f->name_mask) != f->name)
			continue;
		goto filter_match_found;
	}

	spin_unlock_bh(&jsk->filters_lock);
	return false;

filter_match_found:
	spin_unlock_bh(&jsk->filters_lock);
	return true;
}

/* A socket receives a frame only when it is bound and both the address
 * match and the filter match succeed.
 */
static bool j1939_sk_recv_match_one(struct j1939_sock *jsk,
				    const struct j1939_sk_buff_cb *skcb)
{
	if (!(jsk->state & J1939_SOCK_BOUND))
		return false;

	if (!j1939_sk_match_dst(jsk, skcb))
		return false;

	if (!j1939_sk_match_filter(jsk, skcb))
		return false;

	return true;
}

/* Deliver a clone of @oskb to @jsk if the socket matches it.  Frames whose
 * owner is @jsk itself are not delivered back to it.
 */
static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb)
{
	const struct j1939_sk_buff_cb *oskcb = j1939_skb_to_cb(oskb);
	struct j1939_sk_buff_cb *skcb;
	struct sk_buff *skb;

	if (oskb->sk == &jsk->sk)
		return;

	if (!j1939_sk_recv_match_one(jsk, oskcb))
		return;

	skb = skb_clone(oskb, GFP_ATOMIC);
	if (!skb) {
		pr_warn("skb clone failed\n");
		return;
	}
	can_skb_set_owner(skb, oskb->sk);

	/* MSG_DONTROUTE is set on the clone only when it has an owning sock */
	skcb = j1939_skb_to_cb(skb);
	skcb->msg_flags &= ~(MSG_DONTROUTE);
	if (skb->sk)
		skcb->msg_flags |= MSG_DONTROUTE;

	if (sock_queue_rcv_skb(&jsk->sk, skb) < 0)
		kfree_skb(skb);
}

/* Returns true when at least one socket registered on @priv would accept a
 * frame described by @skcb.
 */
bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
{
	struct j1939_sock *jsk;
	bool match = false;

	read_lock_bh(&priv->j1939_socks_lock);
	list_for_each_entry(jsk, &priv->j1939_socks, list) {
		match = j1939_sk_recv_match_one(jsk, skcb);
		if (match)
			break;
	}
	read_unlock_bh(&priv->j1939_socks_lock);

	return match;
}

/* Offer @skb to every socket registered on @priv. */
void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
{
	struct j1939_sock *jsk;

	read_lock_bh(&priv->j1939_socks_lock);
	list_for_each_entry(jsk, &priv->j1939_socks, list) {
		j1939_sk_recv_one(jsk, skb);
	}
	read_unlock_bh(&priv->j1939_socks_lock);
}

/* sk->sk_destruct callback: drop the socket's j1939_priv reference (if any)
 * and chain to the generic CAN destructor.
 */
static void j1939_sk_sock_destruct(struct sock *sk)
{
	struct j1939_sock *jsk = j1939_sk(sk);

	/* This function will be called by the generic networking code, when
	 * the socket is ultimately closed (sk->sk_destruct).
	 *
	 * The race between
	 * - processing a received CAN frame
	 *   (can_receive -> j1939_can_recv)
	 *   and accessing j1939_priv
	 * ... and ...
	 * - closing a socket
	 *   (j1939_can_rx_unregister -> can_rx_unregister)
	 *   and calling the final j1939_priv_put()
	 *
	 * is avoided by calling the final j1939_priv_put() from this
	 * RCU deferred cleanup call.
	 */
	if (jsk->priv) {
		j1939_priv_put(jsk->priv);
		jsk->priv = NULL;
	}

	/* call generic CAN sock destruct */
	can_sock_destruct(sk);
}

/* Initialise the j1939 specific part of a freshly created socket: zero it,
 * set default priority/addresses/PGNs, initialise locks and lists, and
 * install the destructor.  Always returns 0.
 */
static int j1939_sk_init(struct sock *sk)
{
	struct j1939_sock *jsk = j1939_sk(sk);

	/* Ensure that "sk" is first member in "struct j1939_sock", so that we
	 * can skip it during memset().
	 */
	BUILD_BUG_ON(offsetof(struct j1939_sock, sk) != 0);
	memset((void *)jsk + sizeof(jsk->sk), 0x0,
	       sizeof(*jsk) - sizeof(jsk->sk));

	INIT_LIST_HEAD(&jsk->list);
	init_waitqueue_head(&jsk->waitq);
	jsk->sk.sk_priority = j1939_to_sk_priority(6);
	jsk->sk.sk_reuse = 1; /* per default */
	jsk->addr.sa = J1939_NO_ADDR;
	jsk->addr.da = J1939_NO_ADDR;
	jsk->addr.pgn = J1939_NO_PGN;
	jsk->pgn_rx_filter = J1939_NO_PGN;
	atomic_set(&jsk->skb_pending, 0);
	spin_lock_init(&jsk->sk_session_queue_lock);
	INIT_LIST_HEAD(&jsk->sk_session_queue);
	spin_lock_init(&jsk->filters_lock);

	/* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */
	sock_set_flag(sk, SOCK_RCU_FREE);
	sk->sk_destruct = j1939_sk_sock_destruct;
	sk->sk_protocol = CAN_J1939;

	return 0;
}

/* Validate a user supplied sockaddr_can for bind()/connect().
 *
 * Returns 0 when usable, otherwise:
 *   -EDESTADDRREQ  @addr is NULL
 *   -EINVAL        @len too short, wrong family, or a valid PGN that is not
 *                  a clean PDU
 *   -ENODEV        can_ifindex is 0
 */
static int j1939_sk_sanity_check(struct sockaddr_can *addr, int len)
{
	if (!addr)
		return -EDESTADDRREQ;
	if (len < J1939_MIN_NAMELEN)
		return -EINVAL;
	if (addr->can_family != AF_CAN)
		return -EINVAL;
	if (!addr->can_ifindex)
		return -ENODEV;
	if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) &&
	    !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn))
		return -EINVAL;

	return 0;
}

/* bind() handler.  A first bind looks up the interface, starts j1939 on it
 * and stores the resulting priv in the socket; a re-bind is only accepted
 * for the same interface and first drops the old list entry and local ECU
 * reference.  In both cases the new name/address is then registered and the
 * socket is (re)added to the priv's socket list.
 */
static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
{
	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
	struct j1939_sock *jsk = j1939_sk(sock->sk);
	struct j1939_priv *priv;
	struct sock *sk;
	struct net *net;
	int ret = 0;

	ret = j1939_sk_sanity_check(addr, len);
	if (ret)
		return ret;

	lock_sock(sock->sk);

	priv = jsk->priv;
	sk = sock->sk;
	net = sock_net(sk);

	/* Already bound to an interface? */
	if (jsk->state & J1939_SOCK_BOUND) {
		/* A re-bind() to a different interface is not
		 * supported.
		 */
		if (jsk->ifindex != addr->can_ifindex) {
			ret = -EINVAL;
			goto out_release_sock;
		}

		/* drop old references */
		j1939_jsk_del(priv, jsk);
		j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa);
	} else {
		struct can_ml_priv *can_ml;
		struct net_device *ndev;

		ndev = dev_get_by_index(net, addr->can_ifindex);
		if (!ndev) {
			ret = -ENODEV;
			goto out_release_sock;
		}

		can_ml = can_get_ml_priv(ndev);
		if (!can_ml) {
			dev_put(ndev);
			ret = -ENODEV;
			goto out_release_sock;
		}

		if (!(ndev->flags & IFF_UP)) {
			dev_put(ndev);
			ret = -ENETDOWN;
			goto out_release_sock;
		}

		priv = j1939_netdev_start(ndev);
		dev_put(ndev);
		if (IS_ERR(priv)) {
			ret = PTR_ERR(priv);
			goto out_release_sock;
		}

		jsk->ifindex = addr->can_ifindex;

		/* the corresponding j1939_priv_put() is called via
		 * sk->sk_destruct, which points to j1939_sk_sock_destruct()
		 */
		j1939_priv_get(priv);
		jsk->priv = priv;
	}

	/* A valid PGN in the bind address becomes the receive PGN filter
	 * (this is what j1939_sk_match_dst() compares incoming frames with).
	 */
	if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
		jsk->pgn_rx_filter = addr->can_addr.j1939.pgn;
	jsk->addr.src_name = addr->can_addr.j1939.name;
	jsk->addr.sa = addr->can_addr.j1939.addr;

	/* get new references */
	ret = j1939_local_ecu_get(priv, jsk->addr.src_name, jsk->addr.sa);
	if (ret) {
		/* NOTE(review): on this path jsk->priv stays set and the
		 * reference taken above is left to the destructor; for the
		 * re-bind case the socket has already been unlinked. Confirm
		 * the intended reference balance against
		 * j1939_netdev_start()/j1939_netdev_stop().
		 */
		j1939_netdev_stop(priv);
		goto out_release_sock;
	}

	j1939_jsk_add(priv, jsk);

 out_release_sock: /* fall through */
	release_sock(sock->sk);

	return ret;
}

/* connect() handler: records the peer name/address (and PGN, when valid) on
 * an already bound socket and marks it connected.
 *
 * Returns 0 on success, -EINVAL when the socket is not bound or the address
 * names another interface, -EACCES for a broadcast peer without
 * SO_BROADCAST, or the error from j1939_sk_sanity_check().
 */
static int j1939_sk_connect(struct socket *sock, struct sockaddr *uaddr,
			    int len, int flags)
{
	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
	struct j1939_sock *jsk = j1939_sk(sock->sk);
	int ret = 0;

	ret = j1939_sk_sanity_check(addr, len);
	if (ret)
		return ret;

	lock_sock(sock->sk);

	/* bind() before connect() is mandatory */
	if (!(jsk->state & J1939_SOCK_BOUND)) {
		ret = -EINVAL;
		goto out_release_sock;
	}

	/* A connect() to a different interface is not supported. */
	if (jsk->ifindex != addr->can_ifindex) {
		ret = -EINVAL;
		goto out_release_sock;
	}

	if (!addr->can_addr.j1939.name &&
	    addr->can_addr.j1939.addr == J1939_NO_ADDR &&
	    !sock_flag(&jsk->sk, SOCK_BROADCAST)) {
		/* broadcast, but SO_BROADCAST not set */
		ret = -EACCES;
		goto out_release_sock;
	}

	jsk->addr.dst_name = addr->can_addr.j1939.name;
	jsk->addr.da = addr->can_addr.j1939.addr;

	if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
		jsk->addr.pgn = addr->can_addr.j1939.pgn;

	jsk->state |= J1939_SOCK_CONNECTED;

 out_release_sock: /* fall through */
	release_sock(sock->sk);

	return ret;
}

/* Fill @addr from the socket's stored addressing: the peer side when @peer
 * is non-zero, the local side otherwise.
 */
static void j1939_sk_sock2sockaddr_can(struct sockaddr_can *addr,
				       const struct j1939_sock *jsk, int peer)
{
	/* There are two holes (2 bytes and 3 bytes) to clear to avoid
	 * leaking kernel information to user space.
	 */
	memset(addr, 0, J1939_MIN_NAMELEN);

	addr->can_family = AF_CAN;
	addr->can_ifindex = jsk->ifindex;
	addr->can_addr.j1939.pgn = jsk->addr.pgn;
	if (peer) {
		addr->can_addr.j1939.name = jsk->addr.dst_name;
		addr->can_addr.j1939.addr = jsk->addr.da;
	} else {
		addr->can_addr.j1939.name = jsk->addr.src_name;
		addr->can_addr.j1939.addr = jsk->addr.sa;
	}
}

/* getsockname()/getpeername() handler.
 *
 * Returns J1939_MIN_NAMELEN on success, or -EADDRNOTAVAIL when the peer
 * address is requested on a socket that is not connected.
 */
static int j1939_sk_getname(struct socket *sock, struct sockaddr *uaddr,
			    int peer)
{
	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
	struct sock *sk = sock->sk;
	struct j1939_sock *jsk = j1939_sk(sk);
	int ret = 0;

	lock_sock(sk);

	if (peer && !(jsk->state & J1939_SOCK_CONNECTED)) {
		ret = -EADDRNOTAVAIL;
		goto failure;
	}

	j1939_sk_sock2sockaddr_can(addr, jsk, peer);
	ret = J1939_MIN_NAMELEN;

 failure:
	release_sock(sk);

	return ret;
}

/* release() handler.  For a bound socket this waits until no skb is pending
 * (cancelling the active session and dropping the queue if the wait is
 * interrupted), then undoes bind().  Finally the filters are freed and the
 * sock is orphaned and released.  Always returns 0.
 */
static int j1939_sk_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct j1939_sock *jsk;

	if (!sk)
		return 0;

	lock_sock(sk);
	jsk = j1939_sk(sk);

	if (jsk->state & J1939_SOCK_BOUND) {
		struct j1939_priv *priv = jsk->priv;

		if (wait_event_interruptible(jsk->waitq,
					     !j1939_sock_pending_get(&jsk->sk))) {
			j1939_cancel_active_session(priv, sk);
			/* NOTE(review): ESHUTDOWN is passed as a positive
			 * value here, unlike the -EBUSY stored elsewhere in
			 * session->err — confirm which sign consumers expect.
			 */
			j1939_sk_queue_drop_all(priv, jsk, ESHUTDOWN);
		}

		j1939_jsk_del(priv, jsk);

		j1939_local_ecu_put(priv, jsk->addr.src_name,
				    jsk->addr.sa);

		j1939_netdev_stop(priv);
	}

	kfree(jsk->filters);
	sock_orphan(sk);
	sock->sk = NULL;

	release_sock(sk);
	sock_put(sk);

	return 0;
}

/* Set or clear @flag in jsk->state from an int sized socket option.
 *
 * Returns -EINVAL for a wrong @optlen, -EFAULT when the value cannot be
 * copied, otherwise the copied value itself.
 *
 * NOTE(review): because the raw user value is returned, a non-zero option
 * value yields a non-zero return code, and a negative value both updates
 * the flag and is reported to the caller as an error.  Returning 0 on
 * success looks intended — confirm before changing, since it is visible to
 * user space via setsockopt().
 */
static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, sockptr_t optval,
				    unsigned int optlen, int flag)
{
	int tmp;

	if (optlen != sizeof(tmp))
		return -EINVAL;
	if (copy_from_sockptr(&tmp, optval, optlen))
		return -EFAULT;
	lock_sock(&jsk->sk);
	if (tmp)
		jsk->state |= flag;
	else
		jsk->state &= ~flag;
	release_sock(&jsk->sk);
	return tmp;
}

/* setsockopt() handler for level SOL_CAN_J1939.
 *
 * Supported options: SO_J1939_FILTER (replace the filter array, or remove it
 * when no data is given), SO_J1939_PROMISC, SO_J1939_ERRQUEUE and
 * SO_J1939_SEND_PRIO.  Any other level gives -EINVAL, any other option
 * -ENOPROTOOPT.
 */
static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
			       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct j1939_sock *jsk = j1939_sk(sk);
	int tmp, count = 0, ret = 0;
	struct j1939_filter *filters = NULL, *ofilters;

	if (level != SOL_CAN_J1939)
		return -EINVAL;

	switch (optname) {
	case SO_J1939_FILTER:
		if (!sockptr_is_null(optval) && optlen != 0) {
			struct j1939_filter *f;
			int c;

			if (optlen % sizeof(*filters) != 0)
				return -EINVAL;

			if (optlen > J1939_FILTER_MAX *
			    sizeof(struct j1939_filter))
				return -EINVAL;

			count = optlen / sizeof(*filters);
			filters = memdup_sockptr(optval, optlen);
			if (IS_ERR(filters))
				return PTR_ERR(filters);

			/* pre-mask the values so that matching only has to
			 * mask the incoming frame
			 */
			for (f = filters, c = count; c; f++, c--) {
				f->name &= f->name_mask;
				f->pgn &= f->pgn_mask;
				f->addr &= f->addr_mask;
			}
		}

		/* swap in the new array under filters_lock, free the old
		 * one afterwards
		 */
		lock_sock(&jsk->sk);
		spin_lock_bh(&jsk->filters_lock);
		ofilters = jsk->filters;
		jsk->filters = filters;
		jsk->nfilters = count;
		spin_unlock_bh(&jsk->filters_lock);
		release_sock(&jsk->sk);
		kfree(ofilters);
		return 0;
	case SO_J1939_PROMISC:
		return j1939_sk_setsockopt_flag(jsk, optval, optlen,
						J1939_SOCK_PROMISC);
	case SO_J1939_ERRQUEUE:
		ret = j1939_sk_setsockopt_flag(jsk, optval, optlen,
					       J1939_SOCK_ERRQUEUE);
		if (ret < 0)
			return ret;

		/* disabling the error queue discards what is queued on it */
		if (!(jsk->state & J1939_SOCK_ERRQUEUE))
			skb_queue_purge(&sk->sk_error_queue);
		return ret;
	case SO_J1939_SEND_PRIO:
		if (optlen != sizeof(tmp))
			return -EINVAL;
		if (copy_from_sockptr(&tmp, optval, optlen))
			return -EFAULT;
		if (tmp < 0 || tmp > 7)
			return -EDOM;
		/* priorities 0 and 1 require CAP_NET_ADMIN */
		if (tmp < 2 && !capable(CAP_NET_ADMIN))
			return -EPERM;
		lock_sock(&jsk->sk);
		jsk->sk.sk_priority = j1939_to_sk_priority(tmp);
		release_sock(&jsk->sk);
		return 0;
	default:
		return -ENOPROTOOPT;
	}
}

/* getsockopt() handler for level SOL_CAN_J1939.  All supported options
 * (SO_J1939_PROMISC, SO_J1939_ERRQUEUE, SO_J1939_SEND_PRIO) are returned as
 * an int.
 *
 * Returns 0 on success, -EINVAL for another level or a negative user length,
 * -ENOPROTOOPT for an unknown option, and -EFAULT when the user buffer is
 * too small or cannot be accessed.
 */
static int j1939_sk_getsockopt(struct socket *sock, int level, int optname,
			       char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct j1939_sock *jsk = j1939_sk(sk);
	int ret, ulen;
	/* set defaults for using 'int' properties */
	int tmp = 0;
	int len = sizeof(tmp);
	void *val = &tmp;

	if (level != SOL_CAN_J1939)
		return -EINVAL;
	if (get_user(ulen, optlen))
		return -EFAULT;
	if (ulen < 0)
		return -EINVAL;

	lock_sock(&jsk->sk);
	switch (optname) {
	case SO_J1939_PROMISC:
		tmp = (jsk->state & J1939_SOCK_PROMISC) ? 1 : 0;
		break;
	case SO_J1939_ERRQUEUE:
		tmp = (jsk->state & J1939_SOCK_ERRQUEUE) ? 1 : 0;
		break;
	case SO_J1939_SEND_PRIO:
		tmp = j1939_prio(jsk->sk.sk_priority);
		break;
	default:
		ret = -ENOPROTOOPT;
		goto no_copy;
	}

	/* copy to user, based on 'len' & 'val'
	 * but most sockopt's are 'int' properties, and have 'len' & 'val'
	 * left unchanged, but instead modified 'tmp'
	 */
	if (len > ulen)
		ret = -EFAULT;
	else if (put_user(len, optlen))
		ret = -EFAULT;
	else if (copy_to_user(optval, val, len))
		ret = -EFAULT;
	else
		ret = 0;
 no_copy:
	release_sock(&jsk->sk);
	return ret;
}

/* recvmsg() handler.  With MSG_ERRQUEUE the request is forwarded to
 * sock_recv_errqueue(); otherwise one datagram is dequeued, copied to @msg
 * (truncating and setting MSG_TRUNC when @size is too small) and the
 * destination address/name and priority are attached as control messages.
 *
 * Returns the number of bytes copied or a negative error.
 */
static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
			    size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	struct j1939_sk_buff_cb *skcb;
	int ret = 0;

	if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE | MSG_CMSG_COMPAT))
		return -EINVAL;

	if (flags & MSG_ERRQUEUE)
		return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939,
					  SCM_J1939_ERRQUEUE);

	skb = skb_recv_datagram(sk, flags, &ret);
	if (!skb)
		return ret;

	if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;
	else
		size = skb->len;

	ret = memcpy_to_msg(msg, skb->data, size);
	if (ret < 0) {
		skb_free_datagram(sk, skb);
		return ret;
	}

	skcb = j1939_skb_to_cb(skb);
	if (j1939_address_is_valid(skcb->addr.da))
		put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_ADDR,
			 sizeof(skcb->addr.da), &skcb->addr.da);

	if (skcb->addr.dst_name)
		put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_NAME,
			 sizeof(skcb->addr.dst_name), &skcb->addr.dst_name);

	put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_PRIO,
		 sizeof(skcb->priority), &skcb->priority);

	/* report the sender when the caller supplied a name buffer */
	if (msg->msg_name) {
		struct sockaddr_can *paddr = msg->msg_name;

		msg->msg_namelen = J1939_MIN_NAMELEN;
		memset(msg->msg_name, 0, msg->msg_namelen);
		paddr->can_family = AF_CAN;
		paddr->can_ifindex = skb->skb_iif;
		paddr->can_addr.j1939.name = skcb->addr.src_name;
		paddr->can_addr.j1939.addr = skcb->addr.sa;
		paddr->can_addr.j1939.pgn = skcb->addr.pgn;
	}

	sock_recv_cmsgs(msg, sk, skb);
	msg->msg_flags |= skcb->msg_flags;
	skb_free_datagram(sk, skb);

	return size;
}

/* Allocate an skb for sending @size bytes taken from @msg on @ndev and fill
 * in its j1939 control block from the socket defaults, overridden by the
 * destination/PGN in msg->msg_name when present.
 *
 * Returns the skb, or NULL on failure; *@errcode always receives the last
 * status value (the error on failure).
 */
static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev,
					  struct sock *sk,
					  struct msghdr *msg, size_t size,
					  int *errcode)
{
	struct j1939_sock *jsk = j1939_sk(sk);
	struct j1939_sk_buff_cb *skcb;
	struct sk_buff *skb;
	int ret;

	/* room for the payload, a can_frame header without its data[] and
	 * the can_skb_priv area
	 */
	skb = sock_alloc_send_skb(sk,
				  size +
				  sizeof(struct can_frame) -
				  sizeof(((struct can_frame *)NULL)->data) +
				  sizeof(struct can_skb_priv),
				  msg->msg_flags & MSG_DONTWAIT, &ret);
	if (!skb)
		goto failure;

	can_skb_reserve(skb);
	can_skb_prv(skb)->ifindex = ndev->ifindex;
	can_skb_prv(skb)->skbcnt = 0;
	skb_reserve(skb, offsetof(struct can_frame, data));

	ret = memcpy_from_msg(skb_put(skb, size), msg, size);
	if (ret < 0)
		goto free_skb;

	skb->dev = ndev;

	skcb = j1939_skb_to_cb(skb);
	memset(skcb, 0, sizeof(*skcb));
	skcb->addr = jsk->addr;
	skcb->priority = j1939_prio(READ_ONCE(sk->sk_priority));

	if (msg->msg_name) {
		struct sockaddr_can *addr = msg->msg_name;

		/* only override the destination when one is actually given */
		if (addr->can_addr.j1939.name ||
		    addr->can_addr.j1939.addr != J1939_NO_ADDR) {
			skcb->addr.dst_name = addr->can_addr.j1939.name;
			skcb->addr.da = addr->can_addr.j1939.addr;
		}
		if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
			skcb->addr.pgn = addr->can_addr.j1939.pgn;
	}

	*errcode = ret;
	return skb;

free_skb:
	kfree_skb(skb);
failure:
	*errcode = ret;
	return NULL;
}
static size_t j1939_sk_opt_stats_get_size(enum j1939_sk_errqueue_type type) { switch (type) { case J1939_ERRQUEUE_RX_RTS: return nla_total_size(sizeof(u32)) + /* J1939_NLA_TOTAL_SIZE */ nla_total_size(sizeof(u32)) + /* J1939_NLA_PGN */ nla_total_size(sizeof(u64)) + /* J1939_NLA_SRC_NAME */ nla_total_size(sizeof(u64)) + /* J1939_NLA_DEST_NAME */ nla_total_size(sizeof(u8)) + /* J1939_NLA_SRC_ADDR */ nla_total_size(sizeof(u8)) + /* J1939_NLA_DEST_ADDR */ 0; default: return nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */ 0; } } static struct sk_buff * j1939_sk_get_timestamping_opt_stats(struct j1939_session *session, enum j1939_sk_errqueue_type type) { struct sk_buff *stats; u32 size; stats = alloc_skb(j1939_sk_opt_stats_get_size(type), GFP_ATOMIC); if (!stats) return NULL; if (session->skcb.addr.type == J1939_SIMPLE) size = session->total_message_size; else size = min(session->pkt.tx_acked * 7, session->total_message_size); switch (type) { case J1939_ERRQUEUE_RX_RTS: nla_put_u32(stats, J1939_NLA_TOTAL_SIZE, session->total_message_size); nla_put_u32(stats, J1939_NLA_PGN, session->skcb.addr.pgn); nla_put_u64_64bit(stats, J1939_NLA_SRC_NAME, session->skcb.addr.src_name, J1939_NLA_PAD); nla_put_u64_64bit(stats, J1939_NLA_DEST_NAME, session->skcb.addr.dst_name, J1939_NLA_PAD); nla_put_u8(stats, J1939_NLA_SRC_ADDR, session->skcb.addr.sa); nla_put_u8(stats, J1939_NLA_DEST_ADDR, session->skcb.addr.da); break; default: nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size); } return stats; } static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk, enum j1939_sk_errqueue_type type) { struct j1939_priv *priv = session->priv; struct j1939_sock *jsk; struct sock_exterr_skb *serr; struct sk_buff *skb; char *state = "UNK"; u32 tsflags; int err; jsk = j1939_sk(sk); if (!(jsk->state & J1939_SOCK_ERRQUEUE)) return; tsflags = READ_ONCE(sk->sk_tsflags); switch (type) { case J1939_ERRQUEUE_TX_ACK: if (!(tsflags & SOF_TIMESTAMPING_TX_ACK)) return; break; case 
J1939_ERRQUEUE_TX_SCHED: if (!(tsflags & SOF_TIMESTAMPING_TX_SCHED)) return; break; case J1939_ERRQUEUE_TX_ABORT: break; case J1939_ERRQUEUE_RX_RTS: fallthrough; case J1939_ERRQUEUE_RX_DPO: fallthrough; case J1939_ERRQUEUE_RX_ABORT: if (!(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE)) return; break; default: netdev_err(priv->ndev, "Unknown errqueue type %i\n", type); } skb = j1939_sk_get_timestamping_opt_stats(session, type); if (!skb) return; skb->tstamp = ktime_get_real(); BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb)); serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); switch (type) { case J1939_ERRQUEUE_TX_ACK: serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = SCM_TSTAMP_ACK; state = "TX ACK"; break; case J1939_ERRQUEUE_TX_SCHED: serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = SCM_TSTAMP_SCHED; state = "TX SCH"; break; case J1939_ERRQUEUE_TX_ABORT: serr->ee.ee_errno = session->err; serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; serr->ee.ee_info = J1939_EE_INFO_TX_ABORT; state = "TX ABT"; break; case J1939_ERRQUEUE_RX_RTS: serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; serr->ee.ee_info = J1939_EE_INFO_RX_RTS; state = "RX RTS"; break; case J1939_ERRQUEUE_RX_DPO: serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; serr->ee.ee_info = J1939_EE_INFO_RX_DPO; state = "RX DPO"; break; case J1939_ERRQUEUE_RX_ABORT: serr->ee.ee_errno = session->err; serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; serr->ee.ee_info = J1939_EE_INFO_RX_ABORT; state = "RX ABT"; break; } serr->opt_stats = true; if (tsflags & SOF_TIMESTAMPING_OPT_ID) serr->ee.ee_data = session->tskey; netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n", __func__, session, session->tskey, state); err = sock_queue_err_skb(sk, skb); if (err) kfree_skb(skb); }; void j1939_sk_errqueue(struct j1939_session *session, enum j1939_sk_errqueue_type type) { struct j1939_priv 
*priv = session->priv; struct j1939_sock *jsk; if (session->sk) { /* send TX notifications to the socket of origin */ __j1939_sk_errqueue(session, session->sk, type); return; } /* spread RX notifications to all sockets subscribed to this session */ read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { if (j1939_sk_recv_match_one(jsk, &session->skcb)) __j1939_sk_errqueue(session, &jsk->sk, type); } read_unlock_bh(&priv->j1939_socks_lock); }; void j1939_sk_send_loop_abort(struct sock *sk, int err) { struct j1939_sock *jsk = j1939_sk(sk); if (jsk->state & J1939_SOCK_ERRQUEUE) return; sk->sk_err = err; sk_error_report(sk); } static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk, struct msghdr *msg, size_t size) { struct j1939_sock *jsk = j1939_sk(sk); struct j1939_session *session = j1939_sk_get_incomplete_session(jsk); struct sk_buff *skb; size_t segment_size, todo_size; int ret = 0; if (session && session->total_message_size != session->total_queued_size + size) { j1939_session_put(session); return -EIO; } todo_size = size; while (todo_size) { struct j1939_sk_buff_cb *skcb; segment_size = min_t(size_t, J1939_MAX_TP_PACKET_SIZE, todo_size); /* Allocate skb for one segment */ skb = j1939_sk_alloc_skb(priv->ndev, sk, msg, segment_size, &ret); if (ret) break; skcb = j1939_skb_to_cb(skb); if (!session) { /* at this point the size should be full size * of the session */ skcb->offset = 0; session = j1939_tp_send(priv, skb, size); if (IS_ERR(session)) { ret = PTR_ERR(session); goto kfree_skb; } if (j1939_sk_queue_session(session)) { /* try to activate session if we a * fist in the queue */ if (!j1939_session_activate(session)) { j1939_tp_schedule_txtimer(session, 0); } else { ret = -EBUSY; session->err = ret; j1939_sk_queue_drop_all(priv, jsk, EBUSY); break; } } } else { skcb->offset = session->total_queued_size; j1939_session_skb_queue(session, skb); } todo_size -= segment_size; session->total_queued_size += 
segment_size; } switch (ret) { case 0: /* OK */ if (todo_size) netdev_warn(priv->ndev, "no error found and not completely queued?! %zu\n", todo_size); ret = size; break; case -ERESTARTSYS: ret = -EINTR; fallthrough; case -EAGAIN: /* OK */ if (todo_size != size) ret = size - todo_size; break; default: /* ERROR */ break; } if (session) j1939_session_put(session); return ret; kfree_skb: kfree_skb(skb); return ret; } static int j1939_sk_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct j1939_sock *jsk = j1939_sk(sk); struct j1939_priv *priv; int ifindex; int ret; lock_sock(sock->sk); /* various socket state tests */ if (!(jsk->state & J1939_SOCK_BOUND)) { ret = -EBADFD; goto sendmsg_done; } priv = jsk->priv; ifindex = jsk->ifindex; if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR) { /* no source address assigned yet */ ret = -EBADFD; goto sendmsg_done; } /* deal with provided destination address info */ if (msg->msg_name) { struct sockaddr_can *addr = msg->msg_name; if (msg->msg_namelen < J1939_MIN_NAMELEN) { ret = -EINVAL; goto sendmsg_done; } if (addr->can_family != AF_CAN) { ret = -EINVAL; goto sendmsg_done; } if (addr->can_ifindex && addr->can_ifindex != ifindex) { ret = -EBADFD; goto sendmsg_done; } if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) && !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn)) { ret = -EINVAL; goto sendmsg_done; } if (!addr->can_addr.j1939.name && addr->can_addr.j1939.addr == J1939_NO_ADDR && !sock_flag(sk, SOCK_BROADCAST)) { /* broadcast, but SO_BROADCAST not set */ ret = -EACCES; goto sendmsg_done; } } else { if (!jsk->addr.dst_name && jsk->addr.da == J1939_NO_ADDR && !sock_flag(sk, SOCK_BROADCAST)) { /* broadcast, but SO_BROADCAST not set */ ret = -EACCES; goto sendmsg_done; } } ret = j1939_sk_send_loop(priv, sk, msg, size); sendmsg_done: release_sock(sock->sk); return ret; } void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) { struct j1939_sock *jsk; int error_code = 
ENETDOWN; read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { jsk->sk.sk_err = error_code; if (!sock_flag(&jsk->sk, SOCK_DEAD)) sk_error_report(&jsk->sk); j1939_sk_queue_drop_all(priv, jsk, error_code); } read_unlock_bh(&priv->j1939_socks_lock); } static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd, unsigned long arg) { /* no ioctls for socket layer -> hand it down to NIC layer */ return -ENOIOCTLCMD; } static const struct proto_ops j1939_ops = { .family = PF_CAN, .release = j1939_sk_release, .bind = j1939_sk_bind, .connect = j1939_sk_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = j1939_sk_getname, .poll = datagram_poll, .ioctl = j1939_sk_no_ioctlcmd, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = j1939_sk_setsockopt, .getsockopt = j1939_sk_getsockopt, .sendmsg = j1939_sk_sendmsg, .recvmsg = j1939_sk_recvmsg, .mmap = sock_no_mmap, }; static struct proto j1939_proto __read_mostly = { .name = "CAN_J1939", .owner = THIS_MODULE, .obj_size = sizeof(struct j1939_sock), .init = j1939_sk_init, }; const struct can_proto j1939_can_proto = { .type = SOCK_DGRAM, .protocol = CAN_J1939, .ops = &j1939_ops, .prot = &j1939_proto, };
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
/* SPDX-License-Identifier: GPL-2.0-or-later */
/***************************************************************************
 * Linux PPP over X - Generic PPP transport layer sockets
 * Linux PPP over Ethernet (PPPoE) Socket Implementation (RFC 2516)
 *
 * This file supplies definitions required by the PPP over Ethernet driver
 * (pppox.c).  All version information with respect to this file is located
 * in pppox.c
 */
#ifndef __LINUX_IF_PPPOX_H
#define __LINUX_IF_PPPOX_H

#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/ppp_channel.h>
#include <linux/skbuff.h>
#include <linux/workqueue.h>
#include <uapi/linux/if_pppox.h>

/* View the network header of @skb as a PPPoE header. */
static inline struct pppoe_hdr *pppoe_hdr(const struct sk_buff *skb)
{
	return (struct pppoe_hdr *)skb_network_header(skb);
}

/* Per-socket state of a PPPoE socket. */
struct pppoe_opt {
	struct net_device      *dev;	  /* device associated with socket */
	int			ifindex;  /* ifindex of device associated with socket */
	struct pppoe_addr	pa;	  /* what this socket is bound to */
	struct sockaddr_pppox	relay;	  /* what socket data will be
					     relayed to (PPPoE relaying) */
	struct work_struct      padt_work;/* Work item for handling PADT */
};

/* Per-socket state of a PPTP socket. */
struct pptp_opt {
	struct pptp_addr src_addr;
	struct pptp_addr dst_addr;
	u32 ack_sent, ack_recv;
	u32 seq_sent, seq_recv;
	int ppp_flags;
};
#include <net/sock.h>

/* Generic PPPoX socket; the protocol specific part lives in the union. */
struct pppox_sock {
	/* struct sock must be the first member of pppox_sock */
	struct sock sk;
	struct ppp_channel chan;
	struct pppox_sock	*next;	  /* for hash table */
	union {
		struct pppoe_opt pppoe;
		struct pptp_opt  pptp;
	} proto;
	__be16			num;
};

/* Shorthands for the PPPoE members of the proto union. */
#define pppoe_dev	proto.pppoe.dev
#define pppoe_ifindex	proto.pppoe.ifindex
#define pppoe_pa	proto.pppoe.pa
#define pppoe_relay	proto.pppoe.relay

/* Plain cast; valid because sk is the first member of struct pppox_sock. */
static inline struct pppox_sock *pppox_sk(struct sock *sk)
{
	return (struct pppox_sock *)sk;
}

/* Inverse of pppox_sk(), relying on the same layout. */
static inline struct sock *sk_pppox(struct pppox_sock *po)
{
	return (struct sock *)po;
}

struct module;

/* Callbacks and owner that a PPPoX protocol registers. */
struct pppox_proto {
	int		(*create)(struct net *net, struct socket *sock, int kern);
	int		(*ioctl)(struct socket *sock, unsigned int cmd,
				 unsigned long arg);
	struct module	*owner;
};

extern int register_pppox_proto(int proto_num, const struct pppox_proto *pp);
extern void unregister_pppox_proto(int proto_num);
extern void pppox_unbind_sock(struct sock *sk);/* delete ppp-channel binding */
extern int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
extern int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);

#define PPPOEIOCSFWD32    _IOW(0xB1 ,0, compat_size_t)

/* PPPoX socket states */
enum {
    PPPOX_NONE		= 0,  /* initial state */
    PPPOX_CONNECTED	= 1,  /* connection established ==TCP_ESTABLISHED */
    PPPOX_BOUND		= 2,  /* bound to ppp device */
    PPPOX_RELAY		= 4,  /* forwarding is enabled */
    PPPOX_DEAD		= 16  /* dead, useless, please clean me up!*/
};

#endif /* !(__LINUX_IF_PPPOX_H) */
2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 // SPDX-License-Identifier: GPL-2.0-or-later /* * Host Side support for RNDIS Networking Links * Copyright (C) 2005 by David Brownell */ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/workqueue.h> #include <linux/slab.h> #include <linux/mii.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <linux/usb/usbnet.h> #include <linux/usb/rndis_host.h> /* * RNDIS is NDIS remoted over USB. It's a MSFT variant of CDC ACM ... of * course ACM was intended for modems, not Ethernet links! USB's standard * for Ethernet links is "CDC Ethernet", which is significantly simpler. * * NOTE that Microsoft's "RNDIS 1.0" specification is incomplete. Issues * include: * - Power management in particular relies on information that's scattered * through other documentation, and which is incomplete or incorrect even * there. * - There are various undocumented protocol requirements, such as the * need to send unused garbage in control-OUT messages. * - In some cases, MS-Windows will emit undocumented requests; this * matters more to peripheral implementations than host ones. * * Moreover there's a no-open-specs variant of RNDIS called "ActiveSync". 
* * For these reasons and others, ** USE OF RNDIS IS STRONGLY DISCOURAGED ** in * favor of such non-proprietary alternatives as CDC Ethernet or the newer (and * currently rare) "Ethernet Emulation Model" (EEM). */ /* * RNDIS notifications from device: command completion; "reverse" * keepalives; etc */ void rndis_status(struct usbnet *dev, struct urb *urb) { netdev_dbg(dev->net, "rndis status urb, len %d stat %d\n", urb->actual_length, urb->status); // FIXME for keepalives, respond immediately (asynchronously) // if not an RNDIS status, do like cdc_status(dev,urb) does } EXPORT_SYMBOL_GPL(rndis_status); /* * RNDIS indicate messages. */ static void rndis_msg_indicate(struct usbnet *dev, struct rndis_indicate *msg, int buflen) { struct cdc_state *info = (void *)&dev->data; struct device *udev = &info->control->dev; if (dev->driver_info->indication) { dev->driver_info->indication(dev, msg, buflen); } else { u32 status = le32_to_cpu(msg->status); switch (status) { case RNDIS_STATUS_MEDIA_CONNECT: dev_info(udev, "rndis media connect\n"); break; case RNDIS_STATUS_MEDIA_DISCONNECT: dev_info(udev, "rndis media disconnect\n"); break; default: dev_info(udev, "rndis indication: 0x%08x\n", status); } } } /* * RPC done RNDIS-style. Caller guarantees: * - message is properly byteswapped * - there's no other request pending * - buf can hold up to 1KB response (required by RNDIS spec) * On return, the first few entries are already byteswapped. * * Call context is likely probe(), before interface name is known, * which is why we won't try to use it in the diagnostics. 
*/ int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf, int buflen) { struct cdc_state *info = (void *) &dev->data; struct usb_cdc_notification notification; int master_ifnum; int retval; int partial; unsigned count; u32 xid = 0, msg_len, request_id, msg_type, rsp, status; /* REVISIT when this gets called from contexts other than probe() or * disconnect(): either serialize, or dispatch responses on xid */ msg_type = le32_to_cpu(buf->msg_type); /* Issue the request; xid is unique, don't bother byteswapping it */ if (likely(msg_type != RNDIS_MSG_HALT && msg_type != RNDIS_MSG_RESET)) { xid = dev->xid++; if (!xid) xid = dev->xid++; buf->request_id = (__force __le32) xid; } master_ifnum = info->control->cur_altsetting->desc.bInterfaceNumber; retval = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), USB_CDC_SEND_ENCAPSULATED_COMMAND, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, master_ifnum, buf, le32_to_cpu(buf->msg_len), RNDIS_CONTROL_TIMEOUT_MS); if (unlikely(retval < 0 || xid == 0)) return retval; /* Some devices don't respond on the control channel until * polled on the status channel, so do that first. 
*/ if (dev->driver_info->data & RNDIS_DRIVER_DATA_POLL_STATUS) { retval = usb_interrupt_msg( dev->udev, usb_rcvintpipe(dev->udev, dev->status->desc.bEndpointAddress), &notification, sizeof(notification), &partial, RNDIS_CONTROL_TIMEOUT_MS); if (unlikely(retval < 0)) return retval; } /* Poll the control channel; the request probably completed immediately */ rsp = le32_to_cpu(buf->msg_type) | RNDIS_MSG_COMPLETION; for (count = 0; count < 10; count++) { memset(buf, 0, CONTROL_BUFFER_SIZE); retval = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), USB_CDC_GET_ENCAPSULATED_RESPONSE, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, master_ifnum, buf, buflen, RNDIS_CONTROL_TIMEOUT_MS); if (likely(retval >= 8)) { msg_type = le32_to_cpu(buf->msg_type); msg_len = le32_to_cpu(buf->msg_len); status = le32_to_cpu(buf->status); request_id = (__force u32) buf->request_id; if (likely(msg_type == rsp)) { if (likely(request_id == xid)) { if (unlikely(rsp == RNDIS_MSG_RESET_C)) return 0; if (likely(RNDIS_STATUS_SUCCESS == status)) return 0; dev_dbg(&info->control->dev, "rndis reply status %08x\n", status); return -EL3RST; } dev_dbg(&info->control->dev, "rndis reply id %d expected %d\n", request_id, xid); /* then likely retry */ } else switch (msg_type) { case RNDIS_MSG_INDICATE: /* fault/event */ rndis_msg_indicate(dev, (void *)buf, buflen); break; case RNDIS_MSG_KEEPALIVE: { /* ping */ struct rndis_keepalive_c *msg = (void *)buf; msg->msg_type = cpu_to_le32(RNDIS_MSG_KEEPALIVE_C); msg->msg_len = cpu_to_le32(sizeof *msg); msg->status = cpu_to_le32(RNDIS_STATUS_SUCCESS); retval = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), USB_CDC_SEND_ENCAPSULATED_COMMAND, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, master_ifnum, msg, sizeof *msg, RNDIS_CONTROL_TIMEOUT_MS); if (unlikely(retval < 0)) dev_dbg(&info->control->dev, "rndis keepalive err %d\n", retval); } break; default: dev_dbg(&info->control->dev, "unexpected rndis msg %08x len %d\n", le32_to_cpu(buf->msg_type), 
msg_len); } } else { /* device probably issued a protocol stall; ignore */ dev_dbg(&info->control->dev, "rndis response error, code %d\n", retval); } msleep(40); } dev_dbg(&info->control->dev, "rndis response timeout\n"); return -ETIMEDOUT; } EXPORT_SYMBOL_GPL(rndis_command); /* * rndis_query: * * Performs a query for @oid along with 0 or more bytes of payload as * specified by @in_len. If @reply_len is not set to -1 then the reply * length is checked against this value, resulting in an error if it * doesn't match. * * NOTE: Adding a payload exactly or greater than the size of the expected * response payload is an evident requirement MSFT added for ActiveSync. * * The only exception is for OIDs that return a variably sized response, * in which case no payload should be added. This undocumented (and * nonsensical!) issue was found by sniffing protocol requests from the * ActiveSync 4.1 Windows driver. */ static int rndis_query(struct usbnet *dev, struct usb_interface *intf, void *buf, u32 oid, u32 in_len, void **reply, int *reply_len) { int retval; union { void *buf; struct rndis_msg_hdr *header; struct rndis_query *get; struct rndis_query_c *get_c; } u; u32 off, len; u.buf = buf; memset(u.get, 0, sizeof *u.get + in_len); u.get->msg_type = cpu_to_le32(RNDIS_MSG_QUERY); u.get->msg_len = cpu_to_le32(sizeof *u.get + in_len); u.get->oid = cpu_to_le32(oid); u.get->len = cpu_to_le32(in_len); u.get->offset = cpu_to_le32(20); retval = rndis_command(dev, u.header, CONTROL_BUFFER_SIZE); if (unlikely(retval < 0)) { dev_err(&intf->dev, "RNDIS_MSG_QUERY(0x%08x) failed, %d\n", oid, retval); return retval; } off = le32_to_cpu(u.get_c->offset); len = le32_to_cpu(u.get_c->len); if (unlikely((off > CONTROL_BUFFER_SIZE - 8) || (len > CONTROL_BUFFER_SIZE - 8 - off))) goto response_error; if (*reply_len != -1 && len != *reply_len) goto response_error; *reply = (unsigned char *) &u.get_c->request_id + off; *reply_len = len; return retval; response_error: dev_err(&intf->dev, 
"RNDIS_MSG_QUERY(0x%08x) " "invalid response - off %d len %d\n", oid, off, len); return -EDOM; } /* same as usbnet_netdev_ops but MTU change not allowed */ static const struct net_device_ops rndis_netdev_ops = { .ndo_open = usbnet_open, .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; int generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags) { int retval; struct net_device *net = dev->net; struct cdc_state *info = (void *) &dev->data; union { void *buf; struct rndis_msg_hdr *header; struct rndis_init *init; struct rndis_init_c *init_c; struct rndis_query *get; struct rndis_query_c *get_c; struct rndis_set *set; struct rndis_set_c *set_c; struct rndis_halt *halt; } u; u32 tmp; __le32 phym_unspec, *phym; int reply_len; unsigned char *bp; /* we can't rely on i/o from stack working, or stack allocation */ u.buf = kmalloc(CONTROL_BUFFER_SIZE, GFP_KERNEL); if (!u.buf) return -ENOMEM; retval = usbnet_generic_cdc_bind(dev, intf); if (retval < 0) goto fail; u.init->msg_type = cpu_to_le32(RNDIS_MSG_INIT); u.init->msg_len = cpu_to_le32(sizeof *u.init); u.init->major_version = cpu_to_le32(1); u.init->minor_version = cpu_to_le32(0); /* max transfer (in spec) is 0x4000 at full speed, but for * TX we'll stick to one Ethernet packet plus RNDIS framing. * For RX we handle drivers that zero-pad to end-of-packet. * Don't let userspace change these settings. * * NOTE: there still seems to be weirdness here, as if we need * to do some more things to make sure WinCE targets accept this. * They default to jumbograms of 8KB or 16KB, which is absurd * for such low data rates and which is also more than Linux * can usually expect to allocate for SKB data... 
*/ net->hard_header_len += sizeof (struct rndis_data_hdr); dev->hard_mtu = net->mtu + net->hard_header_len; dev->maxpacket = usb_maxpacket(dev->udev, dev->out); if (dev->maxpacket == 0) { netif_dbg(dev, probe, dev->net, "dev->maxpacket can't be 0\n"); retval = -EINVAL; goto fail_and_release; } dev->rx_urb_size = dev->hard_mtu + (dev->maxpacket + 1); dev->rx_urb_size &= ~(dev->maxpacket - 1); u.init->max_transfer_size = cpu_to_le32(dev->rx_urb_size); net->netdev_ops = &rndis_netdev_ops; retval = rndis_command(dev, u.header, CONTROL_BUFFER_SIZE); if (unlikely(retval < 0)) { /* it might not even be an RNDIS device!! */ dev_err(&intf->dev, "RNDIS init failed, %d\n", retval); goto fail_and_release; } tmp = le32_to_cpu(u.init_c->max_transfer_size); if (tmp < dev->hard_mtu) { if (tmp <= net->hard_header_len) { dev_err(&intf->dev, "dev can't take %u byte packets (max %u)\n", dev->hard_mtu, tmp); retval = -EINVAL; goto halt_fail_and_release; } dev_warn(&intf->dev, "dev can't take %u byte packets (max %u), " "adjusting MTU to %u\n", dev->hard_mtu, tmp, tmp - net->hard_header_len); dev->hard_mtu = tmp; net->mtu = dev->hard_mtu - net->hard_header_len; } /* REVISIT: peripheral "alignment" request is ignored ... */ dev_dbg(&intf->dev, "hard mtu %u (%u from dev), rx buflen %zu, align %d\n", dev->hard_mtu, tmp, dev->rx_urb_size, 1 << le32_to_cpu(u.init_c->packet_alignment)); /* module has some device initialization code needs to be done right * after RNDIS_INIT */ if (dev->driver_info->early_init && dev->driver_info->early_init(dev) != 0) goto halt_fail_and_release; /* Check physical medium */ phym = NULL; reply_len = sizeof *phym; retval = rndis_query(dev, intf, u.buf, RNDIS_OID_GEN_PHYSICAL_MEDIUM, reply_len, (void **)&phym, &reply_len); if (retval != 0 || !phym) { /* OID is optional so don't fail here. 
*/ phym_unspec = cpu_to_le32(RNDIS_PHYSICAL_MEDIUM_UNSPECIFIED); phym = &phym_unspec; } if ((flags & FLAG_RNDIS_PHYM_WIRELESS) && le32_to_cpup(phym) != RNDIS_PHYSICAL_MEDIUM_WIRELESS_LAN) { netif_dbg(dev, probe, dev->net, "driver requires wireless physical medium, but device is not\n"); retval = -ENODEV; goto halt_fail_and_release; } if ((flags & FLAG_RNDIS_PHYM_NOT_WIRELESS) && le32_to_cpup(phym) == RNDIS_PHYSICAL_MEDIUM_WIRELESS_LAN) { netif_dbg(dev, probe, dev->net, "driver requires non-wireless physical medium, but device is wireless.\n"); retval = -ENODEV; goto halt_fail_and_release; } /* Get designated host ethernet address */ reply_len = ETH_ALEN; retval = rndis_query(dev, intf, u.buf, RNDIS_OID_802_3_PERMANENT_ADDRESS, 48, (void **) &bp, &reply_len); if (unlikely(retval< 0)) { dev_err(&intf->dev, "rndis get ethaddr, %d\n", retval); goto halt_fail_and_release; } eth_hw_addr_set(net, bp); /* set a nonzero filter to enable data transfers */ memset(u.set, 0, sizeof *u.set); u.set->msg_type = cpu_to_le32(RNDIS_MSG_SET); u.set->msg_len = cpu_to_le32(4 + sizeof *u.set); u.set->oid = cpu_to_le32(RNDIS_OID_GEN_CURRENT_PACKET_FILTER); u.set->len = cpu_to_le32(4); u.set->offset = cpu_to_le32((sizeof *u.set) - 8); *(__le32 *)(u.buf + sizeof *u.set) = cpu_to_le32(RNDIS_DEFAULT_FILTER); retval = rndis_command(dev, u.header, CONTROL_BUFFER_SIZE); if (unlikely(retval < 0)) { dev_err(&intf->dev, "rndis set packet filter, %d\n", retval); goto halt_fail_and_release; } retval = 0; kfree(u.buf); return retval; halt_fail_and_release: memset(u.halt, 0, sizeof *u.halt); u.halt->msg_type = cpu_to_le32(RNDIS_MSG_HALT); u.halt->msg_len = cpu_to_le32(sizeof *u.halt); (void) rndis_command(dev, (void *)u.halt, CONTROL_BUFFER_SIZE); fail_and_release: usb_set_intfdata(info->data, NULL); usb_driver_release_interface(driver_of(intf), info->data); info->data = NULL; fail: kfree(u.buf); return retval; } EXPORT_SYMBOL_GPL(generic_rndis_bind); static int rndis_bind(struct usbnet *dev, struct 
usb_interface *intf) { return generic_rndis_bind(dev, intf, FLAG_RNDIS_PHYM_NOT_WIRELESS); } static int zte_rndis_bind(struct usbnet *dev, struct usb_interface *intf) { int status = rndis_bind(dev, intf); if (!status && (dev->net->dev_addr[0] & 0x02)) eth_hw_addr_random(dev->net); return status; } void rndis_unbind(struct usbnet *dev, struct usb_interface *intf) { struct rndis_halt *halt; /* try to clear any rndis state/activity (no i/o from stack!) */ halt = kzalloc(CONTROL_BUFFER_SIZE, GFP_KERNEL); if (halt) { halt->msg_type = cpu_to_le32(RNDIS_MSG_HALT); halt->msg_len = cpu_to_le32(sizeof *halt); (void) rndis_command(dev, (void *)halt, CONTROL_BUFFER_SIZE); kfree(halt); } usbnet_cdc_unbind(dev, intf); } EXPORT_SYMBOL_GPL(rndis_unbind); /* * DATA -- host must not write zlps */ int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { bool dst_mac_fixup; /* This check is no longer done by usbnet */ if (skb->len < dev->net->hard_header_len) return 0; dst_mac_fixup = !!(dev->driver_info->data & RNDIS_DRIVER_DATA_DST_MAC_FIXUP); /* peripheral may have batched packets to us... */ while (likely(skb->len)) { struct rndis_data_hdr *hdr = (void *)skb->data; struct sk_buff *skb2; u32 msg_type, msg_len, data_offset, data_len; msg_type = le32_to_cpu(hdr->msg_type); msg_len = le32_to_cpu(hdr->msg_len); data_offset = le32_to_cpu(hdr->data_offset); data_len = le32_to_cpu(hdr->data_len); /* don't choke if we see oob, per-packet data, etc */ if (unlikely(msg_type != RNDIS_MSG_PACKET || skb->len < msg_len || (data_offset + data_len + 8) > msg_len)) { dev->net->stats.rx_frame_errors++; netdev_dbg(dev->net, "bad rndis message %d/%d/%d/%d, len %d\n", le32_to_cpu(hdr->msg_type), msg_len, data_offset, data_len, skb->len); return 0; } skb_pull(skb, 8 + data_offset); /* at most one packet left? 
*/ if (likely((data_len - skb->len) <= sizeof *hdr)) { skb_trim(skb, data_len); break; } /* try to return all the packets in the batch */ skb2 = skb_clone(skb, GFP_ATOMIC); if (unlikely(!skb2)) break; skb_pull(skb, msg_len - sizeof *hdr); skb_trim(skb2, data_len); if (unlikely(dst_mac_fixup)) usbnet_cdc_zte_rx_fixup(dev, skb2); usbnet_skb_return(dev, skb2); } /* caller will usbnet_skb_return the remaining packet */ if (unlikely(dst_mac_fixup)) usbnet_cdc_zte_rx_fixup(dev, skb); return 1; } EXPORT_SYMBOL_GPL(rndis_rx_fixup); struct sk_buff * rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { struct rndis_data_hdr *hdr; struct sk_buff *skb2; unsigned len = skb->len; if (likely(!skb_cloned(skb))) { int room = skb_headroom(skb); /* enough head room as-is? */ if (unlikely((sizeof *hdr) <= room)) goto fill; /* enough room, but needs to be readjusted? */ room += skb_tailroom(skb); if (likely((sizeof *hdr) <= room)) { skb->data = memmove(skb->head + sizeof *hdr, skb->data, len); skb_set_tail_pointer(skb, len); goto fill; } } /* create a new skb, with the correct size (and tailpad) */ skb2 = skb_copy_expand(skb, sizeof *hdr, 1, flags); dev_kfree_skb_any(skb); if (unlikely(!skb2)) return skb2; skb = skb2; /* fill out the RNDIS header. we won't bother trying to batch * packets; Linux minimizes wasted bandwidth through tx queues. */ fill: hdr = __skb_push(skb, sizeof *hdr); memset(hdr, 0, sizeof *hdr); hdr->msg_type = cpu_to_le32(RNDIS_MSG_PACKET); hdr->msg_len = cpu_to_le32(skb->len); hdr->data_offset = cpu_to_le32(sizeof(*hdr) - 8); hdr->data_len = cpu_to_le32(len); /* FIXME make the last packet always be short ... 
*/ return skb; } EXPORT_SYMBOL_GPL(rndis_tx_fixup); static const struct driver_info rndis_info = { .description = "RNDIS device", .flags = FLAG_ETHER | FLAG_POINTTOPOINT | FLAG_FRAMING_RN | FLAG_NO_SETINT, .bind = rndis_bind, .unbind = rndis_unbind, .status = rndis_status, .rx_fixup = rndis_rx_fixup, .tx_fixup = rndis_tx_fixup, }; static const struct driver_info rndis_poll_status_info = { .description = "RNDIS device (poll status before control)", .flags = FLAG_ETHER | FLAG_POINTTOPOINT | FLAG_FRAMING_RN | FLAG_NO_SETINT, .data = RNDIS_DRIVER_DATA_POLL_STATUS, .bind = rndis_bind, .unbind = rndis_unbind, .status = rndis_status, .rx_fixup = rndis_rx_fixup, .tx_fixup = rndis_tx_fixup, }; static const struct driver_info zte_rndis_info = { .description = "ZTE RNDIS device", .flags = FLAG_ETHER | FLAG_POINTTOPOINT | FLAG_FRAMING_RN | FLAG_NO_SETINT, .data = RNDIS_DRIVER_DATA_DST_MAC_FIXUP, .bind = zte_rndis_bind, .unbind = rndis_unbind, .status = rndis_status, .rx_fixup = rndis_rx_fixup, .tx_fixup = rndis_tx_fixup, }; /*-------------------------------------------------------------------------*/ static const struct usb_device_id products [] = { { /* 2Wire HomePortal 1000SW */ USB_DEVICE_AND_INTERFACE_INFO(0x1630, 0x0042, USB_CLASS_COMM, 2 /* ACM */, 0x0ff), .driver_info = (unsigned long) &rndis_poll_status_info, }, { /* Hytera Communications DMR radios' "Radio to PC Network" */ USB_VENDOR_AND_INTERFACE_INFO(0x238b, USB_CLASS_COMM, 2 /* ACM */, 0x0ff), .driver_info = (unsigned long)&rndis_info, }, { /* ZTE WWAN modules */ USB_VENDOR_AND_INTERFACE_INFO(0x19d2, USB_CLASS_WIRELESS_CONTROLLER, 1, 3), .driver_info = (unsigned long)&zte_rndis_info, }, { /* ZTE WWAN modules, ACM flavour */ USB_VENDOR_AND_INTERFACE_INFO(0x19d2, USB_CLASS_COMM, 2 /* ACM */, 0x0ff), .driver_info = (unsigned long)&zte_rndis_info, }, { /* RNDIS is MSFT's un-official variant of CDC ACM */ USB_INTERFACE_INFO(USB_CLASS_COMM, 2 /* ACM */, 0x0ff), .driver_info = (unsigned long) &rndis_info, }, { /* 
"ActiveSync" is an undocumented variant of RNDIS, used in WM5 */ USB_INTERFACE_INFO(USB_CLASS_MISC, 1, 1), .driver_info = (unsigned long) &rndis_poll_status_info, }, { /* RNDIS for tethering */ USB_INTERFACE_INFO(USB_CLASS_WIRELESS_CONTROLLER, 1, 3), .driver_info = (unsigned long) &rndis_info, }, { /* Novatel Verizon USB730L */ USB_INTERFACE_INFO(USB_CLASS_MISC, 4, 1), .driver_info = (unsigned long) &rndis_info, }, { }, // END }; MODULE_DEVICE_TABLE(usb, products); static struct usb_driver rndis_driver = { .name = "rndis_host", .id_table = products, .probe = usbnet_probe, .disconnect = usbnet_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, .disable_hub_initiated_lpm = 1, }; module_usb_driver(rndis_driver); MODULE_AUTHOR("David Brownell"); MODULE_DESCRIPTION("USB Host side RNDIS driver"); MODULE_LICENSE("GPL");
6 3 6 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 /* * linux/fs/nls/nls_cp1250.c * * Charset cp1250 translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. 
*/ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x20ac, 0x0000, 0x201a, 0x0000, 0x201e, 0x2026, 0x2020, 0x2021, 0x0000, 0x2030, 0x0160, 0x2039, 0x015a, 0x0164, 0x017d, 0x0179, /* 0x90*/ 0x0000, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x0000, 0x2122, 0x0161, 0x203a, 0x015b, 0x0165, 0x017e, 0x017a, /* 0xa0*/ 0x00a0, 0x02c7, 0x02d8, 0x0141, 0x00a4, 0x0104, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x015e, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x017b, /* 0xb0*/ 0x00b0, 0x00b1, 0x02db, 0x0142, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x0105, 0x015f, 0x00bb, 0x013d, 0x02dd, 0x013e, 0x017c, /* 0xc0*/ 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, /* 0xd0*/ 0x0110, 0x0143, 0x0147, 
0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, /* 0xe0*/ 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, /* 0xf0*/ 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xa0, 0x00, 0x00, 0x00, 0xa4, 0x00, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0x00, 0xab, 0xac, 0xad, 0xae, 0x00, /* 0xa8-0xaf */ 0xb0, 0xb1, 0x00, 0x00, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0x00, 0x00, 0xbb, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0xc1, 0xc2, 0x00, 0xc4, 0x00, 
0x00, 0xc7, /* 0xc0-0xc7 */ 0x00, 0xc9, 0x00, 0xcb, 0x00, 0xcd, 0xce, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0x00, 0x00, 0xda, 0x00, 0xdc, 0xdd, 0x00, 0xdf, /* 0xd8-0xdf */ 0x00, 0xe1, 0xe2, 0x00, 0xe4, 0x00, 0x00, 0xe7, /* 0xe0-0xe7 */ 0x00, 0xe9, 0x00, 0xeb, 0x00, 0xed, 0xee, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0x00, 0x00, 0xfa, 0x00, 0xfc, 0xfd, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page01[256] = { 0x00, 0x00, 0xc3, 0xe3, 0xa5, 0xb9, 0xc6, 0xe6, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xc8, 0xe8, 0xcf, 0xef, /* 0x08-0x0f */ 0xd0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xca, 0xea, 0xcc, 0xec, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0xc5, 0xe5, 0x00, 0x00, 0xbc, 0xbe, 0x00, /* 0x38-0x3f */ 0x00, 0xa3, 0xb3, 0xd1, 0xf1, 0x00, 0x00, 0xd2, /* 0x40-0x47 */ 0xf2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0xd5, 0xf5, 0x00, 0x00, 0xc0, 0xe0, 0x00, 0x00, /* 0x50-0x57 */ 0xd8, 0xf8, 0x8c, 0x9c, 0x00, 0x00, 0xaa, 0xba, /* 0x58-0x5f */ 0x8a, 0x9a, 0xde, 0xfe, 0x8d, 0x9d, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd9, 0xf9, /* 0x68-0x6f */ 0xdb, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x8f, 0x9f, 0xaf, 0xbf, 0x8e, 0x9e, 0x00, /* 0x78-0x7f */ }; static const unsigned char page02[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa1, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0xa2, 0xff, 0x00, 0xb2, 0x00, 0xbd, 0x00, 0x00, /* 0xd8-0xdf */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, /* 0x18-0x1f */ 0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x8b, 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 
0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ }; static const unsigned char page21[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ }; static const unsigned char *const page_uni2charset[256] = { page00, page01, page02, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, page21, NULL, NULL, NULL, NULL, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 
0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x00, 0x82, 0x00, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x00, 0x89, 0x9a, 0x8b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x88-0x8f */ 0x00, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x00, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xb3, 0xa4, 0xb9, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xba, 0xab, 0xac, 0xad, 0xae, 0xbf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbe, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xc0-0xc7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xc8-0xcf */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7, /* 0xd0-0xd7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 
*/ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x00, 0x82, 0x00, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x00, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x00, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x00, 0x99, 0x8a, 0x9b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xa3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xa5, 0xaa, 0xbb, 0xbc, 0xbd, 0xbc, 0xaf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0x00, /* 0xd8-0xdf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xe0-0xe7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xe8-0xef */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xf7, /* 0xf0-0xf7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if 
(uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "cp1250", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_cp1250(void) { return register_nls(&table); } static void __exit exit_nls_cp1250(void) { unregister_nls(&table); } module_init(init_nls_cp1250) module_exit(exit_nls_cp1250) MODULE_LICENSE("Dual BSD/GPL");
1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 // SPDX-License-Identifier: GPL-2.0 #include <linux/ptrace.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <linux/export.h> #include <asm/syscall.h> static int collect_syscall(struct task_struct *target, struct syscall_info *info) { unsigned long args[6] = { }; struct pt_regs *regs; if (!try_get_task_stack(target)) { /* Task has no stack, so the task isn't in a syscall. */ memset(info, 0, sizeof(*info)); info->data.nr = -1; return 0; } regs = task_pt_regs(target); if (unlikely(!regs)) { put_task_stack(target); return -EAGAIN; } info->sp = user_stack_pointer(regs); info->data.instruction_pointer = instruction_pointer(regs); info->data.nr = syscall_get_nr(target, regs); if (info->data.nr != -1L) syscall_get_arguments(target, regs, args); info->data.args[0] = args[0]; info->data.args[1] = args[1]; info->data.args[2] = args[2]; info->data.args[3] = args[3]; info->data.args[4] = args[4]; info->data.args[5] = args[5]; put_task_stack(target); return 0; } /** * task_current_syscall - Discover what a blocked task is doing. * @target: thread to examine * @info: structure with the following fields: * .sp - filled with user stack pointer * .data.nr - filled with system call number or -1 * .data.args - filled with @maxargs system call arguments * .data.instruction_pointer - filled with user PC * * If @target is blocked in a system call, returns zero with @info.data.nr * set to the call's number and @info.data.args filled in with its * arguments. Registers not used for system call arguments may not be available * and it is not kosher to use &struct user_regset calls while the system * call is still in progress. 
Note we may get this result if @target * has finished its system call but not yet returned to user mode, such * as when it's stopped for signal handling or syscall exit tracing. * * If @target is blocked in the kernel during a fault or exception, * returns zero with *@info.data.nr set to -1 and does not fill in * @info.data.args. If so, it's now safe to examine @target using * &struct user_regset get() calls as long as we're sure @target won't return * to user mode. * * Returns -%EAGAIN if @target does not remain blocked. */ int task_current_syscall(struct task_struct *target, struct syscall_info *info) { unsigned long ncsw; unsigned int state; if (target == current) return collect_syscall(target, info); state = READ_ONCE(target->__state); if (unlikely(!state)) return -EAGAIN; ncsw = wait_task_inactive(target, state); if (unlikely(!ncsw) || unlikely(collect_syscall(target, info)) || unlikely(wait_task_inactive(target, state) != ncsw)) return -EAGAIN; return 0; }
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 // SPDX-License-Identifier: GPL-2.0 /****************************************************************************** * * Copyright(c) 2007 - 2010 Realtek Corporation. All rights reserved. * Linux device driver for RTL8192SU * * Modifications for inclusion into the Linux staging tree are * Copyright(c) 2010 Larry Finger. All rights reserved. * * Contact information: * WLAN FAE <wlanfae@realtek.com>. 
* Larry Finger <Larry.Finger@lwfinger.net> * ******************************************************************************/ #define _HAL_INIT_C_ #include <linux/usb.h> #include <linux/device.h> #include <linux/usb/ch9.h> #include <linux/firmware.h> #include <linux/module.h> #include "osdep_service.h" #include "drv_types.h" #include "usb_osintf.h" #define FWBUFF_ALIGN_SZ 512 #define MAX_DUMP_FWSZ (48 * 1024) static void rtl871x_load_fw_fail(struct _adapter *adapter) { struct usb_device *udev = adapter->dvobjpriv.pusbdev; struct device *dev = &udev->dev; struct device *parent = dev->parent; complete(&adapter->rtl8712_fw_ready); dev_err(&udev->dev, "r8712u: Firmware request failed\n"); if (parent) device_lock(parent); device_release_driver(dev); if (parent) device_unlock(parent); } static void rtl871x_load_fw_cb(const struct firmware *firmware, void *context) { struct _adapter *adapter = context; if (!firmware) { rtl871x_load_fw_fail(adapter); return; } adapter->fw = firmware; /* firmware available - start netdev */ register_netdev(adapter->pnetdev); complete(&adapter->rtl8712_fw_ready); } static const char firmware_file[] = "rtlwifi/rtl8712u.bin"; int rtl871x_load_fw(struct _adapter *padapter) { struct device *dev = &padapter->dvobjpriv.pusbdev->dev; int rc; init_completion(&padapter->rtl8712_fw_ready); dev_info(dev, "r8712u: Loading firmware from \"%s\"\n", firmware_file); rc = request_firmware_nowait(THIS_MODULE, 1, firmware_file, dev, GFP_KERNEL, padapter, rtl871x_load_fw_cb); if (rc) dev_err(dev, "r8712u: Firmware request error %d\n", rc); return rc; } MODULE_FIRMWARE("rtlwifi/rtl8712u.bin"); static u32 rtl871x_open_fw(struct _adapter *adapter, const u8 **mappedfw) { if (adapter->fw->size > 200000) { dev_err(&adapter->pnetdev->dev, "r8712u: Bad fw->size of %zu\n", adapter->fw->size); return 0; } *mappedfw = adapter->fw->data; return adapter->fw->size; } static void fill_fwpriv(struct _adapter *adapter, struct fw_priv *fwpriv) { struct dvobj_priv *dvobj = 
&adapter->dvobjpriv; struct registry_priv *regpriv = &adapter->registrypriv; memset(fwpriv, 0, sizeof(struct fw_priv)); /* todo: check if needs endian conversion */ fwpriv->hci_sel = RTL8712_HCI_TYPE_72USB; fwpriv->usb_ep_num = (u8)dvobj->nr_endpoint; fwpriv->bw_40MHz_en = regpriv->cbw40_enable; switch (regpriv->rf_config) { case RTL8712_RF_1T1R: fwpriv->rf_config = RTL8712_RFC_1T1R; break; case RTL8712_RF_2T2R: fwpriv->rf_config = RTL8712_RFC_2T2R; break; case RTL8712_RF_1T2R: default: fwpriv->rf_config = RTL8712_RFC_1T2R; } fwpriv->mp_mode = (regpriv->mp_mode == 1); /* 0:off 1:on 2:auto */ fwpriv->vcs_type = regpriv->vrtl_carrier_sense; fwpriv->vcs_mode = regpriv->vcs_type; /* 1:RTS/CTS 2:CTS to self */ /* default enable turbo_mode */ fwpriv->turbo_mode = (regpriv->wifi_test != 1); fwpriv->low_power_mode = regpriv->low_power; } static void update_fwhdr(struct fw_hdr *pfwhdr, const u8 *pmappedfw) { pfwhdr->signature = le16_to_cpu(*(__le16 *)pmappedfw); pfwhdr->version = le16_to_cpu(*(__le16 *)(pmappedfw + 2)); /* define the size of boot loader */ pfwhdr->dmem_size = le32_to_cpu(*(__le32 *)(pmappedfw + 4)); /* define the size of FW in IMEM */ pfwhdr->img_IMEM_size = le32_to_cpu(*(__le32 *)(pmappedfw + 8)); /* define the size of FW in SRAM */ pfwhdr->img_SRAM_size = le32_to_cpu(*(__le32 *)(pmappedfw + 12)); /* define the size of DMEM variable */ pfwhdr->fw_priv_sz = le32_to_cpu(*(__le32 *)(pmappedfw + 16)); } static u8 chk_fwhdr(struct fw_hdr *pfwhdr, u32 ulfilelength) { u32 fwhdrsz, fw_sz; /* check signature */ if ((pfwhdr->signature != 0x8712) && (pfwhdr->signature != 0x8192)) return _FAIL; /* check fw_priv_sze & sizeof(struct fw_priv) */ if (pfwhdr->fw_priv_sz != sizeof(struct fw_priv)) return _FAIL; /* check fw_sz & image_fw_sz */ fwhdrsz = offsetof(struct fw_hdr, fwpriv) + pfwhdr->fw_priv_sz; fw_sz = fwhdrsz + pfwhdr->img_IMEM_size + pfwhdr->img_SRAM_size + pfwhdr->dmem_size; if (fw_sz != ulfilelength) return _FAIL; return _SUCCESS; } static u8 
rtl8712_dl_fw(struct _adapter *adapter) { sint i; u8 tmp8, tmp8_a; u16 tmp16; u32 maxlen = 0; /* for compare usage */ uint dump_imem_sz, imem_sz, dump_emem_sz, emem_sz; /* max = 49152; */ struct fw_hdr fwhdr; u32 ulfilelength; /* FW file size */ const u8 *mappedfw = NULL; u8 *tmpchar = NULL, *payload, *ptr; struct tx_desc *txdesc; u32 txdscp_sz = sizeof(struct tx_desc); u8 ret = _FAIL; ulfilelength = rtl871x_open_fw(adapter, &mappedfw); if (mappedfw && (ulfilelength > 0)) { update_fwhdr(&fwhdr, mappedfw); if (chk_fwhdr(&fwhdr, ulfilelength) == _FAIL) return ret; fill_fwpriv(adapter, &fwhdr.fwpriv); /* firmware check ok */ maxlen = (fwhdr.img_IMEM_size > fwhdr.img_SRAM_size) ? fwhdr.img_IMEM_size : fwhdr.img_SRAM_size; maxlen += txdscp_sz; tmpchar = kmalloc(maxlen + FWBUFF_ALIGN_SZ, GFP_KERNEL); if (!tmpchar) return ret; txdesc = (struct tx_desc *)(tmpchar + FWBUFF_ALIGN_SZ - ((addr_t)(tmpchar) & (FWBUFF_ALIGN_SZ - 1))); payload = (u8 *)(txdesc) + txdscp_sz; ptr = (u8 *)mappedfw + offsetof(struct fw_hdr, fwpriv) + fwhdr.fw_priv_sz; /* Download FirmWare */ /* 1. 
determine IMEM code size and Load IMEM Code Section */ imem_sz = fwhdr.img_IMEM_size; do { memset(txdesc, 0, TXDESC_SIZE); if (imem_sz > MAX_DUMP_FWSZ/*49152*/) { dump_imem_sz = MAX_DUMP_FWSZ; } else { dump_imem_sz = imem_sz; txdesc->txdw0 |= cpu_to_le32(BIT(28)); } txdesc->txdw0 |= cpu_to_le32(dump_imem_sz & 0x0000ffff); memcpy(payload, ptr, dump_imem_sz); r8712_write_mem(adapter, RTL8712_DMA_VOQ, dump_imem_sz + TXDESC_SIZE, (u8 *)txdesc); ptr += dump_imem_sz; imem_sz -= dump_imem_sz; } while (imem_sz > 0); i = 10; tmp16 = r8712_read16(adapter, TCR); while (((tmp16 & _IMEM_CODE_DONE) == 0) && (i > 0)) { usleep_range(10, 1000); tmp16 = r8712_read16(adapter, TCR); i--; } if (i == 0 || (tmp16 & _IMEM_CHK_RPT) == 0) goto exit_fail; /* 2.Download EMEM code size and Load EMEM Code Section */ emem_sz = fwhdr.img_SRAM_size; do { memset(txdesc, 0, TXDESC_SIZE); if (emem_sz > MAX_DUMP_FWSZ) { /* max=48k */ dump_emem_sz = MAX_DUMP_FWSZ; } else { dump_emem_sz = emem_sz; txdesc->txdw0 |= cpu_to_le32(BIT(28)); } txdesc->txdw0 |= cpu_to_le32(dump_emem_sz & 0x0000ffff); memcpy(payload, ptr, dump_emem_sz); r8712_write_mem(adapter, RTL8712_DMA_VOQ, dump_emem_sz + TXDESC_SIZE, (u8 *)txdesc); ptr += dump_emem_sz; emem_sz -= dump_emem_sz; } while (emem_sz > 0); i = 5; tmp16 = r8712_read16(adapter, TCR); while (((tmp16 & _EMEM_CODE_DONE) == 0) && (i > 0)) { usleep_range(10, 1000); tmp16 = r8712_read16(adapter, TCR); i--; } if (i == 0 || (tmp16 & _EMEM_CHK_RPT) == 0) goto exit_fail; /* 3.Enable CPU */ tmp8 = r8712_read8(adapter, SYS_CLKR); r8712_write8(adapter, SYS_CLKR, tmp8 | BIT(2)); tmp8_a = r8712_read8(adapter, SYS_CLKR); if (tmp8_a != (tmp8 | BIT(2))) goto exit_fail; tmp8 = r8712_read8(adapter, SYS_FUNC_EN + 1); r8712_write8(adapter, SYS_FUNC_EN + 1, tmp8 | BIT(2)); tmp8_a = r8712_read8(adapter, SYS_FUNC_EN + 1); if (tmp8_a != (tmp8 | BIT(2))) goto exit_fail; r8712_read32(adapter, TCR); /* 4.polling IMEM Ready */ i = 100; tmp16 = r8712_read16(adapter, TCR); while (((tmp16 & 
_IMEM_RDY) == 0) && (i > 0)) { msleep(20); tmp16 = r8712_read16(adapter, TCR); i--; } if (i == 0) { r8712_write16(adapter, 0x10250348, 0xc000); r8712_write16(adapter, 0x10250348, 0xc001); r8712_write16(adapter, 0x10250348, 0x2000); r8712_write16(adapter, 0x10250348, 0x2001); r8712_write16(adapter, 0x10250348, 0x2002); r8712_write16(adapter, 0x10250348, 0x2003); goto exit_fail; } /* 5.Download DMEM code size and Load EMEM Code Section */ memset(txdesc, 0, TXDESC_SIZE); txdesc->txdw0 |= cpu_to_le32(fwhdr.fw_priv_sz & 0x0000ffff); txdesc->txdw0 |= cpu_to_le32(BIT(28)); memcpy(payload, &fwhdr.fwpriv, fwhdr.fw_priv_sz); r8712_write_mem(adapter, RTL8712_DMA_VOQ, fwhdr.fw_priv_sz + TXDESC_SIZE, (u8 *)txdesc); /* polling dmem code done */ i = 100; tmp16 = r8712_read16(adapter, TCR); while (((tmp16 & _DMEM_CODE_DONE) == 0) && (i > 0)) { msleep(20); tmp16 = r8712_read16(adapter, TCR); i--; } if (i == 0) goto exit_fail; tmp8 = r8712_read8(adapter, 0x1025000A); if (tmp8 & BIT(4)) /* When boot from EEPROM, * & FW need more time to read EEPROM */ i = 60; else /* boot from EFUSE */ i = 30; tmp16 = r8712_read16(adapter, TCR); while (((tmp16 & _FWRDY) == 0) && (i > 0)) { msleep(100); tmp16 = r8712_read16(adapter, TCR); i--; } if (i == 0) goto exit_fail; } else { goto exit_fail; } ret = _SUCCESS; exit_fail: kfree(tmpchar); return ret; } uint rtl8712_hal_init(struct _adapter *padapter) { u32 val32; int i; /* r8712 firmware download */ if (rtl8712_dl_fw(padapter) != _SUCCESS) return _FAIL; netdev_info(padapter->pnetdev, "1 RCR=0x%x\n", r8712_read32(padapter, RCR)); val32 = r8712_read32(padapter, RCR); r8712_write32(padapter, RCR, (val32 | BIT(26))); /* Enable RX TCP * Checksum offload */ netdev_info(padapter->pnetdev, "2 RCR=0x%x\n", r8712_read32(padapter, RCR)); val32 = r8712_read32(padapter, RCR); r8712_write32(padapter, RCR, (val32 | BIT(25))); /* Append PHY status */ val32 = r8712_read32(padapter, 0x10250040); r8712_write32(padapter, 0x10250040, (val32 & 0x00FFFFFF)); /* for usb 
rx aggregation */ r8712_write8(padapter, 0x102500B5, r8712_read8(padapter, 0x102500B5) | BIT(0)); /* page = 128bytes */ r8712_write8(padapter, 0x102500BD, r8712_read8(padapter, 0x102500BD) | BIT(7)); /* enable usb rx aggregation */ r8712_write8(padapter, 0x102500D9, 1); /* TH=1 => means that invalidate * usb rx aggregation */ r8712_write8(padapter, 0x1025FE5B, 0x04); /* 1.7ms/4 */ /* Fix the RX FIFO issue(USB error) */ r8712_write8(padapter, 0x1025fe5C, r8712_read8(padapter, 0x1025fe5C) | BIT(7)); for (i = 0; i < ETH_ALEN; i++) padapter->eeprompriv.mac_addr[i] = r8712_read8(padapter, MACID + i); return _SUCCESS; } uint rtl8712_hal_deinit(struct _adapter *padapter) { r8712_write8(padapter, RF_CTRL, 0x00); /* Turn off BB */ msleep(20); /* Turn off MAC */ r8712_write8(padapter, SYS_CLKR + 1, 0x38); /* Switch Control Path */ r8712_write8(padapter, SYS_FUNC_EN + 1, 0x70); r8712_write8(padapter, PMC_FSM, 0x06); /* Enable Loader Data Keep */ r8712_write8(padapter, SYS_ISO_CTRL, 0xF9); /* Isolation signals from * CORE, PLL */ r8712_write8(padapter, SYS_ISO_CTRL + 1, 0xe8); /* Enable EFUSE 1.2V */ r8712_write8(padapter, AFE_PLL_CTRL, 0x00); /* Disable AFE PLL. */ r8712_write8(padapter, LDOA15_CTRL, 0x54); /* Disable A15V */ r8712_write8(padapter, SYS_FUNC_EN + 1, 0x50); /* Disable E-Fuse 1.2V */ r8712_write8(padapter, LDOV12D_CTRL, 0x24); /* Disable LDO12(for CE) */ r8712_write8(padapter, AFE_MISC, 0x30); /* Disable AFE BG&MB */ /* Option for Disable 1.6V LDO. */ r8712_write8(padapter, SPS0_CTRL, 0x56); /* Disable 1.6V LDO */ r8712_write8(padapter, SPS0_CTRL + 1, 0x43); /* Set SW PFM */ return _SUCCESS; } uint rtl871x_hal_init(struct _adapter *padapter) { padapter->hw_init_completed = false; if (!padapter->halpriv.hal_bus_init) return _FAIL; if (padapter->halpriv.hal_bus_init(padapter) != _SUCCESS) return _FAIL; if (rtl8712_hal_init(padapter) == _SUCCESS) { padapter->hw_init_completed = true; } else { padapter->hw_init_completed = false; return _FAIL; } return _SUCCESS; }
11 28 1 2 16 16 1 1 3 3 43 2 10 32 38 41 7 7 2 34 6 2 2 2 2 14 7 1 1 3 4 2 1 1 5 5 5 7 7 6 1842 1844 3 1 2 28 28 28 28 2 1 2 1 2 6 2 6 2 6 2 1 2 1 9 2 1 3 2 1 8 53 1 2 37 14 27 9 11 46 43 6 1 1 2 2 6 1 1 2 2 6 7 6 5 4 4 4 8 1 1 2 4 1 3 24 1 3 20 1 3 4 8 4 9 3 8 4 7 1 2 2 20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 
459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 
959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 
1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 /* * net/tipc/bearer.c: TIPC bearer code * * Copyright (c) 1996-2006, 2013-2016, Ericsson AB * Copyright (c) 2004-2006, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include <net/sock.h> #include "core.h" #include "bearer.h" #include "link.h" #include "discover.h" #include "monitor.h" #include "bcast.h" #include "netlink.h" #include "udp_media.h" #include "trace.h" #include "crypto.h" #define MAX_ADDR_STR 60 static struct tipc_media * const media_info_array[] = { &eth_media_info, #ifdef CONFIG_TIPC_MEDIA_IB &ib_media_info, #endif #ifdef CONFIG_TIPC_MEDIA_UDP &udp_media_info, #endif NULL }; static struct tipc_bearer *bearer_get(struct net *net, int bearer_id) { struct tipc_net *tn = tipc_net(net); return rcu_dereference(tn->bearer_list[bearer_id]); } static void bearer_disable(struct net *net, struct tipc_bearer *b); static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); /** * tipc_media_find - locates specified media object by name * @name: name to locate */ struct tipc_media *tipc_media_find(const char *name) { u32 i; for (i = 0; media_info_array[i] != NULL; i++) { if (!strcmp(media_info_array[i]->name, name)) break; } return media_info_array[i]; } /** * media_find_id - locates specified media object by type identifier * @type: type identifier to locate */ static struct tipc_media *media_find_id(u8 type) { u32 i; for (i = 0; media_info_array[i] != NULL; i++) { if (media_info_array[i]->type_id == type) break; } return media_info_array[i]; } /** * tipc_media_addr_printf - record media address in print buffer * @buf: output buffer * @len: output buffer size remaining * @a: input media address */ int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) { char addr_str[MAX_ADDR_STR]; struct tipc_media *m; int ret; m = media_find_id(a->media_id); if (m && !m->addr2str(a, addr_str, sizeof(addr_str))) ret = scnprintf(buf, len, "%s(%s)", m->name, addr_str); else { u32 i; ret = scnprintf(buf, len, "UNKNOWN(%u)", a->media_id); for (i = 0; i < sizeof(a->value); i++) ret += scnprintf(buf + ret, len - ret, "-%x", a->value[i]); } return ret; } /** * 
bearer_name_validate - validate & (optionally) deconstruct bearer name * @name: ptr to bearer name string * @name_parts: ptr to area for bearer name components (or NULL if not needed) * * Return: 1 if bearer name is valid, otherwise 0. */ static int bearer_name_validate(const char *name, struct tipc_bearer_names *name_parts) { char name_copy[TIPC_MAX_BEARER_NAME]; char *media_name; char *if_name; u32 media_len; u32 if_len; /* copy bearer name & ensure length is OK */ if (strscpy(name_copy, name, TIPC_MAX_BEARER_NAME) < 0) return 0; /* ensure all component parts of bearer name are present */ media_name = name_copy; if_name = strchr(media_name, ':'); if (if_name == NULL) return 0; *(if_name++) = 0; media_len = if_name - media_name; if_len = strlen(if_name) + 1; /* validate component parts of bearer name */ if ((media_len <= 1) || (media_len > TIPC_MAX_MEDIA_NAME) || (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME)) return 0; /* return bearer name components, if necessary */ if (name_parts) { strcpy(name_parts->media_name, media_name); strcpy(name_parts->if_name, if_name); } return 1; } /** * tipc_bearer_find - locates bearer object with matching bearer name * @net: the applicable net namespace * @name: bearer name to locate */ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { b = rtnl_dereference(tn->bearer_list[i]); if (b && (!strcmp(b->name, name))) return b; } return NULL; } /* tipc_bearer_get_name - get the bearer name from its id. * @net: network namespace * @name: a pointer to the buffer where the name will be stored. * @bearer_id: the id to get the name from. 
/**
 * tipc_enable_bearer - enable bearer with the given name
 * @net: the applicable net namespace
 * @name: bearer name to enable
 * @disc_domain: bearer domain
 * @prio: bearer priority, or TIPC_MEDIA_LINK_PRI to use the media's priority
 * @attr: nlattr array, passed through to the media's enable_media() hook
 * @extack: netlink extended ack
 *
 * Validates @name and @prio, looks up the media named in @name, picks a free
 * slot in the namespace's bearer list, then allocates the bearer, enables its
 * media, creates its discoverer and monitor, and finally publishes it in the
 * bearer list.
 *
 * NOTE(review): the bearer list is read with rtnl_dereference(), so this
 * presumably runs with RTNL held - confirm against callers.
 *
 * Return: 0 on success; -EINVAL when the request is rejected before the
 * bearer is allocated; -ENOMEM when kzalloc() or tipc_mon_create() fails;
 * otherwise the error returned by the media's enable_media() hook or by
 * tipc_disc_create().
 */
static int tipc_enable_bearer(struct net *net, const char *name,
			      u32 disc_domain, u32 prio,
			      struct nlattr *attr[],
			      struct netlink_ext_ack *extack)
{
	struct tipc_net *tn = tipc_net(net);
	struct tipc_bearer_names b_names;
	/* starts at 1 so that the second existing match trips the "> 2" check */
	int with_this_prio = 1;
	struct tipc_bearer *b;
	struct tipc_media *m;
	struct sk_buff *skb;
	int bearer_id = 0;
	int res = -EINVAL;
	char *errstr = "";
	u32 i;

	/* an illegal name returns directly and skips the "rejected" log line */
	if (!bearer_name_validate(name, &b_names)) {
		NL_SET_ERR_MSG(extack, "Illegal name");
		return res;
	}

	if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
		errstr = "illegal priority";
		NL_SET_ERR_MSG(extack, "Illegal priority");
		goto rejected;
	}

	m = tipc_media_find(b_names.media_name);
	if (!m) {
		errstr = "media not registered";
		NL_SET_ERR_MSG(extack, "Media not registered");
		goto rejected;
	}

	/* TIPC_MEDIA_LINK_PRI is replaced by the media's own priority */
	if (prio == TIPC_MEDIA_LINK_PRI)
		prio = m->priority;

	/* Check new bearer vs existing ones and find free bearer id if any */
	/*
	 * Slots are walked from the highest index down, so bearer_id ends up
	 * holding the lowest free index seen.
	 */
	bearer_id = MAX_BEARERS;
	i = MAX_BEARERS;
	while (i-- != 0) {
		b = rtnl_dereference(tn->bearer_list[i]);
		if (!b) {
			bearer_id = i;
			continue;
		}
		if (!strcmp(name, b->name)) {
			errstr = "already enabled";
			NL_SET_ERR_MSG(extack, "Already enabled");
			goto rejected;
		}

		if (b->priority == prio &&
		    (++with_this_prio > 2)) {
			pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
				name, prio);

			if (prio == TIPC_MIN_LINK_PRI) {
				errstr = "cannot adjust to lower";
				NL_SET_ERR_MSG(extack, "Cannot adjust to lower");
				goto rejected;
			}

			pr_warn("Bearer <%s>: trying with adjusted priority\n",
				name);
			/* lower the priority and restart the whole scan */
			prio--;
			bearer_id = MAX_BEARERS;
			i = MAX_BEARERS;
			with_this_prio = 1;
		}
	}

	/* bearer_id is still MAX_BEARERS when no free slot was found */
	/* NOTE(review): the message hard-codes "3" rather than MAX_BEARERS */
	if (bearer_id >= MAX_BEARERS) {
		errstr = "max 3 bearers permitted";
		NL_SET_ERR_MSG(extack, "Max 3 bearers permitted");
		goto rejected;
	}

	b = kzalloc(sizeof(*b), GFP_ATOMIC);
	if (!b)
		return -ENOMEM;

	strcpy(b->name, name);
	b->media = m;
	res = m->enable_media(net, b, attr);
	if (res) {
		/* nothing else refers to b yet, so a plain kfree() is used */
		kfree(b);
		errstr = "failed to enable media";
		NL_SET_ERR_MSG(extack, "Failed to enable media");
		goto rejected;
	}

	b->identity = bearer_id;
	b->tolerance = m->tolerance;
	b->min_win = m->min_win;
	b->max_win = m->max_win;
	b->domain = disc_domain;
	b->net_plane = bearer_id + 'A';
	b->priority = prio;
	refcount_set(&b->refcnt, 1);

	/* from here on, failures are unwound through bearer_disable() */
	res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
	if (res) {
		bearer_disable(net, b);
		errstr = "failed to create discoverer";
		NL_SET_ERR_MSG(extack, "Failed to create discoverer");
		goto rejected;
	}

	/* Create monitoring data before accepting activate messages */
	if (tipc_mon_create(net, bearer_id)) {
		bearer_disable(net, b);
		/* skb was filled in by tipc_disc_create() and will not be sent */
		kfree_skb(skb);
		return -ENOMEM;
	}

	/* mark the bearer up, publish it, then send the buffer from discovery */
	test_and_set_bit_lock(0, &b->up);
	rcu_assign_pointer(tn->bearer_list[bearer_id], b);
	if (skb)
		tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);

	pr_info("Enabled bearer <%s>, priority %u\n", name, prio);

	return res;
rejected:
	pr_warn("Enabling of bearer <%s> rejected, %s\n", name, errstr);
	return res;
}
pr_info("Resetting bearer <%s>\n", b->name); tipc_node_delete_links(net, b->identity); tipc_disc_reset(net, b); return 0; } bool tipc_bearer_hold(struct tipc_bearer *b) { return (b && refcount_inc_not_zero(&b->refcnt)); } void tipc_bearer_put(struct tipc_bearer *b) { if (b && refcount_dec_and_test(&b->refcnt)) kfree_rcu(b, rcu); } /** * bearer_disable - disable this bearer * @net: the applicable net namespace * @b: the bearer to disable * * Note: This routine assumes caller holds RTNL lock. */ static void bearer_disable(struct net *net, struct tipc_bearer *b) { struct tipc_net *tn = tipc_net(net); int bearer_id = b->identity; pr_info("Disabling bearer <%s>\n", b->name); clear_bit_unlock(0, &b->up); tipc_node_delete_links(net, bearer_id); b->media->disable_media(b); RCU_INIT_POINTER(b->media_ptr, NULL); if (b->disc) tipc_disc_delete(b->disc); RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL); tipc_bearer_put(b); tipc_mon_delete(net, bearer_id); } int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, struct nlattr *attr[]) { char *dev_name = strchr((const char *)b->name, ':') + 1; int hwaddr_len = b->media->hwaddr_len; u8 node_id[NODE_ID_LEN] = {0,}; struct net_device *dev; /* Find device with specified name */ dev = dev_get_by_name(net, dev_name); if (!dev) return -ENODEV; if (tipc_mtu_bad(dev)) { dev_put(dev); return -EINVAL; } if (dev == net->loopback_dev) { dev_put(dev); pr_info("Enabling <%s> not permitted\n", b->name); return -EINVAL; } /* Autoconfigure own node identity if needed */ if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) { memcpy(node_id, dev->dev_addr, hwaddr_len); tipc_net_init(net, node_id, 0); } if (!tipc_own_id(net)) { dev_put(dev); pr_warn("Failed to obtain node identity\n"); return -EINVAL; } /* Associate TIPC bearer with L2 bearer */ rcu_assign_pointer(b->media_ptr, dev); b->pt.dev = dev; b->pt.type = htons(ETH_P_TIPC); b->pt.func = tipc_l2_rcv_msg; dev_add_pack(&b->pt); memset(&b->bcast_addr, 0, sizeof(b->bcast_addr)); 
memcpy(b->bcast_addr.value, dev->broadcast, hwaddr_len); b->bcast_addr.media_id = b->media->type_id; b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; b->mtu = dev->mtu; b->media->raw2addr(b, &b->addr, (const char *)dev->dev_addr); rcu_assign_pointer(dev->tipc_ptr, b); return 0; } /* tipc_disable_l2_media - detach TIPC bearer from an L2 interface * @b: the target bearer * * Mark L2 bearer as inactive so that incoming buffers are thrown away */ void tipc_disable_l2_media(struct tipc_bearer *b) { struct net_device *dev; dev = (struct net_device *)rtnl_dereference(b->media_ptr); dev_remove_pack(&b->pt); RCU_INIT_POINTER(dev->tipc_ptr, NULL); synchronize_net(); dev_put(dev); } /** * tipc_l2_send_msg - send a TIPC packet out over an L2 interface * @net: the associated network namespace * @skb: the packet to be sent * @b: the bearer through which the packet is to be sent * @dest: peer destination address */ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, struct tipc_bearer *b, struct tipc_media_addr *dest) { struct net_device *dev; int delta; dev = (struct net_device *)rcu_dereference(b->media_ptr); if (!dev) return 0; delta = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); if ((delta > 0) && pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) { kfree_skb(skb); return 0; } skb_reset_network_header(skb); skb->dev = dev; skb->protocol = htons(ETH_P_TIPC); dev_hard_header(skb, dev, ETH_P_TIPC, dest->value, dev->dev_addr, skb->len); dev_queue_xmit(skb); return 0; } bool tipc_bearer_bcast_support(struct net *net, u32 bearer_id) { bool supp = false; struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (b) supp = (b->bcast_addr.broadcast == TIPC_BROADCAST_SUPPORT); rcu_read_unlock(); return supp; } int tipc_bearer_mtu(struct net *net, u32 bearer_id) { int mtu = 0; struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (b) mtu = b->mtu; rcu_read_unlock(); return mtu; } int tipc_bearer_min_mtu(struct net *net, u32 bearer_id) 
/* tipc_bearer_xmit() - send every buffer in @xmitq to @dst over the bearer
 *
 * The queue is always drained. If the bearer does not exist, all buffers are
 * purged. Otherwise each buffer is sent when the bearer is up or the message
 * is a reset message, and freed when neither holds.
 */
void tipc_bearer_xmit(struct net *net, u32 bearer_id,
		      struct sk_buff_head *xmitq,
		      struct tipc_media_addr *dst,
		      struct tipc_node *__dnode)
{
	struct tipc_bearer *bearer;
	struct sk_buff *pkt;

	if (skb_queue_empty(xmitq))
		return;

	rcu_read_lock();
	bearer = bearer_get(net, bearer_id);
	if (unlikely(!bearer)) {
		__skb_queue_purge(xmitq);
	} else {
		/* take buffers off the head one at a time, in queue order */
		while ((pkt = __skb_dequeue(xmitq)) != NULL) {
			if (unlikely(!test_bit(0, &bearer->up) &&
				     !msg_is_reset(buf_msg(pkt)))) {
				kfree_skb(pkt);
				continue;
			}
#ifdef CONFIG_TIPC_CRYPTO
			/* the crypto layer may leave pkt NULL; then skip sending */
			tipc_crypto_xmit(net, &pkt, bearer, dst, __dnode);
			if (pkt)
#endif
			bearer->media->send_msg(net, pkt, bearer, dst);
		}
	}
	rcu_read_unlock();
}
tipc_crypto_xmit(net, &skb, b, dst, NULL); if (skb) #endif b->media->send_msg(net, skb, b, dst); } rcu_read_unlock(); } /** * tipc_l2_rcv_msg - handle incoming TIPC message from an interface * @skb: the received message * @dev: the net device that the packet was received on * @pt: the packet_type structure which was used to register this handler * @orig_dev: the original receive net device in case the device is a bond * * Accept only packets explicitly sent to this node, or broadcast packets; * ignores packets sent using interface multicast, and traffic sent to other * nodes (which can happen if interface is running in promiscuous mode). */ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct tipc_bearer *b; rcu_read_lock(); b = rcu_dereference(dev->tipc_ptr) ?: rcu_dereference(orig_dev->tipc_ptr); if (likely(b && test_bit(0, &b->up) && (skb->pkt_type <= PACKET_MULTICAST))) { skb_mark_not_on_list(skb); TIPC_SKB_CB(skb)->flags = 0; tipc_rcv(dev_net(b->pt.dev), skb, b); rcu_read_unlock(); return NET_RX_SUCCESS; } rcu_read_unlock(); kfree_skb(skb); return NET_RX_DROP; } /** * tipc_l2_device_event - handle device events from network device * @nb: the context of the notification * @evt: the type of event * @ptr: the net device that the event was on * * This function is called by the Ethernet driver in case of link * change event. 
/**
 * tipc_l2_device_event - handle device events from network device
 * @nb: the context of the notification
 * @evt: the type of event
 * @ptr: notifier info; converted to the affected net device with
 *       netdev_notifier_info_to_dev()
 *
 * Looks up the bearer attached to the device through dev->tipc_ptr and
 * updates, resets or disables it according to @evt.
 *
 * Return: NOTIFY_DONE if no bearer is attached to the device, NOTIFY_OK
 * otherwise (including for event types that are not handled below).
 */
static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
				void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct tipc_bearer *b;

	b = rtnl_dereference(dev->tipc_ptr);
	if (!b)
		return NOTIFY_DONE;

	trace_tipc_l2_device_event(dev, b, evt);
	switch (evt) {
	case NETDEV_CHANGE:
		/* carrier and operstate both up: just mark the bearer up */
		if (netif_carrier_ok(dev) && netif_oper_up(dev)) {
			test_and_set_bit_lock(0, &b->up);
			break;
		}
		/* otherwise handle it like a device that is going down */
		fallthrough;
	case NETDEV_GOING_DOWN:
		clear_bit_unlock(0, &b->up);
		tipc_reset_bearer(net, b);
		break;
	case NETDEV_UP:
		test_and_set_bit_lock(0, &b->up);
		break;
	case NETDEV_CHANGEMTU:
		/* an MTU rejected by tipc_mtu_bad() disables the bearer */
		if (tipc_mtu_bad(dev)) {
			bearer_disable(net, b);
			break;
		}
		b->mtu = dev->mtu;
		tipc_reset_bearer(net, b);
		break;
	case NETDEV_CHANGEADDR:
		/* refresh the bearer's address from the new device address */
		b->media->raw2addr(b, &b->addr,
				   (const char *)dev->dev_addr);
		tipc_reset_bearer(net, b);
		break;
	case NETDEV_UNREGISTER:
	case NETDEV_CHANGENAME:
		bearer_disable(net, b);
		break;
	}
	return NOTIFY_OK;
}
PACKET_HOST; skb->ip_summed = CHECKSUM_UNNECESSARY; skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); } } static int tipc_loopback_rcv_pkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *od) { consume_skb(skb); return NET_RX_SUCCESS; } int tipc_attach_loopback(struct net *net) { struct net_device *dev = net->loopback_dev; struct tipc_net *tn = tipc_net(net); if (!dev) return -ENODEV; netdev_hold(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL); tn->loopback_pt.dev = dev; tn->loopback_pt.type = htons(ETH_P_TIPC); tn->loopback_pt.func = tipc_loopback_rcv_pkt; dev_add_pack(&tn->loopback_pt); return 0; } void tipc_detach_loopback(struct net *net) { struct tipc_net *tn = tipc_net(net); dev_remove_pack(&tn->loopback_pt); netdev_put(net->loopback_dev, &tn->loopback_pt.dev_tracker); } /* Caller should hold rtnl_lock to protect the bearer */ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, struct tipc_bearer *bearer, int nlflags) { void *hdr; struct nlattr *attrs; struct nlattr *prop; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, nlflags, TIPC_NL_BEARER_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_BEARER_NAME, bearer->name)) goto attr_msg_full; prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_PROP); if (!prop) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, bearer->priority)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->max_win)) goto prop_msg_full; if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu)) goto prop_msg_full; nla_nest_end(msg->skb, prop); #ifdef CONFIG_TIPC_MEDIA_UDP if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) { if (tipc_udp_nl_add_bearer_data(msg, bearer)) goto attr_msg_full; } 
#endif nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; prop_msg_full: nla_nest_cancel(msg->skb, prop); attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; int i = cb->args[0]; struct tipc_bearer *bearer; struct tipc_nl_msg msg; struct net *net = sock_net(skb->sk); struct tipc_net *tn = tipc_net(net); if (i == MAX_BEARERS) return 0; msg.skb = skb; msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; rtnl_lock(); for (i = 0; i < MAX_BEARERS; i++) { bearer = rtnl_dereference(tn->bearer_list[i]); if (!bearer) continue; err = __tipc_nl_add_bearer(&msg, bearer, NLM_F_MULTI); if (err) break; } rtnl_unlock(); cb->args[0] = i; return skb->len; } int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct sk_buff *rep; struct tipc_bearer *bearer; struct tipc_nl_msg msg; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!rep) return -ENOMEM; msg.skb = rep; msg.portid = info->snd_portid; msg.seq = info->snd_seq; rtnl_lock(); bearer = tipc_bearer_find(net, name); if (!bearer) { err = -EINVAL; NL_SET_ERR_MSG(info->extack, "Bearer not found"); goto err_out; } err = __tipc_nl_add_bearer(&msg, bearer, 0); if (err) goto err_out; rtnl_unlock(); return genlmsg_reply(rep, info); err_out: rtnl_unlock(); nlmsg_free(rep); return err; } int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct tipc_bearer *bearer; struct nlattr 
*attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); bearer = tipc_bearer_find(net, name); if (!bearer) { NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; } bearer_disable(net, bearer); return 0; } int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_bearer_disable(skb, info); rtnl_unlock(); return err; } int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { int err; char *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); u32 domain = 0; u32 prio; prio = TIPC_MEDIA_LINK_PRI; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; bearer = nla_data(attrs[TIPC_NLA_BEARER_NAME]); if (attrs[TIPC_NLA_BEARER_DOMAIN]) domain = nla_get_u32(attrs[TIPC_NLA_BEARER_DOMAIN]); if (attrs[TIPC_NLA_BEARER_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], props); if (err) return err; if (props[TIPC_NLA_PROP_PRIO]) prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); } return tipc_enable_bearer(net, bearer, domain, prio, attrs, info->extack); } int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_bearer_enable(skb, info); rtnl_unlock(); return err; } int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = 
sock_net(skb->sk); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rtnl_lock(); b = tipc_bearer_find(net, name); if (!b) { rtnl_unlock(); NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; } #ifdef CONFIG_TIPC_MEDIA_UDP if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { rtnl_unlock(); NL_SET_ERR_MSG(info->extack, "UDP option is unsupported"); return -EINVAL; } err = tipc_udp_nl_bearer_add(b, attrs[TIPC_NLA_BEARER_UDP_OPTS]); if (err) { rtnl_unlock(); return err; } } #endif rtnl_unlock(); return 0; } int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) { struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); char *name; int err; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); b = tipc_bearer_find(net, name); if (!b) { NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; } if (attrs[TIPC_NLA_BEARER_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], props); if (err) return err; if (props[TIPC_NLA_PROP_TOL]) { b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); tipc_node_apply_property(net, b, TIPC_NLA_PROP_TOL); } if (props[TIPC_NLA_PROP_PRIO]) b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { NL_SET_ERR_MSG(info->extack, 
"MTU property is unsupported"); return -EINVAL; } #ifdef CONFIG_TIPC_MEDIA_UDP if (nla_get_u32(props[TIPC_NLA_PROP_MTU]) < b->encap_hlen + TIPC_MIN_BEARER_MTU) { NL_SET_ERR_MSG(info->extack, "MTU value is out-of-range"); return -EINVAL; } b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU); #endif } } return 0; } int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_bearer_set(skb, info); rtnl_unlock(); return err; } static int __tipc_nl_add_media(struct tipc_nl_msg *msg, struct tipc_media *media, int nlflags) { void *hdr; struct nlattr *attrs; struct nlattr *prop; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, nlflags, TIPC_NL_MEDIA_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_MEDIA_NAME, media->name)) goto attr_msg_full; prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA_PROP); if (!prop) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, media->priority)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->max_win)) goto prop_msg_full; if (media->type_id == TIPC_MEDIA_TYPE_UDP) if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu)) goto prop_msg_full; nla_nest_end(msg->skb, prop); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; prop_msg_full: nla_nest_cancel(msg->skb, prop); attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; int i = cb->args[0]; struct tipc_nl_msg msg; if (i == MAX_MEDIA) return 0; msg.skb = skb; msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; rtnl_lock(); for (; media_info_array[i] != NULL; i++) { err = 
__tipc_nl_add_media(&msg, media_info_array[i], NLM_F_MULTI); if (err) break; } rtnl_unlock(); cb->args[0] = i; return skb->len; } int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct tipc_nl_msg msg; struct tipc_media *media; struct sk_buff *rep; struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, info->attrs[TIPC_NLA_MEDIA], tipc_nl_media_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_MEDIA_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!rep) return -ENOMEM; msg.skb = rep; msg.portid = info->snd_portid; msg.seq = info->snd_seq; rtnl_lock(); media = tipc_media_find(name); if (!media) { NL_SET_ERR_MSG(info->extack, "Media not found"); err = -EINVAL; goto err_out; } err = __tipc_nl_add_media(&msg, media, 0); if (err) goto err_out; rtnl_unlock(); return genlmsg_reply(rep, info); err_out: rtnl_unlock(); nlmsg_free(rep); return err; } int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct tipc_media *m; struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, info->attrs[TIPC_NLA_MEDIA], tipc_nl_media_policy, info->extack); if (!attrs[TIPC_NLA_MEDIA_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); m = tipc_media_find(name); if (!m) { NL_SET_ERR_MSG(info->extack, "Media not found"); return -EINVAL; } if (attrs[TIPC_NLA_MEDIA_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP], props); if (err) return err; if (props[TIPC_NLA_PROP_TOL]) m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); if (props[TIPC_NLA_PROP_PRIO]) m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) m->max_win = 
nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { if (m->type_id != TIPC_MEDIA_TYPE_UDP) { NL_SET_ERR_MSG(info->extack, "MTU property is unsupported"); return -EINVAL; } #ifdef CONFIG_TIPC_MEDIA_UDP if (tipc_udp_mtu_bad(nla_get_u32 (props[TIPC_NLA_PROP_MTU]))) { NL_SET_ERR_MSG(info->extack, "MTU value is out-of-range"); return -EINVAL; } m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); #endif } } return 0; } int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_media_set(skb, info); rtnl_unlock(); return err; }
6 1 20 11 9 8 1 7 2 2 8 2 2 3 3 3 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/minix/namei.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include "minix.h" static int add_nondir(struct dentry *dentry, struct inode *inode) { int err = minix_add_link(dentry, inode); if (!err) { d_instantiate(dentry, inode); return 0; } inode_dec_link_count(inode); iput(inode); return err; } static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) { struct inode * inode = NULL; ino_t ino; if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen) return ERR_PTR(-ENAMETOOLONG); ino = minix_inode_by_name(dentry); if (ino) inode = minix_iget(dir->i_sb, ino); return d_splice_alias(inode, dentry); } static int minix_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; if (!old_valid_dev(rdev)) return -EINVAL; inode = minix_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); minix_set_inode(inode, rdev); 
mark_inode_dirty(inode); return add_nondir(dentry, inode); } static int minix_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct inode *inode = minix_new_inode(dir, mode); if (IS_ERR(inode)) return finish_open_simple(file, PTR_ERR(inode)); minix_set_inode(inode, 0); mark_inode_dirty(inode); d_tmpfile(file, inode); return finish_open_simple(file, 0); } static int minix_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return minix_mknod(&nop_mnt_idmap, dir, dentry, mode, 0); } static int minix_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { int i = strlen(symname)+1; struct inode * inode; int err; if (i > dir->i_sb->s_blocksize) return -ENAMETOOLONG; inode = minix_new_inode(dir, S_IFLNK | 0777); if (IS_ERR(inode)) return PTR_ERR(inode); minix_set_inode(inode, 0); err = page_symlink(inode, symname, i); if (unlikely(err)) { inode_dec_link_count(inode); iput(inode); return err; } return add_nondir(dentry, inode); } static int minix_link(struct dentry * old_dentry, struct inode * dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); inode_set_ctime_current(inode); inode_inc_link_count(inode); ihold(inode); return add_nondir(dentry, inode); } static int minix_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode * inode; int err; inode = minix_new_inode(dir, S_IFDIR | mode); if (IS_ERR(inode)) return PTR_ERR(inode); inode_inc_link_count(dir); minix_set_inode(inode, 0); inode_inc_link_count(inode); err = minix_make_empty(inode, dir); if (err) goto out_fail; err = minix_add_link(dentry, inode); if (err) goto out_fail; d_instantiate(dentry, inode); out: return err; out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); iput(inode); inode_dec_link_count(dir); goto out; } static int minix_unlink(struct inode * dir, struct dentry *dentry) { struct inode * 
inode = d_inode(dentry); struct page * page; struct minix_dir_entry * de; int err; de = minix_find_entry(dentry, &page); if (!de) return -ENOENT; err = minix_delete_entry(de, page); unmap_and_put_page(page, de); if (err) return err; inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); inode_dec_link_count(inode); return 0; } static int minix_rmdir(struct inode * dir, struct dentry *dentry) { struct inode * inode = d_inode(dentry); int err = -ENOTEMPTY; if (minix_empty_dir(inode)) { err = minix_unlink(dir, dentry); if (!err) { inode_dec_link_count(dir); inode_dec_link_count(inode); } } return err; } static int minix_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { struct inode * old_inode = d_inode(old_dentry); struct inode * new_inode = d_inode(new_dentry); struct page * dir_page = NULL; struct minix_dir_entry * dir_de = NULL; struct page * old_page; struct minix_dir_entry * old_de; int err = -ENOENT; if (flags & ~RENAME_NOREPLACE) return -EINVAL; old_de = minix_find_entry(old_dentry, &old_page); if (!old_de) goto out; if (S_ISDIR(old_inode->i_mode)) { err = -EIO; dir_de = minix_dotdot(old_inode, &dir_page); if (!dir_de) goto out_old; } if (new_inode) { struct page * new_page; struct minix_dir_entry * new_de; err = -ENOTEMPTY; if (dir_de && !minix_empty_dir(new_inode)) goto out_dir; err = -ENOENT; new_de = minix_find_entry(new_dentry, &new_page); if (!new_de) goto out_dir; err = minix_set_link(new_de, new_page, old_inode); kunmap(new_page); put_page(new_page); if (err) goto out_dir; inode_set_ctime_current(new_inode); if (dir_de) drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { err = minix_add_link(new_dentry, old_inode); if (err) goto out_dir; if (dir_de) inode_inc_link_count(new_dir); } err = minix_delete_entry(old_de, old_page); if (err) goto out_dir; mark_inode_dirty(old_inode); if (dir_de) { err = minix_set_link(dir_de, dir_page, new_dir); if 
(!err) inode_dec_link_count(old_dir); } out_dir: if (dir_de) unmap_and_put_page(dir_page, dir_de); out_old: unmap_and_put_page(old_page, old_de); out: return err; } /* * directories can handle most operations... */ const struct inode_operations minix_dir_inode_operations = { .create = minix_create, .lookup = minix_lookup, .link = minix_link, .unlink = minix_unlink, .symlink = minix_symlink, .mkdir = minix_mkdir, .rmdir = minix_rmdir, .mknod = minix_mknod, .rename = minix_rename, .getattr = minix_getattr, .tmpfile = minix_tmpfile, };
3 3 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/seq_file.h> #include <net/ip.h> #include <net/mptcp.h> #include <net/snmp.h> #include <net/net_namespace.h> #include "mib.h" static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPCapableSYNRX", MPTCP_MIB_MPCAPABLEPASSIVE), SNMP_MIB_ITEM("MPCapableSYNTX", MPTCP_MIB_MPCAPABLEACTIVE), SNMP_MIB_ITEM("MPCapableSYNACKRX", MPTCP_MIB_MPCAPABLEACTIVEACK), SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK), SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK), SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT), SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX), SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX), SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX), SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH), SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR), SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL), SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE), SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE), SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW), SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA), 
SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR), SNMP_MIB_ITEM("AddAddrTx", MPTCP_MIB_ADDADDRTX), SNMP_MIB_ITEM("AddAddrTxDrop", MPTCP_MIB_ADDADDRTXDROP), SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD), SNMP_MIB_ITEM("EchoAddTx", MPTCP_MIB_ECHOADDTX), SNMP_MIB_ITEM("EchoAddTxDrop", MPTCP_MIB_ECHOADDTXDROP), SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD), SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP), SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX), SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX), SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX), SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX), SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX), SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR), SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP), SNMP_MIB_ITEM("RmAddrTx", MPTCP_MIB_RMADDRTX), SNMP_MIB_ITEM("RmAddrTxDrop", MPTCP_MIB_RMADDRTXDROP), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX), SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX), SNMP_MIB_ITEM("MPFastcloseTx", MPTCP_MIB_MPFASTCLOSETX), SNMP_MIB_ITEM("MPFastcloseRx", MPTCP_MIB_MPFASTCLOSERX), SNMP_MIB_ITEM("MPRstTx", MPTCP_MIB_MPRSTTX), SNMP_MIB_ITEM("MPRstRx", MPTCP_MIB_MPRSTRX), SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE), SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER), SNMP_MIB_ITEM("SndWndShared", MPTCP_MIB_SNDWNDSHARED), SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED), SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE), SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT), SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB), SNMP_MIB_SENTINEL }; /* mptcp_mib_alloc - allocate percpu mib counters * * These are allocated when the first mptcp socket is created so * we do not waste percpu memory if mptcp isn't in use. 
*/ bool mptcp_mib_alloc(struct net *net) { struct mptcp_mib __percpu *mib = alloc_percpu(struct mptcp_mib); if (!mib) return false; if (cmpxchg(&net->mib.mptcp_statistics, NULL, mib)) free_percpu(mib); return true; } void mptcp_seq_show(struct seq_file *seq) { unsigned long sum[ARRAY_SIZE(mptcp_snmp_list) - 1]; struct net *net = seq->private; int i; seq_puts(seq, "MPTcpExt:"); for (i = 0; mptcp_snmp_list[i].name; i++) seq_printf(seq, " %s", mptcp_snmp_list[i].name); seq_puts(seq, "\nMPTcpExt:"); memset(sum, 0, sizeof(sum)); if (net->mib.mptcp_statistics) snmp_get_cpu_field_batch(sum, mptcp_snmp_list, net->mib.mptcp_statistics); for (i = 0; mptcp_snmp_list[i].name; i++) seq_printf(seq, " %lu", sum[i]); seq_putc(seq, '\n'); }
45 45 6 1 1 4 3 3 4 7 7 6 6 6 11 1 10 1 10 50 50 45 45 27 27 27 27 27 10 10 10 9 10 6 3 10 10 6 6 10 8 10 50 49 27 27 27 3 27 27 27 24 24 24 24 50 47 50 45 50 50 45 47 50 47 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 // SPDX-License-Identifier: GPL-2.0-or-later /* * Information interface for ALSA driver * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/init.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/slab.h> #include 
<linux/string.h> #include <linux/module.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/info.h> #include <linux/utsname.h> #include <linux/proc_fs.h> #include <linux/mutex.h> int snd_info_check_reserved_words(const char *str) { static const char * const reserved[] = { "version", "meminfo", "memdebug", "detect", "devices", "oss", "cards", "timers", "synth", "pcm", "seq", NULL }; const char * const *xstr = reserved; while (*xstr) { if (!strcmp(*xstr, str)) return 0; xstr++; } if (!strncmp(str, "card", 4)) return 0; return 1; } static DEFINE_MUTEX(info_mutex); struct snd_info_private_data { struct snd_info_buffer *rbuffer; struct snd_info_buffer *wbuffer; struct snd_info_entry *entry; void *file_private_data; }; static int snd_info_version_init(void); static void snd_info_clear_entries(struct snd_info_entry *entry); /* */ static struct snd_info_entry *snd_proc_root; struct snd_info_entry *snd_seq_root; EXPORT_SYMBOL(snd_seq_root); #ifdef CONFIG_SND_OSSEMUL struct snd_info_entry *snd_oss_root; #endif static int alloc_info_private(struct snd_info_entry *entry, struct snd_info_private_data **ret) { struct snd_info_private_data *data; if (!entry || !entry->p) return -ENODEV; if (!try_module_get(entry->module)) return -EFAULT; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) { module_put(entry->module); return -ENOMEM; } data->entry = entry; *ret = data; return 0; } static bool valid_pos(loff_t pos, size_t count) { if (pos < 0 || (long) pos != pos || (ssize_t) count < 0) return false; if ((unsigned long) pos + (unsigned long) count < (unsigned long) pos) return false; return true; } /* * file ops for binary proc files */ static loff_t snd_info_entry_llseek(struct file *file, loff_t offset, int orig) { struct snd_info_private_data *data; struct snd_info_entry *entry; loff_t ret = -EINVAL, size; data = file->private_data; entry = data->entry; mutex_lock(&entry->access); if (entry->c.ops->llseek) { ret = entry->c.ops->llseek(entry, 
data->file_private_data, file, offset, orig); goto out; } size = entry->size; switch (orig) { case SEEK_SET: break; case SEEK_CUR: offset += file->f_pos; break; case SEEK_END: if (!size) goto out; offset += size; break; default: goto out; } if (offset < 0) goto out; if (size && offset > size) offset = size; file->f_pos = offset; ret = offset; out: mutex_unlock(&entry->access); return ret; } static ssize_t snd_info_entry_read(struct file *file, char __user *buffer, size_t count, loff_t * offset) { struct snd_info_private_data *data = file->private_data; struct snd_info_entry *entry = data->entry; size_t size; loff_t pos; pos = *offset; if (!valid_pos(pos, count)) return -EIO; if (pos >= entry->size) return 0; size = entry->size - pos; size = min(count, size); size = entry->c.ops->read(entry, data->file_private_data, file, buffer, size, pos); if ((ssize_t) size > 0) *offset = pos + size; return size; } static ssize_t snd_info_entry_write(struct file *file, const char __user *buffer, size_t count, loff_t * offset) { struct snd_info_private_data *data = file->private_data; struct snd_info_entry *entry = data->entry; ssize_t size = 0; loff_t pos; pos = *offset; if (!valid_pos(pos, count)) return -EIO; if (count > 0) { size_t maxsize = entry->size - pos; count = min(count, maxsize); size = entry->c.ops->write(entry, data->file_private_data, file, buffer, count, pos); } if (size > 0) *offset = pos + size; return size; } static __poll_t snd_info_entry_poll(struct file *file, poll_table *wait) { struct snd_info_private_data *data = file->private_data; struct snd_info_entry *entry = data->entry; __poll_t mask = 0; if (entry->c.ops->poll) return entry->c.ops->poll(entry, data->file_private_data, file, wait); if (entry->c.ops->read) mask |= EPOLLIN | EPOLLRDNORM; if (entry->c.ops->write) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } static long snd_info_entry_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_info_private_data *data = 
file->private_data; struct snd_info_entry *entry = data->entry; if (!entry->c.ops->ioctl) return -ENOTTY; return entry->c.ops->ioctl(entry, data->file_private_data, file, cmd, arg); } static int snd_info_entry_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); struct snd_info_private_data *data; struct snd_info_entry *entry; data = file->private_data; if (data == NULL) return 0; entry = data->entry; if (!entry->c.ops->mmap) return -ENXIO; return entry->c.ops->mmap(entry, data->file_private_data, inode, file, vma); } static int snd_info_entry_open(struct inode *inode, struct file *file) { struct snd_info_entry *entry = pde_data(inode); struct snd_info_private_data *data; int mode, err; mutex_lock(&info_mutex); err = alloc_info_private(entry, &data); if (err < 0) goto unlock; mode = file->f_flags & O_ACCMODE; if (((mode == O_RDONLY || mode == O_RDWR) && !entry->c.ops->read) || ((mode == O_WRONLY || mode == O_RDWR) && !entry->c.ops->write)) { err = -ENODEV; goto error; } if (entry->c.ops->open) { err = entry->c.ops->open(entry, mode, &data->file_private_data); if (err < 0) goto error; } file->private_data = data; mutex_unlock(&info_mutex); return 0; error: kfree(data); module_put(entry->module); unlock: mutex_unlock(&info_mutex); return err; } static int snd_info_entry_release(struct inode *inode, struct file *file) { struct snd_info_private_data *data = file->private_data; struct snd_info_entry *entry = data->entry; if (entry->c.ops->release) entry->c.ops->release(entry, file->f_flags & O_ACCMODE, data->file_private_data); module_put(entry->module); kfree(data); return 0; } static const struct proc_ops snd_info_entry_operations = { .proc_lseek = snd_info_entry_llseek, .proc_read = snd_info_entry_read, .proc_write = snd_info_entry_write, .proc_poll = snd_info_entry_poll, .proc_ioctl = snd_info_entry_ioctl, .proc_mmap = snd_info_entry_mmap, .proc_open = snd_info_entry_open, .proc_release = snd_info_entry_release, }; /* * file 
ops for text proc files */ static ssize_t snd_info_text_entry_write(struct file *file, const char __user *buffer, size_t count, loff_t *offset) { struct seq_file *m = file->private_data; struct snd_info_private_data *data = m->private; struct snd_info_entry *entry = data->entry; struct snd_info_buffer *buf; loff_t pos; size_t next; int err = 0; if (!entry->c.text.write) return -EIO; pos = *offset; if (!valid_pos(pos, count)) return -EIO; next = pos + count; /* don't handle too large text inputs */ if (next > 16 * 1024) return -EIO; mutex_lock(&entry->access); buf = data->wbuffer; if (!buf) { data->wbuffer = buf = kzalloc(sizeof(*buf), GFP_KERNEL); if (!buf) { err = -ENOMEM; goto error; } } if (next > buf->len) { char *nbuf = kvzalloc(PAGE_ALIGN(next), GFP_KERNEL); if (!nbuf) { err = -ENOMEM; goto error; } kvfree(buf->buffer); buf->buffer = nbuf; buf->len = PAGE_ALIGN(next); } if (copy_from_user(buf->buffer + pos, buffer, count)) { err = -EFAULT; goto error; } buf->size = next; error: mutex_unlock(&entry->access); if (err < 0) return err; *offset = next; return count; } static int snd_info_seq_show(struct seq_file *seq, void *p) { struct snd_info_private_data *data = seq->private; struct snd_info_entry *entry = data->entry; if (!entry->c.text.read) { return -EIO; } else { data->rbuffer->buffer = (char *)seq; /* XXX hack! 
*/ entry->c.text.read(entry, data->rbuffer); } return 0; } static int snd_info_text_entry_open(struct inode *inode, struct file *file) { struct snd_info_entry *entry = pde_data(inode); struct snd_info_private_data *data; int err; mutex_lock(&info_mutex); err = alloc_info_private(entry, &data); if (err < 0) goto unlock; data->rbuffer = kzalloc(sizeof(*data->rbuffer), GFP_KERNEL); if (!data->rbuffer) { err = -ENOMEM; goto error; } if (entry->size) err = single_open_size(file, snd_info_seq_show, data, entry->size); else err = single_open(file, snd_info_seq_show, data); if (err < 0) goto error; mutex_unlock(&info_mutex); return 0; error: kfree(data->rbuffer); kfree(data); module_put(entry->module); unlock: mutex_unlock(&info_mutex); return err; } static int snd_info_text_entry_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; struct snd_info_private_data *data = m->private; struct snd_info_entry *entry = data->entry; if (data->wbuffer && entry->c.text.write) entry->c.text.write(entry, data->wbuffer); single_release(inode, file); kfree(data->rbuffer); if (data->wbuffer) { kvfree(data->wbuffer->buffer); kfree(data->wbuffer); } module_put(entry->module); kfree(data); return 0; } static const struct proc_ops snd_info_text_entry_ops = { .proc_open = snd_info_text_entry_open, .proc_release = snd_info_text_entry_release, .proc_write = snd_info_text_entry_write, .proc_lseek = seq_lseek, .proc_read = seq_read, }; static struct snd_info_entry *create_subdir(struct module *mod, const char *name) { struct snd_info_entry *entry; entry = snd_info_create_module_entry(mod, name, NULL); if (!entry) return NULL; entry->mode = S_IFDIR | 0555; if (snd_info_register(entry) < 0) { snd_info_free_entry(entry); return NULL; } return entry; } static struct snd_info_entry * snd_info_create_entry(const char *name, struct snd_info_entry *parent, struct module *module); int __init snd_info_init(void) { snd_proc_root = snd_info_create_entry("asound", NULL, 
THIS_MODULE); if (!snd_proc_root) return -ENOMEM; snd_proc_root->mode = S_IFDIR | 0555; snd_proc_root->p = proc_mkdir("asound", NULL); if (!snd_proc_root->p) goto error; #ifdef CONFIG_SND_OSSEMUL snd_oss_root = create_subdir(THIS_MODULE, "oss"); if (!snd_oss_root) goto error; #endif #if IS_ENABLED(CONFIG_SND_SEQUENCER) snd_seq_root = create_subdir(THIS_MODULE, "seq"); if (!snd_seq_root) goto error; #endif if (snd_info_version_init() < 0 || snd_minor_info_init() < 0 || snd_minor_info_oss_init() < 0 || snd_card_info_init() < 0 || snd_info_minor_register() < 0) goto error; return 0; error: snd_info_free_entry(snd_proc_root); return -ENOMEM; } int __exit snd_info_done(void) { snd_info_free_entry(snd_proc_root); return 0; } static void snd_card_id_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_card *card = entry->private_data; snd_iprintf(buffer, "%s\n", card->id); } /* * create a card proc file * called from init.c */ int snd_info_card_create(struct snd_card *card) { char str[8]; struct snd_info_entry *entry; if (snd_BUG_ON(!card)) return -ENXIO; sprintf(str, "card%i", card->number); entry = create_subdir(card->module, str); if (!entry) return -ENOMEM; card->proc_root = entry; return snd_card_ro_proc_new(card, "id", card, snd_card_id_read); } /* * register the card proc file * called from init.c * can be called multiple times for reinitialization */ int snd_info_card_register(struct snd_card *card) { struct proc_dir_entry *p; int err; if (snd_BUG_ON(!card)) return -ENXIO; err = snd_info_register(card->proc_root); if (err < 0) return err; if (!strcmp(card->id, card->proc_root->name)) return 0; if (card->proc_root_link) return 0; p = proc_symlink(card->id, snd_proc_root->p, card->proc_root->name); if (!p) return -ENOMEM; card->proc_root_link = p; return 0; } /* * called on card->id change */ void snd_info_card_id_change(struct snd_card *card) { mutex_lock(&info_mutex); if (card->proc_root_link) { proc_remove(card->proc_root_link); 
card->proc_root_link = NULL; } if (strcmp(card->id, card->proc_root->name)) card->proc_root_link = proc_symlink(card->id, snd_proc_root->p, card->proc_root->name); mutex_unlock(&info_mutex); } /* * de-register the card proc file * called from init.c */ void snd_info_card_disconnect(struct snd_card *card) { if (!card) return; proc_remove(card->proc_root_link); if (card->proc_root) proc_remove(card->proc_root->p); mutex_lock(&info_mutex); if (card->proc_root) snd_info_clear_entries(card->proc_root); card->proc_root_link = NULL; card->proc_root = NULL; mutex_unlock(&info_mutex); } /* * release the card proc file resources * called from init.c */ int snd_info_card_free(struct snd_card *card) { if (!card) return 0; snd_info_free_entry(card->proc_root); card->proc_root = NULL; return 0; } /** * snd_info_get_line - read one line from the procfs buffer * @buffer: the procfs buffer * @line: the buffer to store * @len: the max. buffer size * * Reads one line from the buffer and stores the string. * * Return: Zero if successful, or 1 if error or EOF. */ int snd_info_get_line(struct snd_info_buffer *buffer, char *line, int len) { int c; if (snd_BUG_ON(!buffer)) return 1; if (!buffer->buffer) return 1; if (len <= 0 || buffer->stop || buffer->error) return 1; while (!buffer->stop) { c = buffer->buffer[buffer->curr++]; if (buffer->curr >= buffer->size) buffer->stop = 1; if (c == '\n') break; if (len > 1) { len--; *line++ = c; } } *line = '\0'; return 0; } EXPORT_SYMBOL(snd_info_get_line); /** * snd_info_get_str - parse a string token * @dest: the buffer to store the string token * @src: the original string * @len: the max. length of token - 1 * * Parses the original string and copy a token to the given * string buffer. * * Return: The updated pointer of the original string so that * it can be used for the next call. 
*/ const char *snd_info_get_str(char *dest, const char *src, int len) { int c; while (*src == ' ' || *src == '\t') src++; if (*src == '"' || *src == '\'') { c = *src++; while (--len > 0 && *src && *src != c) { *dest++ = *src++; } if (*src == c) src++; } else { while (--len > 0 && *src && *src != ' ' && *src != '\t') { *dest++ = *src++; } } *dest = 0; while (*src == ' ' || *src == '\t') src++; return src; } EXPORT_SYMBOL(snd_info_get_str); /* * snd_info_create_entry - create an info entry * @name: the proc file name * @parent: the parent directory * * Creates an info entry with the given file name and initializes as * the default state. * * Usually called from other functions such as * snd_info_create_card_entry(). * * Return: The pointer of the new instance, or %NULL on failure. */ static struct snd_info_entry * snd_info_create_entry(const char *name, struct snd_info_entry *parent, struct module *module) { struct snd_info_entry *entry; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) return NULL; entry->name = kstrdup(name, GFP_KERNEL); if (entry->name == NULL) { kfree(entry); return NULL; } entry->mode = S_IFREG | 0444; entry->content = SNDRV_INFO_CONTENT_TEXT; mutex_init(&entry->access); INIT_LIST_HEAD(&entry->children); INIT_LIST_HEAD(&entry->list); entry->parent = parent; entry->module = module; if (parent) { mutex_lock(&parent->access); list_add_tail(&entry->list, &parent->children); mutex_unlock(&parent->access); } return entry; } /** * snd_info_create_module_entry - create an info entry for the given module * @module: the module pointer * @name: the file name * @parent: the parent directory * * Creates a new info entry and assigns it to the given module. * * Return: The pointer of the new instance, or %NULL on failure. 
*/ struct snd_info_entry *snd_info_create_module_entry(struct module * module, const char *name, struct snd_info_entry *parent) { if (!parent) parent = snd_proc_root; return snd_info_create_entry(name, parent, module); } EXPORT_SYMBOL(snd_info_create_module_entry); /** * snd_info_create_card_entry - create an info entry for the given card * @card: the card instance * @name: the file name * @parent: the parent directory * * Creates a new info entry and assigns it to the given card. * * Return: The pointer of the new instance, or %NULL on failure. */ struct snd_info_entry *snd_info_create_card_entry(struct snd_card *card, const char *name, struct snd_info_entry * parent) { if (!parent) parent = card->proc_root; return snd_info_create_entry(name, parent, card->module); } EXPORT_SYMBOL(snd_info_create_card_entry); static void snd_info_clear_entries(struct snd_info_entry *entry) { struct snd_info_entry *p; if (!entry->p) return; list_for_each_entry(p, &entry->children, list) snd_info_clear_entries(p); entry->p = NULL; } /** * snd_info_free_entry - release the info entry * @entry: the info entry * * Releases the info entry. */ void snd_info_free_entry(struct snd_info_entry * entry) { struct snd_info_entry *p, *n; if (!entry) return; if (entry->p) { proc_remove(entry->p); mutex_lock(&info_mutex); snd_info_clear_entries(entry); mutex_unlock(&info_mutex); } /* free all children at first */ list_for_each_entry_safe(p, n, &entry->children, list) snd_info_free_entry(p); p = entry->parent; if (p) { mutex_lock(&p->access); list_del(&entry->list); mutex_unlock(&p->access); } kfree(entry->name); if (entry->private_free) entry->private_free(entry); kfree(entry); } EXPORT_SYMBOL(snd_info_free_entry); static int __snd_info_register(struct snd_info_entry *entry) { struct proc_dir_entry *root, *p = NULL; if (snd_BUG_ON(!entry)) return -ENXIO; root = entry->parent == NULL ? 
snd_proc_root->p : entry->parent->p; mutex_lock(&info_mutex); if (entry->p || !root) goto unlock; if (S_ISDIR(entry->mode)) { p = proc_mkdir_mode(entry->name, entry->mode, root); if (!p) { mutex_unlock(&info_mutex); return -ENOMEM; } } else { const struct proc_ops *ops; if (entry->content == SNDRV_INFO_CONTENT_DATA) ops = &snd_info_entry_operations; else ops = &snd_info_text_entry_ops; p = proc_create_data(entry->name, entry->mode, root, ops, entry); if (!p) { mutex_unlock(&info_mutex); return -ENOMEM; } proc_set_size(p, entry->size); } entry->p = p; unlock: mutex_unlock(&info_mutex); return 0; } /** * snd_info_register - register the info entry * @entry: the info entry * * Registers the proc info entry. * The all children entries are registered recursively. * * Return: Zero if successful, or a negative error code on failure. */ int snd_info_register(struct snd_info_entry *entry) { struct snd_info_entry *p; int err; if (!entry->p) { err = __snd_info_register(entry); if (err < 0) return err; } list_for_each_entry(p, &entry->children, list) { err = snd_info_register(p); if (err < 0) return err; } return 0; } EXPORT_SYMBOL(snd_info_register); /** * snd_card_rw_proc_new - Create a read/write text proc file entry for the card * @card: the card instance * @name: the file name * @private_data: the arbitrary private data * @read: the read callback * @write: the write callback, NULL for read-only * * This proc file entry will be registered via snd_card_register() call, and * it will be removed automatically at the card removal, too. 
* * Return: zero if successful, or a negative error code */ int snd_card_rw_proc_new(struct snd_card *card, const char *name, void *private_data, void (*read)(struct snd_info_entry *, struct snd_info_buffer *), void (*write)(struct snd_info_entry *entry, struct snd_info_buffer *buffer)) { struct snd_info_entry *entry; entry = snd_info_create_card_entry(card, name, card->proc_root); if (!entry) return -ENOMEM; snd_info_set_text_ops(entry, private_data, read); if (write) { entry->mode |= 0200; entry->c.text.write = write; } return 0; } EXPORT_SYMBOL_GPL(snd_card_rw_proc_new); /* */ static void snd_info_version_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { snd_iprintf(buffer, "Advanced Linux Sound Architecture Driver Version k%s.\n", init_utsname()->release); } static int __init snd_info_version_init(void) { struct snd_info_entry *entry; entry = snd_info_create_module_entry(THIS_MODULE, "version", NULL); if (entry == NULL) return -ENOMEM; entry->c.text.read = snd_info_version_read; return snd_info_register(entry); /* freed in error path */ }
2 2 1 1 1 1 1 2 2 2 2 2 2 2 2 3 3 2 1 3 2 2 2 1 1 1 12 1 12 12 1 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 
510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 
1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 
1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 
1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 
2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 
2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. 
*/
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/if_bonding.h>
#include <linux/pkt_sched.h>
#include <net/net_namespace.h>
#include <net/bonding.h>
#include <net/bond_3ad.h>
#include <net/netlink.h>

/* General definitions */
#define AD_SHORT_TIMEOUT           1
#define AD_LONG_TIMEOUT            0
#define AD_STANDBY                 0x2
#define AD_MAX_TX_IN_SECOND        3
#define AD_COLLECTOR_MAX_DELAY     0

/* Timer definitions (43.4.4 in the 802.3ad standard)
 * __ad_timer_to_ticks() multiplies these by ad_ticks_per_sec.
 */
#define AD_FAST_PERIODIC_TIME      1
#define AD_SLOW_PERIODIC_TIME      30
#define AD_SHORT_TIMEOUT_TIME      (3*AD_FAST_PERIODIC_TIME)
#define AD_LONG_TIMEOUT_TIME       (3*AD_SLOW_PERIODIC_TIME)
#define AD_CHURN_DETECTION_TIME    60
#define AD_AGGREGATE_WAIT_TIME     2

/* Port Variables definitions used by the State Machines (43.4.7 in the
 * 802.3ad standard)
 */
#define AD_PORT_BEGIN           0x1
#define AD_PORT_LACP_ENABLED    0x2
#define AD_PORT_ACTOR_CHURN     0x4
#define AD_PORT_PARTNER_CHURN   0x8
#define AD_PORT_READY           0x10
#define AD_PORT_READY_N         0x20
#define AD_PORT_MATCHED         0x40
#define AD_PORT_STANDBY         0x80
#define AD_PORT_SELECTED        0x100
#define AD_PORT_MOVED           0x200
#define AD_PORT_CHURNED         (AD_PORT_ACTOR_CHURN | AD_PORT_PARTNER_CHURN)

/* Port Key definitions
 * key is determined according to the link speed, duplex and
 * user key (which is yet not supported)
 *           --------------------------------------------------------------
 * Port key  | User key (10 bits)           | Speed (5 bits)      | Duplex|
 *           --------------------------------------------------------------
 *           |15                           6|5                   1|0
 *
 * The masks below select exactly these three bit fields.
 */
#define  AD_DUPLEX_KEY_MASKS    0x1
#define  AD_SPEED_KEY_MASKS     0x3E
#define  AD_USER_KEY_MASKS      0xFFC0

/* Link speed codes as returned by __get_link_speed(); 0 is used there for
 * "link down or unknown speed", so the codes start at 1.
 */
enum ad_link_speed_type {
	AD_LINK_SPEED_1MBPS = 1,
	AD_LINK_SPEED_10MBPS,
	AD_LINK_SPEED_100MBPS,
	AD_LINK_SPEED_1000MBPS,
	AD_LINK_SPEED_2500MBPS,
	AD_LINK_SPEED_5000MBPS,
	AD_LINK_SPEED_10000MBPS,
	AD_LINK_SPEED_14000MBPS,
	AD_LINK_SPEED_20000MBPS,
	AD_LINK_SPEED_25000MBPS,
	AD_LINK_SPEED_40000MBPS,
	AD_LINK_SPEED_50000MBPS,
	AD_LINK_SPEED_56000MBPS,
	AD_LINK_SPEED_100000MBPS,
	AD_LINK_SPEED_200000MBPS,
	AD_LINK_SPEED_400000MBPS,
	AD_LINK_SPEED_800000MBPS,
};

/* compare MAC addresses */
#define MAC_ADDRESS_EQUAL(A, B)	\
	ether_addr_equal_64bits((const u8 *)A, (const u8 *)B)

/* All-zero MAC address; two bytes longer than ETH_ALEN and long-aligned,
 * presumably so it can be passed to MAC_ADDRESS_EQUAL() - TODO confirm.
 */
static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = {
	0, 0, 0, 0, 0, 0
};

/* Conversion factors derived from AD_TIMER_INTERVAL (declared elsewhere;
 * the arithmetic suggests it is in milliseconds - TODO confirm).
 */
static const u16 ad_ticks_per_sec = 1000 / AD_TIMER_INTERVAL;
static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000;

/* Multicast MAC address 01:80:C2:00:00:02, padded like null_mac_addr */
const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned = {
	0x01, 0x80, 0xC2, 0x00, 0x00, 0x02
};

/* ================= main 802.3ad protocol functions ================== */
static int ad_lacpdu_send(struct port *port);
static int ad_marker_send(struct port *port, struct bond_marker *marker);
static void ad_mux_machine(struct port *port, bool *update_slave_arr);
static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
static void ad_tx_machine(struct port *port);
static void ad_periodic_machine(struct port *port, struct bond_params *bond_params);
static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
static void ad_agg_selection_logic(struct aggregator *aggregator,
				   bool *update_slave_arr);
static void ad_clear_agg(struct aggregator *aggregator);
static void ad_initialize_agg(struct aggregator *aggregator);
static void ad_initialize_port(struct port *port, int lacp_fast);
static void ad_enable_collecting_distributing(struct port *port,
					      bool *update_slave_arr);
static void ad_disable_collecting_distributing(struct port *port,
					       bool *update_slave_arr);
static void ad_marker_info_received(struct bond_marker *marker_info,
				    struct port *port);
static void ad_marker_response_received(struct bond_marker *marker,
					struct port *port);
static void ad_update_actor_keys(struct port *port, bool reset);


/* ================= api to bonding and kernel code ================== */

/**
 * __get_bond_by_port - get the port's bonding struct
 * @port: the port we're looking at
 *
* Return @port's bonding struct, or %NULL if it can't be found. */ static inline struct bonding *__get_bond_by_port(struct port *port) { if (port->slave == NULL) return NULL; return bond_get_bond_by_slave(port->slave); } /** * __get_first_agg - get the first aggregator in the bond * @port: the port we're looking at * * Return the aggregator of the first slave in @bond, or %NULL if it can't be * found. * The caller must hold RCU or RTNL lock. */ static inline struct aggregator *__get_first_agg(struct port *port) { struct bonding *bond = __get_bond_by_port(port); struct slave *first_slave; struct aggregator *agg; /* If there's no bond for this port, or bond has no slaves */ if (bond == NULL) return NULL; rcu_read_lock(); first_slave = bond_first_slave_rcu(bond); agg = first_slave ? &(SLAVE_AD_INFO(first_slave)->aggregator) : NULL; rcu_read_unlock(); return agg; } /** * __agg_has_partner - see if we have a partner * @agg: the agregator we're looking at * * Return nonzero if aggregator has a partner (denoted by a non-zero ether * address for the partner). Return 0 if not. 
*/ static inline int __agg_has_partner(struct aggregator *agg) { return !is_zero_ether_addr(agg->partner_system.mac_addr_value); } /** * __disable_port - disable the port's slave * @port: the port we're looking at */ static inline void __disable_port(struct port *port) { bond_set_slave_inactive_flags(port->slave, BOND_SLAVE_NOTIFY_LATER); } /** * __enable_port - enable the port's slave, if it's up * @port: the port we're looking at */ static inline void __enable_port(struct port *port) { struct slave *slave = port->slave; if ((slave->link == BOND_LINK_UP) && bond_slave_is_up(slave)) bond_set_slave_active_flags(slave, BOND_SLAVE_NOTIFY_LATER); } /** * __port_is_enabled - check if the port's slave is in active state * @port: the port we're looking at */ static inline int __port_is_enabled(struct port *port) { return bond_is_active_slave(port->slave); } /** * __get_agg_selection_mode - get the aggregator selection mode * @port: the port we're looking at * * Get the aggregator selection mode. Can be %STABLE, %BANDWIDTH or %COUNT. */ static inline u32 __get_agg_selection_mode(struct port *port) { struct bonding *bond = __get_bond_by_port(port); if (bond == NULL) return BOND_AD_STABLE; return bond->params.ad_select; } /** * __check_agg_selection_timer - check if the selection timer has expired * @port: the port we're looking at */ static inline int __check_agg_selection_timer(struct port *port) { struct bonding *bond = __get_bond_by_port(port); if (bond == NULL) return 0; return atomic_read(&BOND_AD_INFO(bond).agg_select_timer) ? 1 : 0; } /** * __get_link_speed - get a port's speed * @port: the port we're looking at * * Return @port's speed in 802.3ad enum format. i.e. 
one of: * 0, * %AD_LINK_SPEED_10MBPS, * %AD_LINK_SPEED_100MBPS, * %AD_LINK_SPEED_1000MBPS, * %AD_LINK_SPEED_2500MBPS, * %AD_LINK_SPEED_5000MBPS, * %AD_LINK_SPEED_10000MBPS * %AD_LINK_SPEED_14000MBPS, * %AD_LINK_SPEED_20000MBPS * %AD_LINK_SPEED_25000MBPS * %AD_LINK_SPEED_40000MBPS * %AD_LINK_SPEED_50000MBPS * %AD_LINK_SPEED_56000MBPS * %AD_LINK_SPEED_100000MBPS * %AD_LINK_SPEED_200000MBPS * %AD_LINK_SPEED_400000MBPS * %AD_LINK_SPEED_800000MBPS */ static u16 __get_link_speed(struct port *port) { struct slave *slave = port->slave; u16 speed; /* this if covers only a special case: when the configuration starts * with link down, it sets the speed to 0. * This is done in spite of the fact that the e100 driver reports 0 * to be compatible with MVT in the future. */ if (slave->link != BOND_LINK_UP) speed = 0; else { switch (slave->speed) { case SPEED_10: speed = AD_LINK_SPEED_10MBPS; break; case SPEED_100: speed = AD_LINK_SPEED_100MBPS; break; case SPEED_1000: speed = AD_LINK_SPEED_1000MBPS; break; case SPEED_2500: speed = AD_LINK_SPEED_2500MBPS; break; case SPEED_5000: speed = AD_LINK_SPEED_5000MBPS; break; case SPEED_10000: speed = AD_LINK_SPEED_10000MBPS; break; case SPEED_14000: speed = AD_LINK_SPEED_14000MBPS; break; case SPEED_20000: speed = AD_LINK_SPEED_20000MBPS; break; case SPEED_25000: speed = AD_LINK_SPEED_25000MBPS; break; case SPEED_40000: speed = AD_LINK_SPEED_40000MBPS; break; case SPEED_50000: speed = AD_LINK_SPEED_50000MBPS; break; case SPEED_56000: speed = AD_LINK_SPEED_56000MBPS; break; case SPEED_100000: speed = AD_LINK_SPEED_100000MBPS; break; case SPEED_200000: speed = AD_LINK_SPEED_200000MBPS; break; case SPEED_400000: speed = AD_LINK_SPEED_400000MBPS; break; case SPEED_800000: speed = AD_LINK_SPEED_800000MBPS; break; default: /* unknown speed value from ethtool. 
shouldn't happen */ if (slave->speed != SPEED_UNKNOWN) pr_err_once("%s: (slave %s): unknown ethtool speed (%d) for port %d (set it to 0)\n", slave->bond->dev->name, slave->dev->name, slave->speed, port->actor_port_number); speed = 0; break; } } slave_dbg(slave->bond->dev, slave->dev, "Port %d Received link speed %d update from adapter\n", port->actor_port_number, speed); return speed; } /** * __get_duplex - get a port's duplex * @port: the port we're looking at * * Return @port's duplex in 802.3ad bitmask format. i.e.: * 0x01 if in full duplex * 0x00 otherwise */ static u8 __get_duplex(struct port *port) { struct slave *slave = port->slave; u8 retval = 0x0; /* handling a special case: when the configuration starts with * link down, it sets the duplex to 0. */ if (slave->link == BOND_LINK_UP) { switch (slave->duplex) { case DUPLEX_FULL: retval = 0x1; slave_dbg(slave->bond->dev, slave->dev, "Port %d Received status full duplex update from adapter\n", port->actor_port_number); break; case DUPLEX_HALF: default: retval = 0x0; slave_dbg(slave->bond->dev, slave->dev, "Port %d Received status NOT full duplex update from adapter\n", port->actor_port_number); break; } } return retval; } static void __ad_actor_update_port(struct port *port) { const struct bonding *bond = bond_get_bond_by_slave(port->slave); port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; port->actor_system_priority = BOND_AD_INFO(bond).system.sys_priority; } /* Conversions */ /** * __ad_timer_to_ticks - convert a given timer type to AD module ticks * @timer_type: which timer to operate * @par: timer parameter. see below * * If @timer_type is %current_while_timer, @par indicates long/short timer. * If @timer_type is %periodic_timer, @par is one of %FAST_PERIODIC_TIME, * %SLOW_PERIODIC_TIME. 
*/
static u16 __ad_timer_to_ticks(u16 timer_type, u16 par)
{
	switch (timer_type) {
	case AD_CURRENT_WHILE_TIMER:
		/* rx machine: non-zero @par selects the short timeout */
		return par ? AD_SHORT_TIMEOUT_TIME * ad_ticks_per_sec
			   : AD_LONG_TIMEOUT_TIME * ad_ticks_per_sec;
	case AD_ACTOR_CHURN_TIMER:
		/* local churn machine */
		return AD_CHURN_DETECTION_TIME * ad_ticks_per_sec;
	case AD_PERIODIC_TIMER:
		/* periodic machine: @par is the period itself */
		return par * ad_ticks_per_sec;
	case AD_PARTNER_CHURN_TIMER:
		/* remote churn machine */
		return AD_CHURN_DETECTION_TIME * ad_ticks_per_sec;
	case AD_WAIT_WHILE_TIMER:
		/* selection machine */
		return AD_AGGREGATE_WAIT_TIME * ad_ticks_per_sec;
	}

	/* any other timer type yields 0 ticks */
	return 0;
}


/* ================= ad_rx_machine helper functions ================== */

/**
 * __choose_matched - update a port's matched variable from a received lacpdu
 * @lacpdu: the lacpdu we've received
 * @port: the port we're looking at
 *
 * Update the value of the matched variable, using parameter values from a
 * newly received lacpdu. Parameter values for the partner carried in the
 * received PDU are compared with the corresponding operational parameter
 * values for the actor. Matched is set to TRUE if all of these parameters
 * match and the PDU parameter partner_state.aggregation has the same value as
 * actor_oper_port_state.aggregation and lacp will actively maintain the link
 * in the aggregation. Matched is also set to TRUE if the value of
 * actor_state.aggregation in the received PDU is set to FALSE, i.e., indicates
 * an individual link and lacp will actively maintain the link. Otherwise,
 * matched is set to FALSE. LACP is considered to be actively maintaining the
 * link if either the PDU's actor_state.lacp_activity variable is TRUE or both
 * the actor's actor_oper_port_state.lacp_activity and the PDU's
 * partner_state.lacp_activity variables are TRUE.
* * Note: the AD_PORT_MATCHED "variable" is not specified by 802.3ad; it is * used here to implement the language from 802.3ad 43.4.9 that requires * recordPDU to "match" the LACPDU parameters to the stored values. */ static void __choose_matched(struct lacpdu *lacpdu, struct port *port) { /* check if all parameters are alike * or this is individual link(aggregation == FALSE) * then update the state machine Matched variable. */ if (((ntohs(lacpdu->partner_port) == port->actor_port_number) && (ntohs(lacpdu->partner_port_priority) == port->actor_port_priority) && MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) && (ntohs(lacpdu->partner_system_priority) == port->actor_system_priority) && (ntohs(lacpdu->partner_key) == port->actor_oper_port_key) && ((lacpdu->partner_state & LACP_STATE_AGGREGATION) == (port->actor_oper_port_state & LACP_STATE_AGGREGATION))) || ((lacpdu->actor_state & LACP_STATE_AGGREGATION) == 0) ) { port->sm_vars |= AD_PORT_MATCHED; } else { port->sm_vars &= ~AD_PORT_MATCHED; } } /** * __record_pdu - record parameters from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Record the parameter values for the Actor carried in a received lacpdu as * the current partner operational parameter values and sets * actor_oper_port_state.defaulted to FALSE. 
*/ static void __record_pdu(struct lacpdu *lacpdu, struct port *port) { if (lacpdu && port) { struct port_params *partner = &port->partner_oper; __choose_matched(lacpdu, port); /* record the new parameter values for the partner * operational */ partner->port_number = ntohs(lacpdu->actor_port); partner->port_priority = ntohs(lacpdu->actor_port_priority); partner->system = lacpdu->actor_system; partner->system_priority = ntohs(lacpdu->actor_system_priority); partner->key = ntohs(lacpdu->actor_key); partner->port_state = lacpdu->actor_state; /* set actor_oper_port_state.defaulted to FALSE */ port->actor_oper_port_state &= ~LACP_STATE_DEFAULTED; /* set the partner sync. to on if the partner is sync, * and the port is matched */ if ((port->sm_vars & AD_PORT_MATCHED) && (lacpdu->actor_state & LACP_STATE_SYNCHRONIZATION)) { partner->port_state |= LACP_STATE_SYNCHRONIZATION; slave_dbg(port->slave->bond->dev, port->slave->dev, "partner sync=1\n"); } else { partner->port_state &= ~LACP_STATE_SYNCHRONIZATION; slave_dbg(port->slave->bond->dev, port->slave->dev, "partner sync=0\n"); } } } /** * __record_default - record default parameters * @port: the port we're looking at * * This function records the default parameter values for the partner carried * in the Partner Admin parameters as the current partner operational parameter * values and sets actor_oper_port_state.defaulted to TRUE. */ static void __record_default(struct port *port) { if (port) { /* record the partner admin parameters */ memcpy(&port->partner_oper, &port->partner_admin, sizeof(struct port_params)); /* set actor_oper_port_state.defaulted to true */ port->actor_oper_port_state |= LACP_STATE_DEFAULTED; } } /** * __update_selected - update a port's Selected variable from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Update the value of the selected variable, using parameter values from a * newly received lacpdu. 
The parameter values for the Actor carried in the * received PDU are compared with the corresponding operational parameter * values for the ports partner. If one or more of the comparisons shows that * the value(s) received in the PDU differ from the current operational values, * then selected is set to FALSE and actor_oper_port_state.synchronization is * set to out_of_sync. Otherwise, selected remains unchanged. */ static void __update_selected(struct lacpdu *lacpdu, struct port *port) { if (lacpdu && port) { const struct port_params *partner = &port->partner_oper; /* check if any parameter is different then * update the state machine selected variable. */ if (ntohs(lacpdu->actor_port) != partner->port_number || ntohs(lacpdu->actor_port_priority) != partner->port_priority || !MAC_ADDRESS_EQUAL(&lacpdu->actor_system, &partner->system) || ntohs(lacpdu->actor_system_priority) != partner->system_priority || ntohs(lacpdu->actor_key) != partner->key || (lacpdu->actor_state & LACP_STATE_AGGREGATION) != (partner->port_state & LACP_STATE_AGGREGATION)) { port->sm_vars &= ~AD_PORT_SELECTED; } } } /** * __update_default_selected - update a port's Selected variable from Partner * @port: the port we're looking at * * This function updates the value of the selected variable, using the partner * administrative parameter values. The administrative values are compared with * the corresponding operational parameter values for the partner. If one or * more of the comparisons shows that the administrative value(s) differ from * the current operational values, then Selected is set to FALSE and * actor_oper_port_state.synchronization is set to OUT_OF_SYNC. Otherwise, * Selected remains unchanged. */ static void __update_default_selected(struct port *port) { if (port) { const struct port_params *admin = &port->partner_admin; const struct port_params *oper = &port->partner_oper; /* check if any parameter is different then * update the state machine selected variable. 
*/ if (admin->port_number != oper->port_number || admin->port_priority != oper->port_priority || !MAC_ADDRESS_EQUAL(&admin->system, &oper->system) || admin->system_priority != oper->system_priority || admin->key != oper->key || (admin->port_state & LACP_STATE_AGGREGATION) != (oper->port_state & LACP_STATE_AGGREGATION)) { port->sm_vars &= ~AD_PORT_SELECTED; } } } /** * __update_ntt - update a port's ntt variable from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Updates the value of the ntt variable, using parameter values from a newly * received lacpdu. The parameter values for the partner carried in the * received PDU are compared with the corresponding operational parameter * values for the Actor. If one or more of the comparisons shows that the * value(s) received in the PDU differ from the current operational values, * then ntt is set to TRUE. Otherwise, ntt remains unchanged. */ static void __update_ntt(struct lacpdu *lacpdu, struct port *port) { /* validate lacpdu and port */ if (lacpdu && port) { /* check if any parameter is different then * update the port->ntt. 
*/ if ((ntohs(lacpdu->partner_port) != port->actor_port_number) || (ntohs(lacpdu->partner_port_priority) != port->actor_port_priority) || !MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) || (ntohs(lacpdu->partner_system_priority) != port->actor_system_priority) || (ntohs(lacpdu->partner_key) != port->actor_oper_port_key) || ((lacpdu->partner_state & LACP_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY)) || ((lacpdu->partner_state & LACP_STATE_LACP_TIMEOUT) != (port->actor_oper_port_state & LACP_STATE_LACP_TIMEOUT)) || ((lacpdu->partner_state & LACP_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) || ((lacpdu->partner_state & LACP_STATE_AGGREGATION) != (port->actor_oper_port_state & LACP_STATE_AGGREGATION)) ) { port->ntt = true; } } } /** * __agg_ports_are_ready - check if all ports in an aggregator are ready * @aggregator: the aggregator we're looking at * */ static int __agg_ports_are_ready(struct aggregator *aggregator) { struct port *port; int retval = 1; if (aggregator) { /* scan all ports in this aggregator to verfy if they are * all ready. 
*/ for (port = aggregator->lag_ports; port; port = port->next_port_in_aggregator) { if (!(port->sm_vars & AD_PORT_READY_N)) { retval = 0; break; } } } return retval; } /** * __set_agg_ports_ready - set value of Ready bit in all ports of an aggregator * @aggregator: the aggregator we're looking at * @val: Should the ports' ready bit be set on or off * */ static void __set_agg_ports_ready(struct aggregator *aggregator, int val) { struct port *port; for (port = aggregator->lag_ports; port; port = port->next_port_in_aggregator) { if (val) port->sm_vars |= AD_PORT_READY; else port->sm_vars &= ~AD_PORT_READY; } } static int __agg_active_ports(struct aggregator *agg) { struct port *port; int active = 0; for (port = agg->lag_ports; port; port = port->next_port_in_aggregator) { if (port->is_enabled) active++; } return active; } /** * __get_agg_bandwidth - get the total bandwidth of an aggregator * @aggregator: the aggregator we're looking at * */ static u32 __get_agg_bandwidth(struct aggregator *aggregator) { int nports = __agg_active_ports(aggregator); u32 bandwidth = 0; if (nports) { switch (__get_link_speed(aggregator->lag_ports)) { case AD_LINK_SPEED_1MBPS: bandwidth = nports; break; case AD_LINK_SPEED_10MBPS: bandwidth = nports * 10; break; case AD_LINK_SPEED_100MBPS: bandwidth = nports * 100; break; case AD_LINK_SPEED_1000MBPS: bandwidth = nports * 1000; break; case AD_LINK_SPEED_2500MBPS: bandwidth = nports * 2500; break; case AD_LINK_SPEED_5000MBPS: bandwidth = nports * 5000; break; case AD_LINK_SPEED_10000MBPS: bandwidth = nports * 10000; break; case AD_LINK_SPEED_14000MBPS: bandwidth = nports * 14000; break; case AD_LINK_SPEED_20000MBPS: bandwidth = nports * 20000; break; case AD_LINK_SPEED_25000MBPS: bandwidth = nports * 25000; break; case AD_LINK_SPEED_40000MBPS: bandwidth = nports * 40000; break; case AD_LINK_SPEED_50000MBPS: bandwidth = nports * 50000; break; case AD_LINK_SPEED_56000MBPS: bandwidth = nports * 56000; break; case AD_LINK_SPEED_100000MBPS: 
bandwidth = nports * 100000; break; case AD_LINK_SPEED_200000MBPS: bandwidth = nports * 200000; break; case AD_LINK_SPEED_400000MBPS: bandwidth = nports * 400000; break; case AD_LINK_SPEED_800000MBPS: bandwidth = nports * 800000; break; default: bandwidth = 0; /* to silence the compiler */ } } return bandwidth; } /** * __get_active_agg - get the current active aggregator * @aggregator: the aggregator we're looking at * * Caller must hold RCU lock. */ static struct aggregator *__get_active_agg(struct aggregator *aggregator) { struct bonding *bond = aggregator->slave->bond; struct list_head *iter; struct slave *slave; bond_for_each_slave_rcu(bond, slave, iter) if (SLAVE_AD_INFO(slave)->aggregator.is_active) return &(SLAVE_AD_INFO(slave)->aggregator); return NULL; } /** * __update_lacpdu_from_port - update a port's lacpdu fields * @port: the port we're looking at */ static inline void __update_lacpdu_from_port(struct port *port) { struct lacpdu *lacpdu = &port->lacpdu; const struct port_params *partner = &port->partner_oper; /* update current actual Actor parameters * lacpdu->subtype initialized * lacpdu->version_number initialized * lacpdu->tlv_type_actor_info initialized * lacpdu->actor_information_length initialized */ lacpdu->actor_system_priority = htons(port->actor_system_priority); lacpdu->actor_system = port->actor_system; lacpdu->actor_key = htons(port->actor_oper_port_key); lacpdu->actor_port_priority = htons(port->actor_port_priority); lacpdu->actor_port = htons(port->actor_port_number); lacpdu->actor_state = port->actor_oper_port_state; slave_dbg(port->slave->bond->dev, port->slave->dev, "update lacpdu: actor port state %x\n", port->actor_oper_port_state); /* lacpdu->reserved_3_1 initialized * lacpdu->tlv_type_partner_info initialized * lacpdu->partner_information_length initialized */ lacpdu->partner_system_priority = htons(partner->system_priority); lacpdu->partner_system = partner->system; lacpdu->partner_key = htons(partner->key); 
lacpdu->partner_port_priority = htons(partner->port_priority); lacpdu->partner_port = htons(partner->port_number); lacpdu->partner_state = partner->port_state; /* lacpdu->reserved_3_2 initialized * lacpdu->tlv_type_collector_info initialized * lacpdu->collector_information_length initialized * collector_max_delay initialized * reserved_12[12] initialized * tlv_type_terminator initialized * terminator_length initialized * reserved_50[50] initialized */ } /* ================= main 802.3ad protocol code ========================= */ /** * ad_lacpdu_send - send out a lacpdu packet on a given port * @port: the port we're looking at * * Returns: 0 on success * < 0 on error */ static int ad_lacpdu_send(struct port *port) { struct slave *slave = port->slave; struct sk_buff *skb; struct lacpdu_header *lacpdu_header; int length = sizeof(struct lacpdu_header); skb = dev_alloc_skb(length); if (!skb) return -ENOMEM; atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_tx); atomic64_inc(&BOND_AD_INFO(slave->bond).stats.lacpdu_tx); skb->dev = slave->dev; skb_reset_mac_header(skb); skb->network_header = skb->mac_header + ETH_HLEN; skb->protocol = PKT_TYPE_LACPDU; skb->priority = TC_PRIO_CONTROL; lacpdu_header = skb_put(skb, length); ether_addr_copy(lacpdu_header->hdr.h_dest, lacpdu_mcast_addr); /* Note: source address is set to be the member's PERMANENT address, * because we use it to identify loopback lacpdus in receive. 
*/ ether_addr_copy(lacpdu_header->hdr.h_source, slave->perm_hwaddr); lacpdu_header->hdr.h_proto = PKT_TYPE_LACPDU; lacpdu_header->lacpdu = port->lacpdu; dev_queue_xmit(skb); return 0; } /** * ad_marker_send - send marker information/response on a given port * @port: the port we're looking at * @marker: marker data to send * * Returns: 0 on success * < 0 on error */ static int ad_marker_send(struct port *port, struct bond_marker *marker) { struct slave *slave = port->slave; struct sk_buff *skb; struct bond_marker_header *marker_header; int length = sizeof(struct bond_marker_header); skb = dev_alloc_skb(length + 16); if (!skb) return -ENOMEM; switch (marker->tlv_type) { case AD_MARKER_INFORMATION_SUBTYPE: atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_tx); atomic64_inc(&BOND_AD_INFO(slave->bond).stats.marker_tx); break; case AD_MARKER_RESPONSE_SUBTYPE: atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_resp_tx); atomic64_inc(&BOND_AD_INFO(slave->bond).stats.marker_resp_tx); break; } skb_reserve(skb, 16); skb->dev = slave->dev; skb_reset_mac_header(skb); skb->network_header = skb->mac_header + ETH_HLEN; skb->protocol = PKT_TYPE_LACPDU; marker_header = skb_put(skb, length); ether_addr_copy(marker_header->hdr.h_dest, lacpdu_mcast_addr); /* Note: source address is set to be the member's PERMANENT address, * because we use it to identify loopback MARKERs in receive. */ ether_addr_copy(marker_header->hdr.h_source, slave->perm_hwaddr); marker_header->hdr.h_proto = PKT_TYPE_LACPDU; marker_header->marker = *marker; dev_queue_xmit(skb); return 0; } /** * ad_mux_machine - handle a port's mux state machine * @port: the port we're looking at * @update_slave_arr: Does slave array need update? 
*/
static void ad_mux_machine(struct port *port, bool *update_slave_arr)
{
	mux_states_t last_state;

	/* Two phases: first pick the next mux state from the current state
	 * and the port's sm_vars, then, if the state changed, run the entry
	 * actions of the new state.
	 */

	/* keep current State Machine state to compare later if it was
	 * changed
	 */
	last_state = port->sm_mux_state;

	if (port->sm_vars & AD_PORT_BEGIN) {
		/* BEGIN forces the machine back to DETACHED */
		port->sm_mux_state = AD_MUX_DETACHED;
	} else {
		switch (port->sm_mux_state) {
		case AD_MUX_DETACHED:
			if ((port->sm_vars & AD_PORT_SELECTED)
			    || (port->sm_vars & AD_PORT_STANDBY))
				/* if SELECTED or STANDBY */
				port->sm_mux_state = AD_MUX_WAITING;
			break;
		case AD_MUX_WAITING:
			/* if SELECTED == FALSE return to DETACH state */
			if (!(port->sm_vars & AD_PORT_SELECTED)) {
				port->sm_vars &= ~AD_PORT_READY_N;
				/* in order to withhold the Selection Logic to
				 * check all ports READY_N value every callback
				 * cycle to update ready variable, we check
				 * READY_N and update READY here
				 */
				__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator));
				port->sm_mux_state = AD_MUX_DETACHED;
				break;
			}

			/* check if the wait_while_timer expired; the counter
			 * is decremented as part of the test
			 */
			if (port->sm_mux_timer_counter
			    && !(--port->sm_mux_timer_counter))
				port->sm_vars |= AD_PORT_READY_N;

			/* in order to withhold the selection logic to check
			 * all ports READY_N value every callback cycle to
			 * update ready variable, we check READY_N and update
			 * READY here
			 */
			__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator));

			/* if the wait_while_timer expired, and the port is
			 * in READY state, move to ATTACHED state
			 */
			if ((port->sm_vars & AD_PORT_READY)
			    && !port->sm_mux_timer_counter)
				port->sm_mux_state = AD_MUX_ATTACHED;
			break;
		case AD_MUX_ATTACHED:
			/* check also if agg_select_timer expired (so the
			 * enable port will take place only after this timer)
			 */
			if ((port->sm_vars & AD_PORT_SELECTED) &&
			    (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) &&
			    !__check_agg_selection_timer(port)) {
				/* only a port of the active aggregator moves
				 * on to COLLECTING_DISTRIBUTING
				 */
				if (port->aggregator->is_active)
					port->sm_mux_state =
					    AD_MUX_COLLECTING_DISTRIBUTING;
			} else if (!(port->sm_vars & AD_PORT_SELECTED) ||
				   (port->sm_vars & AD_PORT_STANDBY)) {
				/* if UNSELECTED or STANDBY */
				port->sm_vars &= ~AD_PORT_READY_N;
				/* in order to withhold the selection logic to
				 * check all ports READY_N value every callback
				 * cycle to update ready variable, we check
				 * READY_N and update READY here
				 */
				__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator));
				port->sm_mux_state = AD_MUX_DETACHED;
			} else if (port->aggregator->is_active) {
				/* staying ATTACHED in the active aggregator:
				 * advertise the actor as in sync
				 */
				port->actor_oper_port_state |=
				    LACP_STATE_SYNCHRONIZATION;
			}
			break;
		case AD_MUX_COLLECTING_DISTRIBUTING:
			/* fall back to ATTACHED when the port is unselected
			 * or standby, or either side is no longer in sync
			 */
			if (!(port->sm_vars & AD_PORT_SELECTED) ||
			    (port->sm_vars & AD_PORT_STANDBY) ||
			    !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) ||
			    !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) {
				port->sm_mux_state = AD_MUX_ATTACHED;
			} else {
				/* if port state hasn't changed make
				 * sure that a collecting distributing
				 * port in an active aggregator is enabled
				 */
				if (port->aggregator &&
				    port->aggregator->is_active &&
				    !__port_is_enabled(port)) {
					__enable_port(port);
					*update_slave_arr = true;
				}
			}
			break;
		default:
			break;
		}
	}

	/* check if the state machine was changed */
	if (port->sm_mux_state != last_state) {
		slave_dbg(port->slave->bond->dev, port->slave->dev,
			  "Mux Machine: Port=%d, Last State=%d, Curr State=%d\n",
			  port->actor_port_number,
			  last_state,
			  port->sm_mux_state);
		/* entry actions of the new state; every state except WAITING
		 * also requests a LACPDU transmission (ntt)
		 */
		switch (port->sm_mux_state) {
		case AD_MUX_DETACHED:
			port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION;
			ad_disable_collecting_distributing(port,
							   update_slave_arr);
			port->actor_oper_port_state &= ~LACP_STATE_COLLECTING;
			port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING;
			port->ntt = true;
			break;
		case AD_MUX_WAITING:
			/* arm the wait_while timer */
			port->sm_mux_timer_counter = __ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0);
			break;
		case AD_MUX_ATTACHED:
			/* actor sync follows the aggregator's is_active flag */
			if (port->aggregator->is_active)
				port->actor_oper_port_state |=
				    LACP_STATE_SYNCHRONIZATION;
			else
				port->actor_oper_port_state &=
				    ~LACP_STATE_SYNCHRONIZATION;
			port->actor_oper_port_state &= ~LACP_STATE_COLLECTING;
			port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING;
			ad_disable_collecting_distributing(port,
							   update_slave_arr);
			port->ntt = true;
			break;
		case AD_MUX_COLLECTING_DISTRIBUTING:
			port->actor_oper_port_state |= LACP_STATE_COLLECTING;
			port->actor_oper_port_state |= LACP_STATE_DISTRIBUTING;
			port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION;
			ad_enable_collecting_distributing(port,
							  update_slave_arr);
			port->ntt = true;
			break;
		default:
			break;
		}
	}
}

/**
 * ad_rx_machine - handle a port's rx State Machine
 * @lacpdu: the lacpdu we've received (NULL when the call is not triggered by
 *	    a received PDU)
 * @port: the port we're looking at
 *
 * If lacpdu arrived, stop previous timer (if exists) and set the next state as
 * CURRENT. If timer expired set the state machine in the proper state.
 * In other cases, this function checks if we need to switch to other state.
 */
static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
{
	rx_states_t last_state;

	/* keep current State Machine state to compare later if it was
	 * changed
	 */
	last_state = port->sm_rx_state;

	/* count the received PDU, per slave and per bond */
	if (lacpdu) {
		atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.lacpdu_rx);
		atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.lacpdu_rx);
	}
	/* check if state machine should change state */

	/* first, check if port was reinitialized */
	if (port->sm_vars & AD_PORT_BEGIN) {
		port->sm_rx_state = AD_RX_INITIALIZE;
		port->sm_vars |= AD_PORT_CHURNED;
	/* check if port is not enabled */
	} else if (!(port->sm_vars & AD_PORT_BEGIN) && !port->is_enabled)
		port->sm_rx_state = AD_RX_PORT_DISABLED;
	/* check if new lacpdu arrived */
	else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) ||
		 (port->sm_rx_state == AD_RX_DEFAULTED) ||
		 (port->sm_rx_state == AD_RX_CURRENT))) {
		/* entering CURRENT from EXPIRED/DEFAULTED flags churn */
		if (port->sm_rx_state != AD_RX_CURRENT)
			port->sm_vars |= AD_PORT_CHURNED;
		port->sm_rx_timer_counter = 0;
		port->sm_rx_state = AD_RX_CURRENT;
	} else {
		/* if timer is on, and if it is expired; the counter is
		 * decremented as part of the test
		 */
		if (port->sm_rx_timer_counter &&
		    !(--port->sm_rx_timer_counter)) {
			switch (port->sm_rx_state) {
			case AD_RX_EXPIRED:
				port->sm_rx_state = AD_RX_DEFAULTED;
				break;
			case AD_RX_CURRENT:
				port->sm_rx_state = AD_RX_EXPIRED;
				break;
			default:
				break;
			}
		} else {
			/* if no lacpdu arrived and no timer is on */
			switch (port->sm_rx_state) {
			case AD_RX_PORT_DISABLED:
				/* an enabled port leaves PORT_DISABLED for
				 * EXPIRED (LACP enabled) or LACP_DISABLED
				 */
				if (port->is_enabled &&
				    (port->sm_vars & AD_PORT_LACP_ENABLED))
					port->sm_rx_state = AD_RX_EXPIRED;
				else if (port->is_enabled
					 && ((port->sm_vars
					      & AD_PORT_LACP_ENABLED) == 0))
					port->sm_rx_state = AD_RX_LACP_DISABLED;
				break;
			default:
				break;

			}
		}
	}

	/* check if the State machine was changed or new lacpdu arrived */
	if ((port->sm_rx_state != last_state) || (lacpdu)) {
		slave_dbg(port->slave->bond->dev, port->slave->dev,
			  "Rx Machine: Port=%d, Last State=%d, Curr State=%d\n",
			  port->actor_port_number,
			  last_state,
			  port->sm_rx_state);
		switch (port->sm_rx_state) {
		case AD_RX_INITIALIZE:
			/* LACP is enabled only when the duplex bits of the
			 * actor operational key are set
			 */
			if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS))
				port->sm_vars &= ~AD_PORT_LACP_ENABLED;
			else
				port->sm_vars |= AD_PORT_LACP_ENABLED;
			port->sm_vars &= ~AD_PORT_SELECTED;
			__record_default(port);
			port->actor_oper_port_state &= ~LACP_STATE_EXPIRED;
			port->sm_rx_state = AD_RX_PORT_DISABLED;

			/* INITIALIZE continues straight into PORT_DISABLED */
			fallthrough;
		case AD_RX_PORT_DISABLED:
			port->sm_vars &= ~AD_PORT_MATCHED;
			break;
		case AD_RX_LACP_DISABLED:
			port->sm_vars &= ~AD_PORT_SELECTED;
			__record_default(port);
			port->partner_oper.port_state &= ~LACP_STATE_AGGREGATION;
			port->sm_vars |= AD_PORT_MATCHED;
			port->actor_oper_port_state &= ~LACP_STATE_EXPIRED;
			break;
		case AD_RX_EXPIRED:
			/* Reset of the Synchronization flag (Standard 43.4.12)
			 * This reset cause to disable this port in the
			 * COLLECTING_DISTRIBUTING state of the mux machine in
			 * case of EXPIRED even if LINK_DOWN didn't arrive for
			 * the port.
			 */
			port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION;
			port->sm_vars &= ~AD_PORT_MATCHED;
			port->partner_oper.port_state |= LACP_STATE_LACP_TIMEOUT;
			port->partner_oper.port_state |= LACP_STATE_LACP_ACTIVITY;
			/* restart the current_while timer, short timeout */
			port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT));
			port->actor_oper_port_state |= LACP_STATE_EXPIRED;
			port->sm_vars |= AD_PORT_CHURNED;
			break;
		case AD_RX_DEFAULTED:
			__update_default_selected(port);
			__record_default(port);
			port->sm_vars |= AD_PORT_MATCHED;
			port->actor_oper_port_state &= ~LACP_STATE_EXPIRED;
			break;
		case AD_RX_CURRENT:
			/* detect loopback situation: the PDU carries our own
			 * actor system address; bail out without recording it
			 */
			if (MAC_ADDRESS_EQUAL(&(lacpdu->actor_system),
					      &(port->actor_system))) {
				slave_err(port->slave->bond->dev, port->slave->dev, "An illegal loopback occurred on slave\n"
					  "Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n");
				return;
			}
			/* selected/ntt are evaluated against the stored
			 * values before the PDU overwrites them
			 */
			__update_selected(lacpdu, port);
			__update_ntt(lacpdu, port);
			__record_pdu(lacpdu, port);
			/* restart the current_while timer; long or short
			 * according to the actor's LACP_TIMEOUT bit
			 */
			port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & LACP_STATE_LACP_TIMEOUT));
			port->actor_oper_port_state &= ~LACP_STATE_EXPIRED;
			break;
		default:
			break;
		}
	}
}

/**
 * ad_churn_machine - handle port churn's state machine
 * @port: the port we're looking at
 *
 * When %AD_PORT_CHURNED is set, both churn machines are put back into
 * %AD_CHURN_MONITOR and their timers restarted. Otherwise each timer is
 * ticked down; when one expires while its machine is still monitoring, the
 * machine becomes %AD_NO_CHURN if the corresponding side is in sync, or
 * %AD_CHURN (and the churn counter is incremented) if it is not.
 */
static void ad_churn_machine(struct port *port)
{
	if (port->sm_vars & AD_PORT_CHURNED) {
		port->sm_vars &= ~AD_PORT_CHURNED;
		port->sm_churn_actor_state = AD_CHURN_MONITOR;
		port->sm_churn_partner_state = AD_CHURN_MONITOR;
		port->sm_churn_actor_timer_counter =
			__ad_timer_to_ticks(AD_ACTOR_CHURN_TIMER, 0);
		port->sm_churn_partner_timer_counter =
			 __ad_timer_to_ticks(AD_PARTNER_CHURN_TIMER, 0);
		return;
	}
	/* actor churn timer: decremented as part of the test */
	if (port->sm_churn_actor_timer_counter &&
	    !(--port->sm_churn_actor_timer_counter) &&
	    port->sm_churn_actor_state == AD_CHURN_MONITOR) {
		if (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION) {
			port->sm_churn_actor_state = AD_NO_CHURN;
		} else {
			port->churn_actor_count++;
			port->sm_churn_actor_state = AD_CHURN;
		}
	}
	/* partner churn timer: decremented as part of the test */
	if (port->sm_churn_partner_timer_counter &&
	    !(--port->sm_churn_partner_timer_counter) &&
	    port->sm_churn_partner_state == AD_CHURN_MONITOR) {
		if (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) {
			port->sm_churn_partner_state = AD_NO_CHURN;
		} else {
			port->churn_partner_count++;
			port->sm_churn_partner_state = AD_CHURN;
		}
	}
}

/**
 * ad_tx_machine - handle a port's tx state machine
 * @port: the port we're looking at
 *
 * Each time the tx timer expires, a LACPDU is sent if ntt is set and LACP is
 * enabled on the port; ntt is cleared only when the send succeeded. The timer
 * is then restarted.
 */
static void ad_tx_machine(struct port *port)
{
	/* check if tx timer expired, to verify that we do not send more than
	 * AD_MAX_TX_IN_SECOND packets per second; the counter is decremented
	 * as part of the test
	 */
	if (port->sm_tx_timer_counter && !(--port->sm_tx_timer_counter)) {
		/* check if there is something to send */
		if (port->ntt && (port->sm_vars & AD_PORT_LACP_ENABLED)) {
			__update_lacpdu_from_port(port);

			if (ad_lacpdu_send(port) >= 0) {
				slave_dbg(port->slave->bond->dev,
					  port->slave->dev,
					  "Sent LACPDU on port %d\n",
					  port->actor_port_number);

				/* mark ntt as false, so it will not be sent
				 * again until demanded
				 */
				port->ntt = false;
			}
		}
		/* restart tx timer (to verify that we will not exceed
		 * AD_MAX_TX_IN_SECOND)
		 */
		port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND;
	}
}

/**
 * ad_periodic_machine - handle a port's periodic state machine
 * @port: the port we're looking at
 * @bond_params: bond parameters we will use
 *
 * Turn ntt flag on periodically to perform periodic transmission of lacpdu's.
*/
static void ad_periodic_machine(struct port *port, struct bond_params *bond_params)
{
	periodic_states_t last_state;

	/* keep current state machine state to compare later if it was changed */
	last_state = port->sm_periodic_state;

	/* check if port was reinitialized; periodic transmission is also
	 * switched off when LACP is disabled on the port, the port is not
	 * enabled, neither actor nor partner has LACP_ACTIVITY set, or the
	 * bond's lacp_active parameter is off
	 */
	if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||
	    (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
	    !bond_params->lacp_active) {
		port->sm_periodic_state = AD_NO_PERIODIC;
	}
	/* check if state machine should change state */
	else if (port->sm_periodic_timer_counter) {
		/* check if periodic state machine expired; the counter is
		 * decremented as part of the test
		 */
		if (!(--port->sm_periodic_timer_counter)) {
			/* if expired then do tx */
			port->sm_periodic_state = AD_PERIODIC_TX;
		} else {
			/* If not expired, check if there is some new timeout
			 * parameter from the partner state
			 */
			switch (port->sm_periodic_state) {
			case AD_FAST_PERIODIC:
				if (!(port->partner_oper.port_state
				      & LACP_STATE_LACP_TIMEOUT))
					port->sm_periodic_state = AD_SLOW_PERIODIC;
				break;
			case AD_SLOW_PERIODIC:
				/* partner now has LACP_TIMEOUT set: stop the
				 * timer and transmit right away
				 */
				if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) {
					port->sm_periodic_timer_counter = 0;
					port->sm_periodic_state = AD_PERIODIC_TX;
				}
				break;
			default:
				break;
			}
		}
	} else {
		/* no timer running: leave NO_PERIODIC / PERIODIC_TX for one
		 * of the timed states
		 */
		switch (port->sm_periodic_state) {
		case AD_NO_PERIODIC:
			port->sm_periodic_state = AD_FAST_PERIODIC;
			break;
		case AD_PERIODIC_TX:
			if (!(port->partner_oper.port_state &
			    LACP_STATE_LACP_TIMEOUT))
				port->sm_periodic_state = AD_SLOW_PERIODIC;
			else
				port->sm_periodic_state = AD_FAST_PERIODIC;
			break;
		default:
			break;
		}
	}

	/* check if the state machine was changed */
	if (port->sm_periodic_state != last_state) {
		slave_dbg(port->slave->bond->dev, port->slave->dev,
			  "Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n",
			  port->actor_port_number, last_state,
			  port->sm_periodic_state);
		/* entry actions of the new state */
		switch (port->sm_periodic_state) {
		case AD_NO_PERIODIC:
			port->sm_periodic_timer_counter = 0;
			break;
		case AD_FAST_PERIODIC:
			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1;
			break;
		case AD_SLOW_PERIODIC:
			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1;
			break;
		case AD_PERIODIC_TX:
			/* request a LACPDU transmission */
			port->ntt = true;
			break;
		default:
			break;
		}
	}
}

/** * ad_port_selection_logic - select aggregation groups * @port: the port we're looking at * @update_slave_arr: Does slave array need update? * * Select aggregation groups, and assign each port for it's aggregetor. The * selection logic is called in the inititalization (after all the handshkes), * and after every lacpdu receive (if selected is off). */ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) { struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator; struct port *last_port = NULL, *curr_port; struct list_head *iter; struct bonding *bond; struct slave *slave; int found = 0; /* if the port is already Selected, do nothing */ if (port->sm_vars & AD_PORT_SELECTED) return; bond = __get_bond_by_port(port); /* if the port is connected to other aggregator, detach it */ if (port->aggregator) { /* detach the port from its former aggregator */ temp_aggregator = port->aggregator; for (curr_port = temp_aggregator->lag_ports; curr_port; last_port = curr_port, curr_port = curr_port->next_port_in_aggregator) { if (curr_port == port) { temp_aggregator->num_of_ports--; /* if it is the first port attached to the * aggregator */ if (!last_port) { temp_aggregator->lag_ports = port->next_port_in_aggregator; } else { /* not the first port attached to the * aggregator */ last_port->next_port_in_aggregator = port->next_port_in_aggregator; } /* clear the port's relations to this * aggregator */ port->aggregator = NULL; port->next_port_in_aggregator = NULL; port->actor_port_aggregator_identifier = 
0; slave_dbg(bond->dev, port->slave->dev, "Port %d left LAG %d\n", port->actor_port_number, temp_aggregator->aggregator_identifier); /* if the aggregator is empty, clear its * parameters, and set it ready to be attached */ if (!temp_aggregator->lag_ports) ad_clear_agg(temp_aggregator); break; } } if (!curr_port) { /* meaning: the port was related to an aggregator * but was not on the aggregator port list */ net_warn_ratelimited("%s: (slave %s): Warning: Port %d was related to aggregator %d but was not on its port list\n", port->slave->bond->dev->name, port->slave->dev->name, port->actor_port_number, port->aggregator->aggregator_identifier); } } /* search on all aggregators for a suitable aggregator for this port */ bond_for_each_slave(bond, slave, iter) { aggregator = &(SLAVE_AD_INFO(slave)->aggregator); /* keep a free aggregator for later use(if needed) */ if (!aggregator->lag_ports) { if (!free_aggregator) free_aggregator = aggregator; continue; } /* check if current aggregator suits us */ if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && /* if all parameters match AND */ MAC_ADDRESS_EQUAL(&(aggregator->partner_system), &(port->partner_oper.system)) && (aggregator->partner_system_priority == port->partner_oper.system_priority) && (aggregator->partner_oper_aggregator_key == port->partner_oper.key) ) && ((!MAC_ADDRESS_EQUAL(&(port->partner_oper.system), &(null_mac_addr)) && /* partner answers */ !aggregator->is_individual) /* but is not individual OR */ ) ) { /* attach to the founded aggregator */ port->aggregator = aggregator; port->actor_port_aggregator_identifier = port->aggregator->aggregator_identifier; port->next_port_in_aggregator = aggregator->lag_ports; port->aggregator->num_of_ports++; aggregator->lag_ports = port; slave_dbg(bond->dev, slave->dev, "Port %d joined LAG %d (existing LAG)\n", port->actor_port_number, port->aggregator->aggregator_identifier); /* mark this port as selected */ port->sm_vars |= AD_PORT_SELECTED; found = 
1; break; } } /* the port couldn't find an aggregator - attach it to a new * aggregator */ if (!found) { if (free_aggregator) { /* assign port a new aggregator */ port->aggregator = free_aggregator; port->actor_port_aggregator_identifier = port->aggregator->aggregator_identifier; /* update the new aggregator's parameters * if port was responsed from the end-user */ if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS) /* if port is full duplex */ port->aggregator->is_individual = false; else port->aggregator->is_individual = true; port->aggregator->actor_admin_aggregator_key = port->actor_admin_port_key; port->aggregator->actor_oper_aggregator_key = port->actor_oper_port_key; port->aggregator->partner_system = port->partner_oper.system; port->aggregator->partner_system_priority = port->partner_oper.system_priority; port->aggregator->partner_oper_aggregator_key = port->partner_oper.key; port->aggregator->receive_state = 1; port->aggregator->transmit_state = 1; port->aggregator->lag_ports = port; port->aggregator->num_of_ports++; /* mark this port as selected */ port->sm_vars |= AD_PORT_SELECTED; slave_dbg(bond->dev, port->slave->dev, "Port %d joined LAG %d (new LAG)\n", port->actor_port_number, port->aggregator->aggregator_identifier); } else { slave_err(bond->dev, port->slave->dev, "Port %d did not find a suitable aggregator\n", port->actor_port_number); return; } } /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE * in all aggregator's ports, else set ready=FALSE in all * aggregator's ports */ __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); aggregator = __get_first_agg(port); ad_agg_selection_logic(aggregator, update_slave_arr); if (!port->aggregator->is_active) port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; } /* Decide if "agg" is a better choice for the new active aggregator that * the current best, according to the ad_select policy. 
*/ static struct aggregator *ad_agg_selection_test(struct aggregator *best, struct aggregator *curr) { /* 0. If no best, select current. * * 1. If the current agg is not individual, and the best is * individual, select current. * * 2. If current agg is individual and the best is not, keep best. * * 3. Therefore, current and best are both individual or both not * individual, so: * * 3a. If current agg partner replied, and best agg partner did not, * select current. * * 3b. If current agg partner did not reply and best agg partner * did reply, keep best. * * 4. Therefore, current and best both have partner replies or * both do not, so perform selection policy: * * BOND_AD_COUNT: Select by count of ports. If count is equal, * select by bandwidth. * * BOND_AD_STABLE, BOND_AD_BANDWIDTH: Select by bandwidth. */ if (!best) return curr; if (!curr->is_individual && best->is_individual) return curr; if (curr->is_individual && !best->is_individual) return best; if (__agg_has_partner(curr) && !__agg_has_partner(best)) return curr; if (!__agg_has_partner(curr) && __agg_has_partner(best)) return best; switch (__get_agg_selection_mode(curr->lag_ports)) { case BOND_AD_COUNT: if (__agg_active_ports(curr) > __agg_active_ports(best)) return curr; if (__agg_active_ports(curr) < __agg_active_ports(best)) return best; fallthrough; case BOND_AD_STABLE: case BOND_AD_BANDWIDTH: if (__get_agg_bandwidth(curr) > __get_agg_bandwidth(best)) return curr; break; default: net_warn_ratelimited("%s: (slave %s): Impossible agg select mode %d\n", curr->slave->bond->dev->name, curr->slave->dev->name, __get_agg_selection_mode(curr->lag_ports)); break; } return best; } static int agg_device_up(const struct aggregator *agg) { struct port *port = agg->lag_ports; if (!port) return 0; for (port = agg->lag_ports; port; port = port->next_port_in_aggregator) { if (netif_running(port->slave->dev) && netif_carrier_ok(port->slave->dev)) return 1; } return 0; } /** * ad_agg_selection_logic - select an aggregation 
group for a team
 * @agg: the aggregator we're looking at
 * @update_slave_arr: Does slave array need update?
 *
 * It is assumed that only one aggregator may be selected for a team.
 *
 * The logic of this function is to select the aggregator according to
 * the ad_select policy:
 *
 * BOND_AD_STABLE: select the aggregator with the most ports attached to
 * it, and to reselect the active aggregator only if the previous
 * aggregator has no more ports related to it.
 *
 * BOND_AD_BANDWIDTH: select the aggregator with the highest total
 * bandwidth, and reselect whenever a link state change takes place or the
 * set of slaves in the bond changes.
 *
 * BOND_AD_COUNT: select the aggregator with largest number of ports
 * (slaves), and reselect whenever a link state change takes place or the
 * set of slaves in the bond changes.
 *
 * FIXME: this function MUST be called with the first agg in the bond, or
 * __get_active_agg() won't work correctly. This function should be better
 * called with the bond itself, and retrieve the first agg from it.
 */
static void ad_agg_selection_logic(struct aggregator *agg,
				   bool *update_slave_arr)
{
	struct aggregator *best, *active, *origin;
	struct bonding *bond = agg->slave->bond;
	struct list_head *iter;
	struct slave *slave;
	struct port *port;

	rcu_read_lock();
	/* agg is reused as the loop cursor below, so remember the caller's
	 * aggregator for the second __get_active_agg() lookup
	 */
	origin = agg;
	active = __get_active_agg(agg);
	/* the current active aggregator is the initial candidate only while
	 * one of its devices is still up
	 */
	best = (active && agg_device_up(active)) ? active : NULL;

	/* clear is_active everywhere and pick the best usable aggregator */
	bond_for_each_slave_rcu(bond, slave, iter) {
		agg = &(SLAVE_AD_INFO(slave)->aggregator);

		agg->is_active = 0;

		if (__agg_active_ports(agg) && agg_device_up(agg))
			best = ad_agg_selection_test(best, agg);
	}

	if (best &&
	    __get_agg_selection_mode(best->lag_ports) == BOND_AD_STABLE) {
		/* For the STABLE policy, don't replace the old active
		 * aggregator if it's still active (it has an answering
		 * partner) or if both the best and active don't have an
		 * answering partner.
		 */
		if (active && active->lag_ports &&
		    __agg_active_ports(active) &&
		    (__agg_has_partner(active) ||
		     (!__agg_has_partner(active) &&
		     !__agg_has_partner(best)))) {
			/* ...unless the old active has a zero oper key while
			 * the best candidate has a non-zero one
			 */
			if (!(!active->actor_oper_aggregator_key &&
			      best->actor_oper_aggregator_key)) {
				best = NULL;
				active->is_active = 1;
			}
		}
	}

	/* the winner is the aggregator that was already active: just restore
	 * the flag cleared in the loop above
	 */
	if (best && (best == active)) {
		best = NULL;
		active->is_active = 1;
	}

	/* if there is new best aggregator, activate it */
	if (best) {
		netdev_dbg(bond->dev, "(slave %s): best Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n",
			   best->slave ? best->slave->dev->name : "NULL",
			   best->aggregator_identifier, best->num_of_ports,
			   best->actor_oper_aggregator_key,
			   best->partner_oper_aggregator_key,
			   best->is_individual, best->is_active);
		netdev_dbg(bond->dev, "(slave %s): best ports %p slave %p\n",
			   best->slave ? best->slave->dev->name : "NULL",
			   best->lag_ports, best->slave);

		bond_for_each_slave_rcu(bond, slave, iter) {
			agg = &(SLAVE_AD_INFO(slave)->aggregator);

			slave_dbg(bond->dev, slave->dev, "Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n",
				  agg->aggregator_identifier, agg->num_of_ports,
				  agg->actor_oper_aggregator_key,
				  agg->partner_oper_aggregator_key,
				  agg->is_individual, agg->is_active);
		}

		/* check if any partner replies */
		if (best->is_individual)
			net_warn_ratelimited("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n",
					     bond->dev->name);

		best->is_active = 1;
		netdev_dbg(bond->dev, "(slave %s): LAG %d chosen as the active LAG\n",
			   best->slave ? best->slave->dev->name : "NULL",
			   best->aggregator_identifier);
		netdev_dbg(bond->dev, "(slave %s): Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n",
			   best->slave ? best->slave->dev->name : "NULL",
			   best->aggregator_identifier, best->num_of_ports,
			   best->actor_oper_aggregator_key,
			   best->partner_oper_aggregator_key,
			   best->is_individual, best->is_active);

		/* disable the ports that were related to the former
		 * active_aggregator
		 */
		if (active) {
			for (port = active->lag_ports; port;
			     port = port->next_port_in_aggregator) {
				__disable_port(port);
			}
		}
		/* Slave array needs update. */
		*update_slave_arr = true;
	}

	/* if the selected aggregator is of join individuals
	 * (partner_system is NULL), enable their ports
	 */
	active = __get_active_agg(origin);

	if (active) {
		if (!__agg_has_partner(active)) {
			for (port = active->lag_ports; port;
			     port = port->next_port_in_aggregator) {
				__enable_port(port);
			}
			*update_slave_arr = true;
		}
	}

	rcu_read_unlock();

	bond_3ad_set_carrier(bond);
}

/**
 * ad_clear_agg - clear a given aggregator's parameters
 * @aggregator: the aggregator we're looking at (may be NULL, then a no-op)
 *
 * Resets the keys, partner information, rx/tx state, port list, active flag
 * and port count. The aggregator's identifier, MAC address and slave pointer
 * are left untouched.
 */
static void ad_clear_agg(struct aggregator *aggregator)
{
	if (aggregator) {
		aggregator->is_individual = false;
		aggregator->actor_admin_aggregator_key = 0;
		aggregator->actor_oper_aggregator_key = 0;
		eth_zero_addr(aggregator->partner_system.mac_addr_value);
		aggregator->partner_system_priority = 0;
		aggregator->partner_oper_aggregator_key = 0;
		aggregator->receive_state = 0;
		aggregator->transmit_state = 0;
		aggregator->lag_ports = NULL;
		aggregator->is_active = 0;
		aggregator->num_of_ports = 0;
		pr_debug("%s: LAG %d was cleared\n",
			 aggregator->slave ?
			 aggregator->slave->dev->name : "NULL",
			 aggregator->aggregator_identifier);
	}
}

/**
 * ad_initialize_agg - initialize a given aggregator's parameters
 * @aggregator: the aggregator we're looking at (may be NULL, then a no-op)
 *
 * Everything ad_clear_agg() resets, plus the aggregator's own MAC address,
 * identifier and slave pointer.
 */
static void ad_initialize_agg(struct aggregator *aggregator)
{
	if (aggregator) {
		ad_clear_agg(aggregator);

		eth_zero_addr(aggregator->aggregator_mac_address.mac_addr_value);
		aggregator->aggregator_identifier = 0;
		aggregator->slave = NULL;
	}
}

/**
 * ad_initialize_port - initialize a given port's parameters
 * @port: the port we're looking at (may be NULL, then a no-op)
 * @lacp_fast: boolean. whether fast periodic should be used
 *
 * Sets the actor defaults, copies the default partner parameters into both
 * partner_admin and partner_oper, zeroes the state machine variables and
 * installs the LACPDU template. port->slave and port->actor_port_number are
 * not written here.
 */
static void ad_initialize_port(struct port *port, int lacp_fast)
{
	/* default partner parameters, used for both the admin and oper copy */
	static const struct port_params tmpl = {
		.system_priority = 0xffff,
		.key             = 1,
		.port_number     = 1,
		.port_priority   = 0xff,
		.port_state      = 1,
	};
	/* constant header and TLV type/length fields of an outgoing LACPDU */
	static const struct lacpdu lacpdu = {
		.subtype		= 0x01,
		.version_number = 0x01,
		.tlv_type_actor_info = 0x01,
		.actor_information_length = 0x14,
		.tlv_type_partner_info = 0x02,
		.partner_information_length = 0x14,
		.tlv_type_collector_info = 0x03,
		.collector_information_length = 0x10,
		.collector_max_delay = htons(AD_COLLECTOR_MAX_DELAY),
	};

	if (port) {
		port->actor_port_priority = 0xff;
		port->actor_port_aggregator_identifier = 0;
		port->ntt = false;
		port->actor_admin_port_state = LACP_STATE_AGGREGATION |
					       LACP_STATE_LACP_ACTIVITY;
		port->actor_oper_port_state  = LACP_STATE_AGGREGATION |
					       LACP_STATE_LACP_ACTIVITY;

		/* only the oper state gets the timeout bit; the admin state
		 * is left without it
		 */
		if (lacp_fast)
			port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT;

		memcpy(&port->partner_admin, &tmpl, sizeof(tmpl));
		memcpy(&port->partner_oper, &tmpl, sizeof(tmpl));

		port->is_enabled = true;
		/* private parameters */
		port->sm_vars = AD_PORT_BEGIN | AD_PORT_LACP_ENABLED;
		port->sm_rx_state = 0;
		port->sm_rx_timer_counter = 0;
		port->sm_periodic_state = 0;
		port->sm_periodic_timer_counter = 0;
		port->sm_mux_state = 0;
		port->sm_mux_timer_counter = 0;
		port->sm_tx_state = 0;
		port->aggregator = NULL;
		port->next_port_in_aggregator = NULL;
		port->transaction_id = 0;

		port->sm_churn_actor_timer_counter = 0;
		port->sm_churn_actor_state = 0;
		port->churn_actor_count = 0;
		port->sm_churn_partner_timer_counter = 0;
		port->sm_churn_partner_state = 0;
		port->churn_partner_count = 0;

		memcpy(&port->lacpdu, &lacpdu, sizeof(lacpdu));
	}
}

/**
 * ad_enable_collecting_distributing - enable a port's transmit/receive
 * @port: the port we're looking at
 * @update_slave_arr: Does slave array need update?
 *
 * Enable @port if it's in an active aggregator
 *
 * NOTE(review): port->aggregator is dereferenced without a NULL check here,
 * unlike ad_disable_collecting_distributing() - presumably callers only get
 * here with an attached port; confirm against the mux state machine.
 */
static void ad_enable_collecting_distributing(struct port *port,
					      bool *update_slave_arr)
{
	if (port->aggregator->is_active) {
		slave_dbg(port->slave->bond->dev, port->slave->dev,
			  "Enabling port %d (LAG %d)\n",
			  port->actor_port_number,
			  port->aggregator->aggregator_identifier);
		__enable_port(port);
		/* Slave array needs update */
		*update_slave_arr = true;
	}
}

/**
 * ad_disable_collecting_distributing - disable a port's transmit/receive
 * @port: the port we're looking at
 * @update_slave_arr: Does slave array need update?
 *
 * The port is only disabled when it is attached to an aggregator whose
 * partner system address is not the null MAC address.
 */
static void ad_disable_collecting_distributing(struct port *port,
					       bool *update_slave_arr)
{
	if (port->aggregator &&
	    !MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system),
			       &(null_mac_addr))) {
		slave_dbg(port->slave->bond->dev, port->slave->dev,
			  "Disabling port %d (LAG %d)\n",
			  port->actor_port_number,
			  port->aggregator->aggregator_identifier);
		__disable_port(port);
		/* Slave array needs an update */
		*update_slave_arr = true;
	}
}

/**
 * ad_marker_info_received - handle receive of a Marker information frame
 * @marker_info: Marker info received
 * @port: the port we're looking at
 *
 * Counts the frame in the per-slave and per-bond statistics, then echoes it
 * back on @port as a Marker Response.
 */
static void ad_marker_info_received(struct bond_marker *marker_info,
				    struct port *port)
{
	struct bond_marker marker;

	atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_rx);
	atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.marker_rx);

	/* copy the received marker data to the response marker */
	memcpy(&marker, marker_info, sizeof(struct bond_marker));
	/* change the marker subtype to marker response */
	marker.tlv_type = AD_MARKER_RESPONSE_SUBTYPE;

	/* send the marker response */
	if (ad_marker_send(port, &marker) >= 0)
		slave_dbg(port->slave->bond->dev, port->slave->dev,
			  "Sent Marker Response on port %d\n",
			  port->actor_port_number);
}

/**
 * ad_marker_response_received - handle receive of a marker response frame
 * @marker: marker PDU received
 * @port: the port we're looking at
 *
 * This function only updates the statistics counters: we decided not to
 * implement sending and handling of marker response PDUs at this stage, but
 * only to respond to marker information.
 */
static void ad_marker_response_received(struct bond_marker *marker,
					struct port *port)
{
	atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_resp_rx);
	atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.marker_resp_rx);

	/* DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW */
}

/* ========= AD exported functions to the main bonding code ========= */

/* Check aggregators status in team every T seconds */
#define AD_AGGREGATOR_SELECTION_TIMER  8

/**
 * bond_3ad_initiate_agg_selection - initiate aggregator selection
 * @bond: bonding struct
 * @timeout: timeout value to set
 *
 * Set the aggregation selection timer, to initiate an agg selection in
 * the very near future.  Called during first initialization, and during
 * any down to up transitions of the bond.
 */
void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout)
{
	atomic_set(&BOND_AD_INFO(bond).agg_select_timer, timeout);
}

/**
 * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures
 * @bond: bonding struct to work on
 *
 * Can be called only after the mac address of the bond is set.
*/ void bond_3ad_initialize(struct bonding *bond) { BOND_AD_INFO(bond).aggregator_identifier = 0; BOND_AD_INFO(bond).system.sys_priority = bond->params.ad_actor_sys_prio; if (is_zero_ether_addr(bond->params.ad_actor_system)) BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); else BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->params.ad_actor_system); bond_3ad_initiate_agg_selection(bond, AD_AGGREGATOR_SELECTION_TIMER * ad_ticks_per_sec); } /** * bond_3ad_bind_slave - initialize a slave's port * @slave: slave struct to work on * * Returns: 0 on success * < 0 on error */ void bond_3ad_bind_slave(struct slave *slave) { struct bonding *bond = bond_get_bond_by_slave(slave); struct port *port; struct aggregator *aggregator; /* check that the slave has not been initialized yet. */ if (SLAVE_AD_INFO(slave)->port.slave != slave) { /* port initialization */ port = &(SLAVE_AD_INFO(slave)->port); ad_initialize_port(port, bond->params.lacp_fast); port->slave = slave; port->actor_port_number = SLAVE_AD_INFO(slave)->id; /* key is determined according to the link speed, duplex and * user key */ port->actor_admin_port_key = bond->params.ad_user_port_key << 6; ad_update_actor_keys(port, false); /* actor system is the bond's system */ __ad_actor_update_port(port); /* tx timer(to verify that no more than MAX_TX_IN_SECOND * lacpdu's are sent in one second) */ port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; __disable_port(port); /* aggregator initialization */ aggregator = &(SLAVE_AD_INFO(slave)->aggregator); ad_initialize_agg(aggregator); aggregator->aggregator_mac_address = *((struct mac_addr *)bond->dev->dev_addr); aggregator->aggregator_identifier = ++BOND_AD_INFO(bond).aggregator_identifier; aggregator->slave = slave; aggregator->is_active = 0; aggregator->num_of_ports = 0; } } /** * bond_3ad_unbind_slave - deinitialize a slave's port * @slave: slave struct to work on * * Search for the aggregator that is 
related to this port, remove the
 * aggregator and assign another aggregator for other port related to it
 * (if any), and remove the port.
 *
 * Runs entirely under bond->mode_lock. An uninitialized port (port->slave
 * is NULL) is reported with a warning and otherwise ignored.
 */
void bond_3ad_unbind_slave(struct slave *slave)
{
	struct port *port, *prev_port, *temp_port;
	struct aggregator *aggregator, *new_aggregator, *temp_aggregator;
	int select_new_active_agg = 0;
	struct bonding *bond = slave->bond;
	struct slave *slave_iter;
	struct list_head *iter;
	bool dummy_slave_update; /* Ignore this value as caller updates array */

	/* Sync against bond_3ad_state_machine_handler() */
	spin_lock_bh(&bond->mode_lock);
	aggregator = &(SLAVE_AD_INFO(slave)->aggregator);
	port = &(SLAVE_AD_INFO(slave)->port);

	/* if slave is null, the whole port is not initialized */
	if (!port->slave) {
		slave_warn(bond->dev, slave->dev, "Trying to unbind an uninitialized port\n");
		goto out;
	}

	slave_dbg(bond->dev, slave->dev, "Unbinding Link Aggregation Group %d\n",
		  aggregator->aggregator_identifier);

	/* Tell the partner that this port is not suitable for aggregation */
	port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION;
	port->actor_oper_port_state &= ~LACP_STATE_COLLECTING;
	port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING;
	port->actor_oper_port_state &= ~LACP_STATE_AGGREGATION;
	__update_lacpdu_from_port(port);
	ad_lacpdu_send(port);

	/* Step 1: this slave's own aggregator is going away. If it carries
	 * ports, they have to be moved to another aggregator first.
	 */

	/* check if this aggregator is occupied */
	if (aggregator->lag_ports) {
		/* check if there are other ports related to this aggregator
		 * except the port related to this slave (that ensures us that
		 * there is a reason to search for new aggregator, and that we
		 * will find one)
		 */
		if ((aggregator->lag_ports != port) ||
		    (aggregator->lag_ports->next_port_in_aggregator)) {
			/* find new aggregator for the related port(s) */
			bond_for_each_slave(bond, slave_iter, iter) {
				new_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator);
				/* if the new aggregator is empty, or it is
				 * connected to our port only
				 */
				if (!new_aggregator->lag_ports ||
				    ((new_aggregator->lag_ports == port) &&
				     !new_aggregator->lag_ports->next_port_in_aggregator))
					break;
			}
			/* the loop ran to completion without a match */
			if (!slave_iter)
				new_aggregator = NULL;

			/* if new aggregator found, copy the aggregator's
			 * parameters and connect the related lag_ports to the
			 * new aggregator
			 */
			if ((new_aggregator) && ((!new_aggregator->lag_ports) || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator))) {
				slave_dbg(bond->dev, slave->dev, "Some port(s) related to LAG %d - replacing with LAG %d\n",
					  aggregator->aggregator_identifier,
					  new_aggregator->aggregator_identifier);

				/* the replacement currently holds only our
				 * port and is the active aggregator, so a new
				 * active aggregator must be chosen afterwards
				 */
				if ((new_aggregator->lag_ports == port) &&
				    new_aggregator->is_active) {
					slave_info(bond->dev, slave->dev, "Removing an active aggregator\n");
					select_new_active_agg = 1;
				}

				new_aggregator->is_individual = aggregator->is_individual;
				new_aggregator->actor_admin_aggregator_key = aggregator->actor_admin_aggregator_key;
				new_aggregator->actor_oper_aggregator_key = aggregator->actor_oper_aggregator_key;
				new_aggregator->partner_system = aggregator->partner_system;
				new_aggregator->partner_system_priority = aggregator->partner_system_priority;
				new_aggregator->partner_oper_aggregator_key = aggregator->partner_oper_aggregator_key;
				new_aggregator->receive_state = aggregator->receive_state;
				new_aggregator->transmit_state = aggregator->transmit_state;
				new_aggregator->lag_ports = aggregator->lag_ports;
				new_aggregator->is_active = aggregator->is_active;
				new_aggregator->num_of_ports = aggregator->num_of_ports;

				/* update the information that is written on
				 * the ports about the aggregator
				 */
				for (temp_port = aggregator->lag_ports; temp_port;
				     temp_port = temp_port->next_port_in_aggregator) {
					temp_port->aggregator = new_aggregator;
					temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier;
				}

				ad_clear_agg(aggregator);

				if (select_new_active_agg)
					ad_agg_selection_logic(__get_first_agg(port),
							       &dummy_slave_update);
			} else {
				slave_warn(bond->dev, slave->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n");
			}
		} else {
			/* in case that the only port related to this
			 * aggregator is the one we want to remove
			 */
			/* read is_active before ad_clear_agg() resets it */
			select_new_active_agg = aggregator->is_active;
			ad_clear_agg(aggregator);
			if (select_new_active_agg) {
				slave_info(bond->dev, slave->dev, "Removing an active aggregator\n");
				/* select new active aggregator */
				temp_aggregator = __get_first_agg(port);
				if (temp_aggregator)
					ad_agg_selection_logic(temp_aggregator,
							       &dummy_slave_update);
			}
		}
	}

	slave_dbg(bond->dev, slave->dev, "Unbinding port %d\n", port->actor_port_number);

	/* Step 2: unlink the port itself from whichever aggregator's
	 * lag_ports list it is on.
	 */

	/* find the aggregator that this port is connected to */
	bond_for_each_slave(bond, slave_iter, iter) {
		temp_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator);
		prev_port = NULL;
		/* search the port in the aggregator's related ports */
		for (temp_port = temp_aggregator->lag_ports; temp_port;
		     prev_port = temp_port,
		     temp_port = temp_port->next_port_in_aggregator) {
			if (temp_port == port) {
				/* the aggregator found - detach the port from
				 * this aggregator
				 */
				if (prev_port)
					prev_port->next_port_in_aggregator = temp_port->next_port_in_aggregator;
				else
					temp_aggregator->lag_ports = temp_port->next_port_in_aggregator;
				temp_aggregator->num_of_ports--;
				if (__agg_active_ports(temp_aggregator) == 0) {
					/* read is_active before a possible
					 * ad_clear_agg() resets it
					 */
					select_new_active_agg = temp_aggregator->is_active;
					if (temp_aggregator->num_of_ports == 0)
						ad_clear_agg(temp_aggregator);
					if (select_new_active_agg) {
						slave_info(bond->dev, slave->dev, "Removing an active aggregator\n");
						/* select new active aggregator */
						ad_agg_selection_logic(__get_first_agg(port),
							               &dummy_slave_update);
					}
				}
				/* only leaves the inner loop; the outer loop
				 * continues over the remaining slaves
				 */
				break;
			}
		}
	}
	/* mark the port as uninitialized */
	port->slave = NULL;

out:
	spin_unlock_bh(&bond->mode_lock);
}

/**
 * bond_3ad_update_ad_actor_settings - reflect change of actor settings to ports
 * @bond: bonding struct to work on
 *
 * If an ad_actor setting gets changed we need to update the individual port
 * settings so the bond device will use the new values when it gets upped.
*/ void bond_3ad_update_ad_actor_settings(struct bonding *bond) { struct list_head *iter; struct slave *slave; ASSERT_RTNL(); BOND_AD_INFO(bond).system.sys_priority = bond->params.ad_actor_sys_prio; if (is_zero_ether_addr(bond->params.ad_actor_system)) BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); else BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->params.ad_actor_system); spin_lock_bh(&bond->mode_lock); bond_for_each_slave(bond, slave, iter) { struct port *port = &(SLAVE_AD_INFO(slave))->port; __ad_actor_update_port(port); port->ntt = true; } spin_unlock_bh(&bond->mode_lock); } /** * bond_agg_timer_advance - advance agg_select_timer * @bond: bonding structure * * Return true when agg_select_timer reaches 0. */ static bool bond_agg_timer_advance(struct bonding *bond) { int val, nval; while (1) { val = atomic_read(&BOND_AD_INFO(bond).agg_select_timer); if (!val) return false; nval = val - 1; if (atomic_cmpxchg(&BOND_AD_INFO(bond).agg_select_timer, val, nval) == val) break; } return nval == 0; } /** * bond_3ad_state_machine_handler - handle state machines timeout * @work: work context to fetch bonding struct to work on from * * The state machine handling concept in this module is to check every tick * which state machine should operate any function. The execution order is * round robin, so when we have an interaction between state machines, the * reply of one to each other might be delayed until next tick. * * This function also complete the initialization when the agg_select_timer * times out, and it selects an aggregator for the ports that are yet not * related to any aggregator, and selects the active aggregator for a bond. 
*/ void bond_3ad_state_machine_handler(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, ad_work.work); struct aggregator *aggregator; struct list_head *iter; struct slave *slave; struct port *port; bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; bool update_slave_arr = false; /* Lock to protect data accessed by all (e.g., port->sm_vars) and * against running with bond_3ad_unbind_slave. ad_rx_machine may run * concurrently due to incoming LACPDU as well. */ spin_lock_bh(&bond->mode_lock); rcu_read_lock(); /* check if there are any slaves */ if (!bond_has_slaves(bond)) goto re_arm; if (bond_agg_timer_advance(bond)) { slave = bond_first_slave_rcu(bond); port = slave ? &(SLAVE_AD_INFO(slave)->port) : NULL; /* select the active aggregator for the bond */ if (port) { if (!port->slave) { net_warn_ratelimited("%s: Warning: bond's first port is uninitialized\n", bond->dev->name); goto re_arm; } aggregator = __get_first_agg(port); ad_agg_selection_logic(aggregator, &update_slave_arr); } bond_3ad_set_carrier(bond); } /* for each port run the state machines */ bond_for_each_slave_rcu(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); if (!port->slave) { net_warn_ratelimited("%s: Warning: Found an uninitialized port\n", bond->dev->name); goto re_arm; } ad_rx_machine(NULL, port); ad_periodic_machine(port, &bond->params); ad_port_selection_logic(port, &update_slave_arr); ad_mux_machine(port, &update_slave_arr); ad_tx_machine(port); ad_churn_machine(port); /* turn off the BEGIN bit, since we already handled it */ if (port->sm_vars & AD_PORT_BEGIN) port->sm_vars &= ~AD_PORT_BEGIN; } re_arm: bond_for_each_slave_rcu(bond, slave, iter) { if (slave->should_notify) { should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; break; } } rcu_read_unlock(); spin_unlock_bh(&bond->mode_lock); if (update_slave_arr) bond_slave_arr_work_rearm(bond, 0); if (should_notify_rtnl && rtnl_trylock()) { bond_slave_state_notify(bond); rtnl_unlock(); } 
queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); } /** * bond_3ad_rx_indication - handle a received frame * @lacpdu: received lacpdu * @slave: slave struct to work on * * It is assumed that frames that were sent on this NIC don't returned as new * received frames (loopback). Since only the payload is given to this * function, it check for loopback. */ static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave) { struct bonding *bond = slave->bond; int ret = RX_HANDLER_ANOTHER; struct bond_marker *marker; struct port *port; atomic64_t *stat; port = &(SLAVE_AD_INFO(slave)->port); if (!port->slave) { net_warn_ratelimited("%s: Warning: port of slave %s is uninitialized\n", slave->dev->name, slave->bond->dev->name); return ret; } switch (lacpdu->subtype) { case AD_TYPE_LACPDU: ret = RX_HANDLER_CONSUMED; slave_dbg(slave->bond->dev, slave->dev, "Received LACPDU on port %d\n", port->actor_port_number); /* Protect against concurrent state machines */ spin_lock(&slave->bond->mode_lock); ad_rx_machine(lacpdu, port); spin_unlock(&slave->bond->mode_lock); break; case AD_TYPE_MARKER: ret = RX_HANDLER_CONSUMED; /* No need to convert fields to Little Endian since we * don't use the marker's fields. 
*/ marker = (struct bond_marker *)lacpdu; switch (marker->tlv_type) { case AD_MARKER_INFORMATION_SUBTYPE: slave_dbg(slave->bond->dev, slave->dev, "Received Marker Information on port %d\n", port->actor_port_number); ad_marker_info_received(marker, port); break; case AD_MARKER_RESPONSE_SUBTYPE: slave_dbg(slave->bond->dev, slave->dev, "Received Marker Response on port %d\n", port->actor_port_number); ad_marker_response_received(marker, port); break; default: slave_dbg(slave->bond->dev, slave->dev, "Received an unknown Marker subtype on port %d\n", port->actor_port_number); stat = &SLAVE_AD_INFO(slave)->stats.marker_unknown_rx; atomic64_inc(stat); stat = &BOND_AD_INFO(bond).stats.marker_unknown_rx; atomic64_inc(stat); } break; default: atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_unknown_rx); atomic64_inc(&BOND_AD_INFO(bond).stats.lacpdu_unknown_rx); } return ret; } /** * ad_update_actor_keys - Update the oper / admin keys for a port based on * its current speed and duplex settings. * * @port: the port we'are looking at * @reset: Boolean to just reset the speed and the duplex part of the key * * The logic to change the oper / admin keys is: * (a) A full duplex port can participate in LACP with partner. * (b) When the speed is changed, LACP need to be reinitiated. 
*/ static void ad_update_actor_keys(struct port *port, bool reset) { u8 duplex = 0; u16 ospeed = 0, speed = 0; u16 old_oper_key = port->actor_oper_port_key; port->actor_admin_port_key &= ~(AD_SPEED_KEY_MASKS|AD_DUPLEX_KEY_MASKS); if (!reset) { speed = __get_link_speed(port); ospeed = (old_oper_key & AD_SPEED_KEY_MASKS) >> 1; duplex = __get_duplex(port); port->actor_admin_port_key |= (speed << 1) | duplex; } port->actor_oper_port_key = port->actor_admin_port_key; if (old_oper_key != port->actor_oper_port_key) { /* Only 'duplex' port participates in LACP */ if (duplex) port->sm_vars |= AD_PORT_LACP_ENABLED; else port->sm_vars &= ~AD_PORT_LACP_ENABLED; if (!reset) { if (!speed) { slave_err(port->slave->bond->dev, port->slave->dev, "speed changed to 0 on port %d\n", port->actor_port_number); } else if (duplex && ospeed != speed) { /* Speed change restarts LACP state-machine */ port->sm_vars |= AD_PORT_BEGIN; } } } } /** * bond_3ad_adapter_speed_duplex_changed - handle a slave's speed / duplex * change indication * * @slave: slave struct to work on * * Handle reselection of aggregator (if needed) for this port. */ void bond_3ad_adapter_speed_duplex_changed(struct slave *slave) { struct port *port; port = &(SLAVE_AD_INFO(slave)->port); /* if slave is null, the whole port is not initialized */ if (!port->slave) { slave_warn(slave->bond->dev, slave->dev, "speed/duplex changed for uninitialized port\n"); return; } spin_lock_bh(&slave->bond->mode_lock); ad_update_actor_keys(port, false); spin_unlock_bh(&slave->bond->mode_lock); slave_dbg(slave->bond->dev, slave->dev, "Port %d changed speed/duplex\n", port->actor_port_number); } /** * bond_3ad_handle_link_change - handle a slave's link status change indication * @slave: slave struct to work on * @link: whether the link is now up or down * * Handle reselection of aggregator (if needed) for this port. 
*/
void bond_3ad_handle_link_change(struct slave *slave, char link)
{
	struct aggregator *agg;
	struct port *port;
	bool dummy;

	port = &(SLAVE_AD_INFO(slave)->port);

	/* if slave is null, the whole port is not initialized */
	if (!port->slave) {
		slave_warn(slave->bond->dev, slave->dev, "link status changed for uninitialized port\n");
		return;
	}

	spin_lock_bh(&slave->bond->mode_lock);
	/* On link down we zero duplex and speed, since some adaptors
	 * (ce1000.lan) keep reporting full duplex/speed instead of
	 * N/A (duplex) / 0 (speed).
	 *
	 * On link up we force a recheck of duplex and speed, for the
	 * same class of adaptors.
	 */
	if (link == BOND_LINK_UP) {
		port->is_enabled = true;
		ad_update_actor_keys(port, false);
	} else {
		/* link has failed */
		port->is_enabled = false;
		ad_update_actor_keys(port, true);
	}
	/* Re-run aggregator selection; the "changed" result is not used here. */
	agg = __get_first_agg(port);
	ad_agg_selection_logic(agg, &dummy);

	spin_unlock_bh(&slave->bond->mode_lock);

	slave_dbg(slave->bond->dev, slave->dev, "Port %d changed link status to %s\n",
		  port->actor_port_number,
		  link == BOND_LINK_UP ? "UP" : "DOWN");

	/* RTNL is held and mode_lock is released so it's safe
	 * to update slave_array here.
	 */
	bond_update_slave_arr(slave->bond, NULL);
}

/**
 * bond_3ad_set_carrier - set link state for bonding master
 * @bond: bonding structure
 *
 * if we have an active aggregator, we're up, if not, we're down.
 * Presumes that we cannot have an active aggregator if there are
 * no slaves with link up.
 *
 * This behavior complies with IEEE 802.3 section 43.3.9.
 *
 * Called by bond_set_carrier(). Return zero if carrier state does not
 * change, nonzero if it does.
 *
 * NOTE(review): as written, ret is only set to 0 when the bond has no
 * slaves; every other path returns 1, including the ones where neither
 * netif_carrier_on() nor netif_carrier_off() is called. Confirm whether
 * callers depend on the documented "zero if unchanged" contract.
 */
int bond_3ad_set_carrier(struct bonding *bond)
{
	struct aggregator *active;
	struct slave *first_slave;
	int ret = 1;

	rcu_read_lock();
	first_slave = bond_first_slave_rcu(bond);
	if (!first_slave) {
		ret = 0;
		goto out;
	}
	active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator));
	if (active) {
		/* are enough slaves available to consider link up? */
		if (__agg_active_ports(active) < bond->params.min_links) {
			if (netif_carrier_ok(bond->dev)) {
				netif_carrier_off(bond->dev);
				goto out;
			}
		} else if (!netif_carrier_ok(bond->dev)) {
			netif_carrier_on(bond->dev);
			goto out;
		}
	} else if (netif_carrier_ok(bond->dev)) {
		/* no active aggregator: carrier must be off */
		netif_carrier_off(bond->dev);
	}
out:
	rcu_read_unlock();
	return ret;
}

/**
 * __bond_3ad_get_active_agg_info - get information of the active aggregator
 * @bond: bonding struct to work on
 * @ad_info: ad_info struct to fill with the bond's info
 *
 * Walks the slaves with bond_for_each_slave_rcu() and uses the first port
 * whose aggregator is marked active.
 *
 * Returns:   0 on success
 *          -1 if no slave port has an active aggregator
 */
int __bond_3ad_get_active_agg_info(struct bonding *bond,
				   struct ad_info *ad_info)
{
	struct aggregator *aggregator = NULL;
	struct list_head *iter;
	struct slave *slave;
	struct port *port;

	bond_for_each_slave_rcu(bond, slave, iter) {
		port = &(SLAVE_AD_INFO(slave)->port);
		if (port->aggregator && port->aggregator->is_active) {
			aggregator = port->aggregator;
			break;
		}
	}

	if (!aggregator)
		return -1;

	ad_info->aggregator_id = aggregator->aggregator_identifier;
	ad_info->ports = __agg_active_ports(aggregator);
	ad_info->actor_key = aggregator->actor_oper_aggregator_key;
	ad_info->partner_key = aggregator->partner_oper_aggregator_key;
	ether_addr_copy(ad_info->partner_system,
			aggregator->partner_system.mac_addr_value);
	return 0;
}

/* Same as __bond_3ad_get_active_agg_info(), but takes the RCU read lock
 * around the call and returns its result unchanged.
 */
int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
{
	int ret;

	rcu_read_lock();
	ret = __bond_3ad_get_active_agg_info(bond, ad_info);
	rcu_read_unlock();

	return ret;
}

/* Receive hook for LACPDU frames on a slave.
 *
 * Frames whose protocol is not PKT_TYPE_LACPDU, or whose destination MAC
 * is not lacpdu_mcast_addr, are passed on with RX_HANDLER_ANOTHER. If the
 * LACPDU cannot be obtained through skb_header_pointer(), the per-slave and
 * per-bond lacpdu_illegal_rx counters are incremented and the frame is
 * likewise passed on. Otherwise the result of bond_3ad_rx_indication()
 * is returned.
 */
int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
			 struct slave *slave)
{
	struct lacpdu *lacpdu, _lacpdu;

	if (skb->protocol != PKT_TYPE_LACPDU)
		return RX_HANDLER_ANOTHER;

	if (!MAC_ADDRESS_EQUAL(eth_hdr(skb)->h_dest, lacpdu_mcast_addr))
		return RX_HANDLER_ANOTHER;

	/* _lacpdu is the on-stack bounce buffer handed to skb_header_pointer() */
	lacpdu = skb_header_pointer(skb, 0, sizeof(_lacpdu), &_lacpdu);
	if (!lacpdu) {
		atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_illegal_rx);
		atomic64_inc(&BOND_AD_INFO(bond).stats.lacpdu_illegal_rx);
		return RX_HANDLER_ANOTHER;
	}

	return bond_3ad_rx_indication(lacpdu, slave);
}

/**
 * bond_3ad_update_lacp_rate - change the lacp rate
 * @bond: bonding struct
 *
 * When modify lacp_rate parameter via sysfs,
 * update actor_oper_port_state of each port.
 *
 * Hold bond->mode_lock,
 * so we can modify port->actor_oper_port_state,
 * no matter bond is up or down.
 */
void bond_3ad_update_lacp_rate(struct bonding *bond)
{
	struct port *port = NULL;
	struct list_head *iter;
	struct slave *slave;
	int lacp_fast;

	/* read once, before taking the lock; applied to every port below */
	lacp_fast = bond->params.lacp_fast;
	spin_lock_bh(&bond->mode_lock);
	bond_for_each_slave(bond, slave, iter) {
		port = &(SLAVE_AD_INFO(slave)->port);
		if (lacp_fast)
			port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT;
		else
			port->actor_oper_port_state &= ~LACP_STATE_LACP_TIMEOUT;
	}
	spin_unlock_bh(&bond->mode_lock);
}

/* Netlink space needed for the nine 64-bit counters that
 * bond_3ad_stats_fill() emits; keep both functions in step.
 */
size_t bond_3ad_stats_size(void)
{
	return nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_RX */
	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_TX */
	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_UNKNOWN_RX */
	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_ILLEGAL_RX */
	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RX */
	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_TX */
	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RESP_RX */
	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RESP_TX */
	       nla_total_size_64bit(sizeof(u64)); /* BOND_3AD_STAT_MARKER_UNKNOWN_RX */
}

/* Write every counter in @stats to @skb as a 64-bit netlink attribute,
 * padded with BOND_3AD_STAT_PAD.
 *
 * Returns 0 on success, or -EMSGSIZE as soon as one attribute cannot be
 * added.
 */
int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats)
{
	u64 val;

	val = atomic64_read(&stats->lacpdu_rx);
	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_RX, val,
			      BOND_3AD_STAT_PAD))
		return -EMSGSIZE;
	val = atomic64_read(&stats->lacpdu_tx);
	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_TX, val,
			      BOND_3AD_STAT_PAD))
		return -EMSGSIZE;
	val = atomic64_read(&stats->lacpdu_unknown_rx);
	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_UNKNOWN_RX, val,
			      BOND_3AD_STAT_PAD))
		return -EMSGSIZE;
	val = atomic64_read(&stats->lacpdu_illegal_rx);
	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_ILLEGAL_RX, val,
			      BOND_3AD_STAT_PAD))
		return -EMSGSIZE;

	val = atomic64_read(&stats->marker_rx);
	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RX, val,
			      BOND_3AD_STAT_PAD))
		return -EMSGSIZE;
	val = atomic64_read(&stats->marker_tx);
	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_TX, val,
			      BOND_3AD_STAT_PAD))
		return -EMSGSIZE;
	val = atomic64_read(&stats->marker_resp_rx);
	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RESP_RX, val,
			      BOND_3AD_STAT_PAD))
		return -EMSGSIZE;
	val = atomic64_read(&stats->marker_resp_tx);
	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RESP_TX, val,
			      BOND_3AD_STAT_PAD))
		return -EMSGSIZE;
	val = atomic64_read(&stats->marker_unknown_rx);
	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_UNKNOWN_RX, val,
			      BOND_3AD_STAT_PAD))
		return -EMSGSIZE;

	return 0;
}
46 5 43 42 42 20 20 1 6 16 16 13 13 1 5 5 2 1 291 26 267 266 6 143 13 30 87 6 4 2 3 1 2 1 3 1 2 2 428 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C)2003,2004 USAGI/WIDE Project * * Author: * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> */ #include <linux/types.h> #include <linux/timer.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/in6.h> #include <linux/icmpv6.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <net/ip6_checksum.h> #include <linux/seq_file.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include 
<net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_log.h> #include "nf_internals.h" static const unsigned int nf_ct_icmpv6_timeout = 30*HZ; bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct net *net, struct nf_conntrack_tuple *tuple) { const struct icmp6hdr *hp; struct icmp6hdr _hdr; hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hp == NULL) return false; tuple->dst.u.icmp.type = hp->icmp6_type; tuple->src.u.icmp.id = hp->icmp6_identifier; tuple->dst.u.icmp.code = hp->icmp6_code; return true; } /* Add 1; spaces filled with 0. */ static const u_int8_t invmap[] = { [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1, [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY + 1 }; static const u_int8_t noct_valid_new[] = { [ICMPV6_MGM_QUERY - 130] = 1, [ICMPV6_MGM_REPORT - 130] = 1, [ICMPV6_MGM_REDUCTION - 130] = 1, [NDISC_ROUTER_SOLICITATION - 130] = 1, [NDISC_ROUTER_ADVERTISEMENT - 130] = 1, [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1, [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1, [ICMPV6_MLD2_REPORT - 130] = 1 }; bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig) { int type = orig->dst.u.icmp.type - 128; if (type < 0 || type >= sizeof(invmap) || !invmap[type]) return false; tuple->src.u.icmp.id = orig->src.u.icmp.id; tuple->dst.u.icmp.type = invmap[type] - 1; tuple->dst.u.icmp.code = orig->dst.u.icmp.code; return true; } static unsigned int *icmpv6_get_timeouts(struct net *net) { return &nf_icmpv6_pernet(net)->timeout; } /* Returns verdict for packet, or -1 for invalid. 
*/
int nf_conntrack_icmpv6_packet(struct nf_conn *ct,
			       struct sk_buff *skb,
			       enum ip_conntrack_info ctinfo,
			       const struct nf_hook_state *state)
{
	unsigned int *timeout = nf_ct_timeout_lookup(ct);
	/* types allowed to create a new entry, indexed by (type - 128) */
	static const u8 valid_new[] = {
		[ICMPV6_ECHO_REQUEST - 128] = 1,
		[ICMPV6_NI_QUERY - 128] = 1
	};

	if (state->pf != NFPROTO_IPV6)
		return -NF_ACCEPT;

	if (!nf_ct_is_confirmed(ct)) {
		int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;

		if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
			/* Can't create a new ICMPv6 `conn' with this. */
			pr_debug("icmpv6: can't create new conn with type %u\n",
				 type + 128);
			nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
			return -NF_ACCEPT;
		}
	}

	/* no timeout found for this conntrack: use the per-netns value */
	if (!timeout)
		timeout = icmpv6_get_timeouts(nf_ct_net(ct));

	/* Do not immediately delete the connection after the first
	   successful reply to avoid excessive conntrackd traffic
	   and also to handle correctly ICMP echo reply duplicates. */
	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);

	return NF_ACCEPT;
}

/* Log @msg as an invalid ICMPv6 packet via nf_l4proto_log_invalid(). */
static void icmpv6_error_log(const struct sk_buff *skb,
			     const struct nf_hook_state *state,
			     const char *msg)
{
	nf_l4proto_log_invalid(skb, state, IPPROTO_ICMPV6, "%s", msg);
}

/*
 * Handle an NDISC redirect found at @dataoff.
 *
 * Returns -NF_ACCEPT (after logging) when the redirect header is short,
 * when the hop limit is not 255 or the source address is not link-local,
 * or when no non-empty option follows. Returns NF_ACCEPT for a non-zero
 * code or when the first option is not ND_OPT_REDIRECT_HDR. Otherwise
 * the data following the 8-byte option header is handed to
 * nf_conntrack_inet_error().
 */
static noinline_for_stack int
nf_conntrack_icmpv6_redirect(struct nf_conn *tmpl, struct sk_buff *skb,
			     unsigned int dataoff,
			     const struct nf_hook_state *state)
{
	u8 hl = ipv6_hdr(skb)->hop_limit;
	union nf_inet_addr outer_daddr;
	/* one bounce buffer shared by both skb_header_pointer() calls below */
	union {
		struct nd_opt_hdr nd_opt;
		struct rd_msg rd_msg;
	} tmp;
	const struct nd_opt_hdr *nd_opt;
	const struct rd_msg *rd_msg;

	rd_msg = skb_header_pointer(skb, dataoff, sizeof(*rd_msg), &tmp.rd_msg);
	if (!rd_msg) {
		icmpv6_error_log(skb, state, "short redirect");
		return -NF_ACCEPT;
	}

	if (rd_msg->icmph.icmp6_code != 0)
		return NF_ACCEPT;

	if (hl != 255 || !(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
		icmpv6_error_log(skb, state, "invalid saddr or hoplimit for redirect");
		return -NF_ACCEPT;
	}

	dataoff += sizeof(*rd_msg);

	/* warning: rd_msg no longer usable after this call */
	nd_opt = skb_header_pointer(skb, dataoff, sizeof(*nd_opt), &tmp.nd_opt);
	if (!nd_opt || nd_opt->nd_opt_len == 0) {
		icmpv6_error_log(skb, state, "redirect without options");
		return -NF_ACCEPT;
	}

	/* We could call ndisc_parse_options(), but it would need
	 * skb_linearize() and a bit more work.
	 */
	if (nd_opt->nd_opt_type != ND_OPT_REDIRECT_HDR)
		return NF_ACCEPT;

	memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr,
	       sizeof(outer_daddr.ip6));
	/* skip the 8 bytes in front of the embedded packet */
	dataoff += 8;
	return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
				       IPPROTO_ICMPV6, &outer_daddr);
}

/*
 * Classify an ICMPv6 packet before normal tracking.
 *
 * Returns -NF_ACCEPT (after logging) for a short header or, on
 * NF_INET_PRE_ROUTING with sysctl_checksum set, a failed checksum. Types
 * listed in noct_valid_new[] are marked IP_CT_UNTRACKED and accepted.
 * NDISC_REDIRECT goes to nf_conntrack_icmpv6_redirect(). Remaining types
 * >= 128 are accepted as-is; types below 128 are passed to
 * nf_conntrack_inet_error() with the offset advanced past the ICMPv6
 * header.
 */
int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
			      struct sk_buff *skb,
			      unsigned int dataoff,
			      const struct nf_hook_state *state)
{
	union nf_inet_addr outer_daddr;
	const struct icmp6hdr *icmp6h;
	struct icmp6hdr _ih;
	int type;

	icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
	if (icmp6h == NULL) {
		icmpv6_error_log(skb, state, "short packet");
		return -NF_ACCEPT;
	}

	if (state->hook == NF_INET_PRE_ROUTING &&
	    state->net->ct.sysctl_checksum &&
	    nf_ip6_checksum(skb, state->hook, dataoff, IPPROTO_ICMPV6)) {
		icmpv6_error_log(skb, state, "ICMPv6 checksum failed");
		return -NF_ACCEPT;
	}

	/* noct_valid_new[] is indexed by (type - 130) */
	type = icmp6h->icmp6_type - 130;
	if (type >= 0 && type < sizeof(noct_valid_new) &&
	    noct_valid_new[type]) {
		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
		return NF_ACCEPT;
	}

	if (icmp6h->icmp6_type == NDISC_REDIRECT)
		return nf_conntrack_icmpv6_redirect(tmpl, skb, dataoff, state);

	/* is not error message ? */
	if (icmp6h->icmp6_type >= 128)
		return NF_ACCEPT;

	memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr,
	       sizeof(outer_daddr.ip6));
	dataoff += sizeof(*icmp6h);
	return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
				       IPPROTO_ICMPV6, &outer_daddr);
}

#if IS_ENABLED(CONFIG_NF_CT_NETLINK)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>

/* Emit the ICMPv6 id, type and code of @t as netlink attributes.
 * Returns 0 on success, -1 if any attribute could not be added.
 */
static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
				  const struct nf_conntrack_tuple *t)
{
	if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) ||
	    nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) ||
	    nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -1;
}

static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
	[CTA_PROTO_ICMPV6_TYPE]	= { .type = NLA_U8 },
	[CTA_PROTO_ICMPV6_CODE]	= { .type = NLA_U8 },
	[CTA_PROTO_ICMPV6_ID]	= { .type = NLA_U16 },
};

/* Parse the attributes selected by @flags into @tuple.
 *
 * Each selected attribute must be present, otherwise -EINVAL is returned.
 * The type is additionally rejected with -EINVAL unless it has an entry
 * in invmap[]. Returns 0 on success.
 */
static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
				struct nf_conntrack_tuple *tuple,
				u_int32_t flags)
{
	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_TYPE)) {
		if (!tb[CTA_PROTO_ICMPV6_TYPE])
			return -EINVAL;

		tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
		if (tuple->dst.u.icmp.type < 128 ||
		    tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
		    !invmap[tuple->dst.u.icmp.type - 128])
			return -EINVAL;
	}

	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_CODE)) {
		if (!tb[CTA_PROTO_ICMPV6_CODE])
			return -EINVAL;

		tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
	}

	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_ID)) {
		if (!tb[CTA_PROTO_ICMPV6_ID])
			return -EINVAL;

		tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
	}

	return 0;
}

/* Attribute space for the tuple; computed on first call and cached. */
static unsigned int icmpv6_nlattr_tuple_size(void)
{
	static unsigned int size __read_mostly;

	if (!size)
		size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);

	return size;
}
#endif

#ifdef CONFIG_NF_CONNTRACK_TIMEOUT

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_cttimeout.h>

/* Store the timeout from @tb into @data, or into the per-netns value when
 * @data is NULL. The attribute is multiplied by HZ after byte-order
 * conversion; when it is absent the per-netns value is copied instead.
 * Always returns 0.
 */
static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
					struct net *net, void *data)
{
	unsigned int *timeout = data;
	struct nf_icmp_net *in = nf_icmpv6_pernet(net);

	if (!timeout)
		timeout = icmpv6_get_timeouts(net);
	if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
		*timeout =
		    ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
	} else {
		/* Set default ICMPv6 timeout. */
		*timeout = in->timeout;
	}
	return 0;
}

/* Emit the timeout in @data, divided by HZ, as CTA_TIMEOUT_ICMPV6_TIMEOUT.
 * Returns 0 on success, -ENOSPC if the attribute does not fit.
 */
static int
icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
{
	const unsigned int *timeout = data;

	if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -ENOSPC;
}

static const struct nla_policy
icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
	[CTA_TIMEOUT_ICMPV6_TIMEOUT]	= { .type = NLA_U32 },
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */

/* Initialise the per-netns ICMPv6 timeout to nf_ct_icmpv6_timeout. */
void nf_conntrack_icmpv6_init_net(struct net *net)
{
	struct nf_icmp_net *in = nf_icmpv6_pernet(net);

	in->timeout = nf_ct_icmpv6_timeout;
}

const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
{
	.l4proto		= IPPROTO_ICMPV6,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
	.tuple_to_nlattr	= icmpv6_tuple_to_nlattr,
	.nlattr_tuple_size	= icmpv6_nlattr_tuple_size,
	.nlattr_to_tuple	= icmpv6_nlattr_to_tuple,
	.nla_policy		= icmpv6_nla_policy,
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
	.ctnl_timeout		= {
		.nlattr_to_obj	= icmpv6_timeout_nlattr_to_obj,
		.obj_to_nlattr	= icmpv6_timeout_obj_to_nlattr,
		/* NOTE(review): the policy array above is sized with
		 * CTA_TIMEOUT_ICMPV6_MAX while this uses the ICMP (v4)
		 * constant - presumably both have the same value; confirm.
		 */
		.nlattr_max	= CTA_TIMEOUT_ICMP_MAX,
		.obj_size	= sizeof(unsigned int),
		.nla_policy	= icmpv6_timeout_nla_policy,
	},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
};
9 9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 // SPDX-License-Identifier: GPL-2.0 #include <linux/linkage.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/ioport.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/timex.h> #include <linux/random.h> #include 
<linux/init.h> #include <linux/kernel_stat.h> #include <linux/syscore_ops.h> #include <linux/bitops.h> #include <linux/acpi.h> #include <linux/io.h> #include <linux/delay.h> #include <linux/pgtable.h> #include <linux/atomic.h> #include <asm/timer.h> #include <asm/hw_irq.h> #include <asm/desc.h> #include <asm/apic.h> #include <asm/i8259.h> /* * This is the 'legacy' 8259A Programmable Interrupt Controller, * present in the majority of PC/AT boxes. * plus some generic x86 specific things if generic specifics makes * any sense at all. */ static void init_8259A(int auto_eoi); static bool pcat_compat __ro_after_init; static int i8259A_auto_eoi; DEFINE_RAW_SPINLOCK(i8259A_lock); /* * 8259A PIC functions to handle ISA devices: */ /* * This contains the irq mask for both 8259A irq controllers, */ unsigned int cached_irq_mask = 0xffff; /* * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) * boards the timer interrupt is not really connected to any IO-APIC pin, * it's fed to the master 8259A's IR0 line only. * * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. * this 'mixed mode' IRQ handling costs nothing because it's only used * at IRQ setup time. 
*/ unsigned long io_apic_irqs; static void mask_8259A_irq(unsigned int irq) { unsigned int mask = 1 << irq; unsigned long flags; raw_spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask |= mask; if (irq & 8) outb(cached_slave_mask, PIC_SLAVE_IMR); else outb(cached_master_mask, PIC_MASTER_IMR); raw_spin_unlock_irqrestore(&i8259A_lock, flags); } static void disable_8259A_irq(struct irq_data *data) { mask_8259A_irq(data->irq); } static void unmask_8259A_irq(unsigned int irq) { unsigned int mask = ~(1 << irq); unsigned long flags; raw_spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask &= mask; if (irq & 8) outb(cached_slave_mask, PIC_SLAVE_IMR); else outb(cached_master_mask, PIC_MASTER_IMR); raw_spin_unlock_irqrestore(&i8259A_lock, flags); } static void enable_8259A_irq(struct irq_data *data) { unmask_8259A_irq(data->irq); } static int i8259A_irq_pending(unsigned int irq) { unsigned int mask = 1<<irq; unsigned long flags; int ret; raw_spin_lock_irqsave(&i8259A_lock, flags); if (irq < 8) ret = inb(PIC_MASTER_CMD) & mask; else ret = inb(PIC_SLAVE_CMD) & (mask >> 8); raw_spin_unlock_irqrestore(&i8259A_lock, flags); return ret; } static void make_8259A_irq(unsigned int irq) { disable_irq_nosync(irq); io_apic_irqs &= ~(1<<irq); irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq); irq_set_status_flags(irq, IRQ_LEVEL); enable_irq(irq); lapic_assign_legacy_vector(irq, true); } /* * This function assumes to be called rarely. Switching between * 8259A registers is slow. * This has to be protected by the irq controller spinlock * before being called. 
*/
static inline int i8259A_irq_real(unsigned int irq)
{
	int value;
	int irqmask = 1<<irq;

	if (irq < 8) {
		outb(0x0B, PIC_MASTER_CMD);	/* ISR register */
		value = inb(PIC_MASTER_CMD) & irqmask;
		outb(0x0A, PIC_MASTER_CMD);	/* back to the IRR register */
		return value;
	}
	outb(0x0B, PIC_SLAVE_CMD);	/* ISR register */
	value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
	outb(0x0A, PIC_SLAVE_CMD);	/* back to the IRR register */
	return value;
}

/*
 * Careful! The 8259A is a fragile beast, it pretty
 * much _has_ to be done exactly like this (mask it
 * first, _then_ send the EOI, and the order of EOI
 * to the two 8259s is important!)
 */
static void mask_and_ack_8259A(struct irq_data *data)
{
	unsigned int irq = data->irq;
	unsigned int irqmask = 1 << irq;
	unsigned long flags;

	raw_spin_lock_irqsave(&i8259A_lock, flags);
	/*
	 * Lightweight spurious IRQ detection. We do not want
	 * to overdo spurious IRQ handling - it's usually a sign
	 * of hardware problems, so we only do the checks we can
	 * do without slowing down good hardware unnecessarily.
	 *
	 * Note that IRQ7 and IRQ15 (the two spurious IRQs
	 * usually resulting from the 8259A-1|2 PICs) occur
	 * even if the IRQ is masked in the 8259A. Thus we
	 * can check spurious 8259A IRQs without doing the
	 * quite slow i8259A_irq_real() call for every IRQ.
	 * This does not cover 100% of spurious interrupts,
	 * but should be enough to warn the user that there
	 * is something bad going on ...
	 */
	/* an interrupt on a line we believe is masked: take the slow path */
	if (cached_irq_mask & irqmask)
		goto spurious_8259A_irq;
	cached_irq_mask |= irqmask;

handle_real_irq:
	if (irq & 8) {
		inb(PIC_SLAVE_IMR);	/* DUMMY - (do we need this?) */
		outb(cached_slave_mask, PIC_SLAVE_IMR);
		/* 'Specific EOI' to slave */
		outb(0x60+(irq&7), PIC_SLAVE_CMD);
		 /* 'Specific EOI' to master-IRQ2 */
		outb(0x60+PIC_CASCADE_IR, PIC_MASTER_CMD);
	} else {
		inb(PIC_MASTER_IMR);	/* DUMMY - (do we need this?) */
		outb(cached_master_mask, PIC_MASTER_IMR);
		outb(0x60+irq, PIC_MASTER_CMD);	/* 'Specific EOI to master */
	}
	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
	return;

spurious_8259A_irq:
	/*
	 * this is the slow path - should happen rarely.
	 */
	if (i8259A_irq_real(irq))
		/*
		 * oops, the IRQ _is_ in service according to the
		 * 8259A - not spurious, go handle it.
		 */
		goto handle_real_irq;

	{
		/* one bit per IRQ already reported, so each is logged once */
		static int spurious_irq_mask;
		/*
		 * At this point we can be sure the IRQ is spurious,
		 * lets ACK and report it. [once per IRQ]
		 */
		if (!(spurious_irq_mask & irqmask)) {
			printk_deferred(KERN_DEBUG
			       "spurious 8259A interrupt: IRQ%d.\n", irq);
			spurious_irq_mask |= irqmask;
		}
		atomic_inc(&irq_err_count);
		/*
		 * Theoretically we do not have to handle this IRQ,
		 * but in Linux this does not cause problems and is
		 * simpler for us.
		 */
		goto handle_real_irq;
	}
}

/* irq_chip for the legacy PIC; init_8259A() rewrites .irq_mask_ack
 * depending on whether auto-EOI is in use.
 */
struct irq_chip i8259A_chip = {
	.name		= "XT-PIC",
	.irq_mask	= disable_8259A_irq,
	.irq_disable	= disable_8259A_irq,
	.irq_unmask	= enable_8259A_irq,
	.irq_mask_ack	= mask_and_ack_8259A,
};

/* ELCR1/ELCR2 contents saved by i8259A_suspend() for i8259A_resume() */
static char irq_trigger[2];
/* ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ */
static void restore_ELCR(char *trigger)
{
	outb(trigger[0], PIC_ELCR1);
	outb(trigger[1], PIC_ELCR2);
}

/* Read both ELCR registers into @trigger with the reserved bits cleared. */
static void save_ELCR(char *trigger)
{
	/* IRQ 0,1,2,8,13 are marked as reserved */
	trigger[0] = inb(PIC_ELCR1) & 0xF8;
	trigger[1] = inb(PIC_ELCR2) & 0xDE;
}

/* syscore resume: re-run init with the remembered auto-EOI setting, then
 * restore the saved ELCR contents.
 */
static void i8259A_resume(void)
{
	init_8259A(i8259A_auto_eoi);
	restore_ELCR(irq_trigger);
}

/* syscore suspend: save the ELCR contents; always returns 0. */
static int i8259A_suspend(void)
{
	save_ELCR(irq_trigger);
	return 0;
}

static void i8259A_shutdown(void)
{
	/* Put the i8259A into a quiescent state that
	 * the kernel initialization code can get it
	 * out of.
	 */
	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
}

static struct syscore_ops i8259_syscore_ops = {
	.suspend = i8259A_suspend,
	.resume = i8259A_resume,
	.shutdown = i8259A_shutdown,
};

/* Mask every line on both controllers without touching cached_irq_mask,
 * so unmask_8259A() can restore the previous state.
 */
static void mask_8259A(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&i8259A_lock, flags);

	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */

	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
}

/* Write the cached masks back to both controllers. */
static void unmask_8259A(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&i8259A_lock, flags);

	outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
	outb(cached_slave_mask, PIC_SLAVE_IMR);	  /* restore slave IRQ mask */

	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
}

/* Detect whether a PIC is present; switches legacy_pic to
 * null_legacy_pic when the probe value does not read back.
 * Returns nr_legacy_irqs() for whichever legacy_pic is then selected.
 */
static int probe_8259A(void)
{
	unsigned char new_val, probe_val = ~(1 << PIC_CASCADE_IR);
	unsigned long flags;

	/*
	 * If MADT has the PCAT_COMPAT flag set, then do not bother probing
	 * for the PIC. Some BIOSes leave the PIC uninitialized and probing
	 * fails.
	 *
	 * Right now this causes problems as quite some code depends on
	 * nr_legacy_irqs() > 0 or has_legacy_pic() == true. This is silly
	 * when the system has an IO/APIC because then PIC is not required
	 * at all, except for really old machines where the timer interrupt
	 * must be routed through the PIC. So just pretend that the PIC is
	 * there and let legacy_pic->init() initialize it for nothing.
	 *
	 * Alternatively this could just try to initialize the PIC and
	 * repeat the probe, but for cases where there is no PIC that's
	 * just pointless.
	 */
	if (pcat_compat)
		return nr_legacy_irqs();

	/*
	 * Check to see if we have a PIC.  Mask all except the cascade and
	 * read back the value we just wrote. If we don't have a PIC, we
	 * will read 0xff as opposed to the value we wrote.
	 */
	raw_spin_lock_irqsave(&i8259A_lock, flags);

	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
	outb(probe_val, PIC_MASTER_IMR);
	new_val = inb(PIC_MASTER_IMR);
	if (new_val != probe_val) {
		printk(KERN_INFO "Using NULL legacy PIC\n");
		legacy_pic = &null_legacy_pic;
	}

	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
	return nr_legacy_irqs();
}

/* Program both controllers with the ICW1..ICW4 sequence, choose the
 * mask_ack callback to match @auto_eoi, and finally restore the cached
 * interrupt masks. @auto_eoi is remembered for i8259A_resume().
 */
static void init_8259A(int auto_eoi)
{
	unsigned long flags;

	i8259A_auto_eoi = auto_eoi;

	raw_spin_lock_irqsave(&i8259A_lock, flags);

	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */

	/*
	 * outb_pic - this has to work on a wide range of PC hardware.
	 */
	outb_pic(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */

	/* ICW2: 8259A-1 IR0-7 mapped to ISA_IRQ_VECTOR(0) */
	outb_pic(ISA_IRQ_VECTOR(0), PIC_MASTER_IMR);

	/* 8259A-1 (the master) has a slave on IR2 */
	outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);

	if (auto_eoi)	/* master does Auto EOI */
		outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
	else		/* master expects normal EOI */
		outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);

	outb_pic(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */

	/* ICW2: 8259A-2 IR0-7 mapped to ISA_IRQ_VECTOR(8) */
	outb_pic(ISA_IRQ_VECTOR(8), PIC_SLAVE_IMR);
	/* 8259A-2 is a slave on master's IR2 */
	outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
	/* (slave's support for AEOI in flat mode is to be investigated) */
	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);

	if (auto_eoi)
		/*
		 * In AEOI mode we just have to mask the interrupt
		 * when acking.
		 */
		i8259A_chip.irq_mask_ack = disable_8259A_irq;
	else
		i8259A_chip.irq_mask_ack = mask_and_ack_8259A;

	udelay(100);		/* wait for 8259A to initialize */

	outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
	outb(cached_slave_mask, PIC_SLAVE_IMR);	  /* restore slave IRQ mask */

	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
}

/*
 * make i8259 a driver so that we can select pic functions at run time.
the goal * is to make x86 binary compatible among pc compatible and non-pc compatible * platforms, such as x86 MID. */ static void legacy_pic_noop(void) { }; static void legacy_pic_uint_noop(unsigned int unused) { }; static void legacy_pic_int_noop(int unused) { }; static int legacy_pic_irq_pending_noop(unsigned int irq) { return 0; } static int legacy_pic_probe(void) { return 0; } struct legacy_pic null_legacy_pic = { .nr_legacy_irqs = 0, .chip = &dummy_irq_chip, .mask = legacy_pic_uint_noop, .unmask = legacy_pic_uint_noop, .mask_all = legacy_pic_noop, .restore_mask = legacy_pic_noop, .init = legacy_pic_int_noop, .probe = legacy_pic_probe, .irq_pending = legacy_pic_irq_pending_noop, .make_irq = legacy_pic_uint_noop, }; static struct legacy_pic default_legacy_pic = { .nr_legacy_irqs = NR_IRQS_LEGACY, .chip = &i8259A_chip, .mask = mask_8259A_irq, .unmask = unmask_8259A_irq, .mask_all = mask_8259A, .restore_mask = unmask_8259A, .init = init_8259A, .probe = probe_8259A, .irq_pending = i8259A_irq_pending, .make_irq = make_8259A_irq, }; struct legacy_pic *legacy_pic = &default_legacy_pic; EXPORT_SYMBOL(legacy_pic); static int __init i8259A_init_ops(void) { if (legacy_pic == &default_legacy_pic) register_syscore_ops(&i8259_syscore_ops); return 0; } device_initcall(i8259A_init_ops); void __init legacy_pic_pcat_compat(void) { pcat_compat = true; }
20 16 15 1 4 4 3 3 2 2 7 5 2 2 7 7 2 3 5 5 5 1 30 2 3 7 21 5 23 19 9 9 19 7 5 1 4 7 16 13 14 11 3 7 6 16 10 6 4 12 7 9 16 16 26 1 15 15 13 6 7 13 13 2 1 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_tbf.c Token Bucket Filter queue. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs - * original idea by Martin Devera */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <net/gso.h> #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> #include <net/pkt_sched.h> /* Simple Token Bucket Filter. ======================================= SOURCE. ------- None. Description. ------------ A data flow obeys TBF with rate R and depth B, if for any time interval t_i...t_f the number of transmitted bits does not exceed B + R*(t_f-t_i). Packetized version of this definition: The sequence of packets of sizes s_i served at moments t_i obeys TBF, if for any i<=k: s_i+....+s_k <= B + R*(t_k - t_i) Algorithm. ---------- Let N(t_i) be B/R initially and N(t) grow continuously with time as: N(t+delta) = min{B/R, N(t) + delta} If the first packet in queue has length S, it may be transmitted only at the time t_* when S/R <= N(t_*), and in this case N(t) jumps: N(t_* + 0) = N(t_* - 0) - S/R. Actually, QoS requires two TBF to be applied to a data stream. 
One of them controls steady state burst size, another
	one with rate P (peak rate) and depth M (equal to link MTU)
	limits bursts at a smaller time scale.

	It is easy to see that P>R, and B>M. If P is infinity, this double
	TBF is equivalent to a single one.

	When TBF works in reshaping mode, latency is estimated as:

	lat = max ((L-B)/R, (L-M)/P)


	NOTES.
	------

	If TBF throttles, it starts a watchdog timer, which will wake it up
	when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not awaken by EOI for some previous packet,
	TBF can stop its activity for 1/HZ.


	This means, that with depth B, the maximal rate is

	R_crit = B*HZ

	F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

	Note that the peak rate TBF is much more tough: with MTU 1500
	P_crit = 150Kbytes/sec. So, if you need greater peak
	rates, use alpha with HZ=1000 :-)

	With classful TBF, limit is just kept for backwards compatibility.
	It is passed to the default bfifo qdisc - if the inner qdisc is
	changed the limit is not effective anymore.
*/

/*
 * Per-qdisc private state of a TBF instance (obtained via qdisc_priv()).
 * The "Parameters" half is written by tbf_change(); the "Variables" half is
 * the running token state updated by tbf_dequeue() and tbf_reset().
 */
struct tbf_sched_data {
/* Parameters */
	u32		limit;		/* Maximal length of backlog: bytes */
	u32		max_size;	/* Longest packet (bytes) enqueued as-is;
					 * tbf_enqueue() segments or drops
					 * anything longer
					 */
	s64		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
	s64		mtu;		/* Peak bucket depth, kept in the same
					 * time units as buffer (set from
					 * PSCHED_TICKS2NS()/psched_l2t_ns()
					 * in tbf_change())
					 */
	struct psched_ratecfg rate;	/* Rate used for the main bucket */
	struct psched_ratecfg peak;	/* Peak rate; rate_bytes_ps == 0 means
					 * no peak bucket (tbf_peak_present())
					 */

/* Variables */
	s64	tokens;			/* Current number of B tokens */
	s64	ptokens;		/* Current number of P tokens */
	s64	t_c;			/* Time check-point */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};


/* Time to Length, convert time in ns to length in bytes
 * to determinate how many bytes can be sent in given time.
*/ static u64 psched_ns_t2l(const struct psched_ratecfg *r, u64 time_in_ns) { /* The formula is : * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC */ u64 len = time_in_ns * r->rate_bytes_ps; do_div(len, NSEC_PER_SEC); if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) { do_div(len, 53); len = len * 48; } if (len > r->overhead) len -= r->overhead; else len = 0; return len; } static void tbf_offload_change(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_tbf_qopt_offload qopt; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; qopt.command = TC_TBF_REPLACE; qopt.handle = sch->handle; qopt.parent = sch->parent; qopt.replace_params.rate = q->rate; qopt.replace_params.max_size = q->max_size; qopt.replace_params.qstats = &sch->qstats; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt); } static void tbf_offload_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct tc_tbf_qopt_offload qopt; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; qopt.command = TC_TBF_DESTROY; qopt.handle = sch->handle; qopt.parent = sch->parent; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt); } static int tbf_offload_dump(struct Qdisc *sch) { struct tc_tbf_qopt_offload qopt; qopt.command = TC_TBF_STATS; qopt.handle = sch->handle; qopt.parent = sch->parent; qopt.stats.bstats = &sch->bstats; qopt.stats.qstats = &sch->qstats; return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt); } static void tbf_offload_graft(struct Qdisc *sch, struct Qdisc *new, struct Qdisc *old, struct netlink_ext_ack *extack) { struct tc_tbf_qopt_offload graft_offload = { .handle = sch->handle, .parent = sch->parent, .child_handle = new->handle, .command = TC_TBF_GRAFT, }; qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old, TC_SETUP_QDISC_TBF, &graft_offload, extack); } /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ 
static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *segs, *nskb; netdev_features_t features = netif_skb_features(skb); unsigned int len = 0, prev_len = qdisc_pkt_len(skb); int ret, nb; segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) return qdisc_drop(skb, sch, to_free); nb = 0; skb_list_walk_safe(segs, segs, nskb) { skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; len += segs->len; ret = qdisc_enqueue(segs, q->qdisc, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); } else { nb++; } } sch->q.qlen += nb; if (nb > 1) qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); consume_skb(skb); return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; } static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); unsigned int len = qdisc_pkt_len(skb); int ret; if (qdisc_pkt_len(skb) > q->max_size) { if (skb_is_gso(skb) && skb_gso_validate_mac_len(skb, q->max_size)) return tbf_segment(skb, sch, to_free); return qdisc_drop(skb, sch, to_free); } ret = qdisc_enqueue(skb, q->qdisc, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); return ret; } sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } static bool tbf_peak_present(const struct tbf_sched_data *q) { return q->peak.rate_bytes_ps; } static struct sk_buff *tbf_dequeue(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; skb = q->qdisc->ops->peek(q->qdisc); if (skb) { s64 now; s64 toks; s64 ptoks = 0; unsigned int len = qdisc_pkt_len(skb); now = ktime_get_ns(); toks = min_t(s64, now - q->t_c, q->buffer); if (tbf_peak_present(q)) { ptoks = toks + q->ptokens; if (ptoks > q->mtu) ptoks = q->mtu; ptoks -= (s64) psched_l2t_ns(&q->peak, len); } toks += q->tokens; if (toks 
> q->buffer) toks = q->buffer; toks -= (s64) psched_l2t_ns(&q->rate, len); if ((toks|ptoks) >= 0) { skb = qdisc_dequeue_peeked(q->qdisc); if (unlikely(!skb)) return NULL; q->t_c = now; q->tokens = toks; q->ptokens = ptoks; qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; qdisc_bstats_update(sch, skb); return skb; } qdisc_watchdog_schedule_ns(&q->watchdog, now + max_t(long, -toks, -ptoks)); /* Maybe we have a shorter packet in the queue, which can be sent now. It sounds cool, but, however, this is wrong in principle. We MUST NOT reorder packets under these circumstances. Really, if we split the flow into independent subflows, it would be a very good solution. This is the main idea of all FQ algorithms (cf. CSZ, HPFQ, HFSC) */ qdisc_qstats_overlimit(sch); } return NULL; } static void tbf_reset(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); q->t_c = ktime_get_ns(); q->tokens = q->buffer; q->ptokens = q->mtu; qdisc_watchdog_cancel(&q->watchdog); } static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) }, [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_TBF_RATE64] = { .type = NLA_U64 }, [TCA_TBF_PRATE64] = { .type = NLA_U64 }, [TCA_TBF_BURST] = { .type = NLA_U32 }, [TCA_TBF_PBURST] = { .type = NLA_U32 }, }; static int tbf_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { int err; struct tbf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_TBF_MAX + 1]; struct tc_tbf_qopt *qopt; struct Qdisc *child = NULL; struct Qdisc *old = NULL; struct psched_ratecfg rate; struct psched_ratecfg peak; u64 max_size; s64 buffer, mtu; u64 rate64 = 0, prate64 = 0; err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy, NULL); if (err < 0) return err; err = -EINVAL; if (tb[TCA_TBF_PARMS] == NULL) goto done; qopt = nla_data(tb[TCA_TBF_PARMS]); if 
(qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) qdisc_put_rtab(qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB], NULL)); if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB], NULL)); buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); if (tb[TCA_TBF_RATE64]) rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); psched_ratecfg_precompute(&rate, &qopt->rate, rate64); if (tb[TCA_TBF_BURST]) { max_size = nla_get_u32(tb[TCA_TBF_BURST]); buffer = psched_l2t_ns(&rate, max_size); } else { max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); } if (qopt->peakrate.rate) { if (tb[TCA_TBF_PRATE64]) prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64); if (peak.rate_bytes_ps <= rate.rate_bytes_ps) { pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n", peak.rate_bytes_ps, rate.rate_bytes_ps); err = -EINVAL; goto done; } if (tb[TCA_TBF_PBURST]) { u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]); max_size = min_t(u32, max_size, pburst); mtu = psched_l2t_ns(&peak, pburst); } else { max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); } } else { memset(&peak, 0, sizeof(peak)); } if (max_size < psched_mtu(qdisc_dev(sch))) pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n", max_size, qdisc_dev(sch)->name, psched_mtu(qdisc_dev(sch))); if (!max_size) { err = -EINVAL; goto done; } if (q->qdisc != &noop_qdisc) { err = fifo_set_limit(q->qdisc, qopt->limit); if (err) goto done; } else if (qopt->limit > 0) { child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit, extack); if (IS_ERR(child)) { err = PTR_ERR(child); goto done; } /* child is fifo, no need to check for noop_qdisc */ qdisc_hash_add(child, true); } sch_tree_lock(sch); if (child) { qdisc_tree_flush_backlog(q->qdisc); old = q->qdisc; q->qdisc = child; } q->limit = qopt->limit; if (tb[TCA_TBF_PBURST]) q->mtu = 
mtu; else q->mtu = PSCHED_TICKS2NS(qopt->mtu); q->max_size = max_size; if (tb[TCA_TBF_BURST]) q->buffer = buffer; else q->buffer = PSCHED_TICKS2NS(qopt->buffer); q->tokens = q->buffer; q->ptokens = q->mtu; memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg)); memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); sch_tree_unlock(sch); qdisc_put(old); err = 0; tbf_offload_change(sch); done: return err; } static int tbf_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_init(&q->watchdog, sch); q->qdisc = &noop_qdisc; if (!opt) return -EINVAL; q->t_c = ktime_get_ns(); return tbf_change(sch, opt, extack); } static void tbf_destroy(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); tbf_offload_destroy(sch); qdisc_put(q->qdisc); } static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) { struct tbf_sched_data *q = qdisc_priv(sch); struct nlattr *nest; struct tc_tbf_qopt opt; int err; err = tbf_offload_dump(sch); if (err) return err; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; opt.limit = q->limit; psched_ratecfg_getrate(&opt.rate, &q->rate); if (tbf_peak_present(q)) psched_ratecfg_getrate(&opt.peakrate, &q->peak); else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); opt.mtu = PSCHED_NS2TICKS(q->mtu); opt.buffer = PSCHED_NS2TICKS(q->buffer); if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if (q->rate.rate_bytes_ps >= (1ULL << 32) && nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps, TCA_TBF_PAD)) goto nla_put_failure; if (tbf_peak_present(q) && q->peak.rate_bytes_ps >= (1ULL << 32) && nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps, TCA_TBF_PAD)) goto nla_put_failure; return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static int tbf_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff 
*skb, struct tcmsg *tcm) { struct tbf_sched_data *q = qdisc_priv(sch); tcm->tcm_handle |= TC_H_MIN(1); tcm->tcm_info = q->qdisc->handle; return 0; } static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct tbf_sched_data *q = qdisc_priv(sch); if (new == NULL) new = &noop_qdisc; *old = qdisc_replace(sch, new, &q->qdisc); tbf_offload_graft(sch, new, *old, extack); return 0; } static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg) { struct tbf_sched_data *q = qdisc_priv(sch); return q->qdisc; } static unsigned long tbf_find(struct Qdisc *sch, u32 classid) { return 1; } static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { tc_qdisc_stats_dump(sch, 1, walker); } } static const struct Qdisc_class_ops tbf_class_ops = { .graft = tbf_graft, .leaf = tbf_leaf, .find = tbf_find, .walk = tbf_walk, .dump = tbf_dump_class, }; static struct Qdisc_ops tbf_qdisc_ops __read_mostly = { .next = NULL, .cl_ops = &tbf_class_ops, .id = "tbf", .priv_size = sizeof(struct tbf_sched_data), .enqueue = tbf_enqueue, .dequeue = tbf_dequeue, .peek = qdisc_peek_dequeued, .init = tbf_init, .reset = tbf_reset, .destroy = tbf_destroy, .change = tbf_change, .dump = tbf_dump, .owner = THIS_MODULE, }; static int __init tbf_module_init(void) { return register_qdisc(&tbf_qdisc_ops); } static void __exit tbf_module_exit(void) { unregister_qdisc(&tbf_qdisc_ops); } module_init(tbf_module_init) module_exit(tbf_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Token Bucket Filter qdisc");
3 3 3 3 3 1 1 23 23 23 23 39 39 39 39 39 39 39 39 39 23 23 23 23 23 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 
511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 
1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 
1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 
1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 
2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 
2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 
3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 
3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 // SPDX-License-Identifier: GPL-2.0-or-later /* * (Tentative) USB Audio Driver for ALSA * * Mixer control part * * Copyright (c) 2002 by Takashi Iwai <tiwai@suse.de> * * Many codes borrowed from audio.c by * Alan Cox (alan@lxorguk.ukuu.org.uk) * Thomas Sailer (sailer@ife.ee.ethz.ch) */ /* * TODOs, for both the mixer and the streaming interfaces: * * - support for UAC2 effect units * - 
support for graphical equalizers
 *  - RANGE and MEM set commands (UAC2)
 *  - RANGE and MEM interrupt dispatchers (UAC2)
 *  - audio channel clustering (UAC2)
 *  - audio sample rate converter units (UAC2)
 *  - proper handling of clock multipliers (UAC2)
 *  - dispatch clock change notifications (UAC2)
 *  - stop PCM streams which use a clock that became invalid
 *  - stop PCM streams which use a clock selector that has changed
 *  - parse available sample rates again when clock sources changed
 */

#include <linux/bitops.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/usb.h>
#include <linux/usb/audio.h>
#include <linux/usb/audio-v2.h>
#include <linux/usb/audio-v3.h>

#include <sound/core.h>
#include <sound/control.h>
#include <sound/hwdep.h>
#include <sound/info.h>
#include <sound/tlv.h>

#include "usbaudio.h"
#include "mixer.h"
#include "helper.h"
#include "mixer_quirks.h"
#include "power.h"

/* size of the unit/terminal ID bitmaps kept in struct mixer_build */
#define MAX_ID_ELEMS	256

/*
 * Summary of a terminal or unit found while walking an input chain.
 * Filled in by the parse_term_*() helpers in this file.
 */
struct usb_audio_term {
	int id;			/* unit/terminal ID */
	int type;		/* wTerminalType, or (unit subtype << 16) for a "virtual type" */
	int channels;		/* number of channels */
	unsigned int chconfig;	/* channel configuration bits */
	int name;		/* string descriptor index; 0 means "no name" */
};

struct usbmix_name_map;

/* parser state shared by the routines that walk the audio control descriptors */
struct mixer_build {
	struct snd_usb_audio *chip;
	struct usb_mixer_interface *mixer;
	unsigned char *buffer;		/* descriptor data searched by find_audio_control_unit() */
	unsigned int buflen;		/* length of @buffer in bytes */
	DECLARE_BITMAP(unitbitmap, MAX_ID_ELEMS);	/* NOTE(review): presumably marks already-parsed units; users are outside this chunk */
	DECLARE_BITMAP(termbitmap, MAX_ID_ELEMS);	/* IDs visited by __check_input_term(), used to detect loops */
	struct usb_audio_term oterm;
	const struct usbmix_name_map *map;		/* optional name map, looked up with find_map() */
	const struct usbmix_selector_map *selector_map;	/* optional selector source names */
};

/* E-mu 0202/0404/0204 eXtension Unit (XU) control */
enum {
	USB_XU_CLOCK_RATE		= 0xe301,
	USB_XU_CLOCK_SOURCE		= 0xe302,
	USB_XU_DIGITAL_IO_STATUS	= 0xe303,
	USB_XU_DEVICE_OPTIONS		= 0xe304,
	USB_XU_DIRECT_MONITORING	= 0xe305,
	USB_XU_METERING			= 0xe306
};

/*
 * Control selectors used with the XU units above.  Two of them share the
 * value 0x03.
 * NOTE(review): presumably they are addressed to different units - confirm.
 */
enum {
	USB_XU_CLOCK_SOURCE_SELECTOR = 0x02,	/* clock source */
	USB_XU_CLOCK_RATE_SELECTOR = 0x03,	/* clock rate */
	USB_XU_DIGITAL_FORMAT_SELECTOR = 0x01,	/* the spdif format */
	USB_XU_SOFT_LIMIT_SELECTOR = 0x03	/* soft limiter */
};

/*
 * manual mapping of mixer names
 * if the mixer topology is too
complicated and the parsed names are * ambiguous, add the entries in usbmixer_maps.c. */ #include "mixer_maps.c" static const struct usbmix_name_map * find_map(const struct usbmix_name_map *p, int unitid, int control) { if (!p) return NULL; for (; p->id; p++) { if (p->id == unitid && (!control || !p->control || control == p->control)) return p; } return NULL; } /* get the mapped name if the unit matches */ static int check_mapped_name(const struct usbmix_name_map *p, char *buf, int buflen) { int len; if (!p || !p->name) return 0; buflen--; len = strscpy(buf, p->name, buflen); return len < 0 ? buflen : len; } /* ignore the error value if ignore_ctl_error flag is set */ #define filter_error(cval, err) \ ((cval)->head.mixer->ignore_ctl_error ? 0 : (err)) /* check whether the control should be ignored */ static inline int check_ignored_ctl(const struct usbmix_name_map *p) { if (!p || p->name || p->dB) return 0; return 1; } /* dB mapping */ static inline void check_mapped_dB(const struct usbmix_name_map *p, struct usb_mixer_elem_info *cval) { if (p && p->dB) { cval->dBmin = p->dB->min; cval->dBmax = p->dB->max; cval->min_mute = p->dB->min_mute; cval->initialized = 1; } } /* get the mapped selector source name */ static int check_mapped_selector_name(struct mixer_build *state, int unitid, int index, char *buf, int buflen) { const struct usbmix_selector_map *p; int len; if (!state->selector_map) return 0; for (p = state->selector_map; p->id; p++) { if (p->id == unitid && index < p->count) { len = strscpy(buf, p->names[index], buflen); return len < 0 ? 
buflen : len; } } return 0; } /* * find an audio control unit with the given unit id */ static void *find_audio_control_unit(struct mixer_build *state, unsigned char unit) { /* we just parse the header */ struct uac_feature_unit_descriptor *hdr = NULL; while ((hdr = snd_usb_find_desc(state->buffer, state->buflen, hdr, USB_DT_CS_INTERFACE)) != NULL) { if (hdr->bLength >= 4 && hdr->bDescriptorSubtype >= UAC_INPUT_TERMINAL && hdr->bDescriptorSubtype <= UAC3_SAMPLE_RATE_CONVERTER && hdr->bUnitID == unit) return hdr; } return NULL; } /* * copy a string with the given id */ static int snd_usb_copy_string_desc(struct snd_usb_audio *chip, int index, char *buf, int maxlen) { int len = usb_string(chip->dev, index, buf, maxlen - 1); if (len < 0) return 0; buf[len] = 0; return len; } /* * convert from the byte/word on usb descriptor to the zero-based integer */ static int convert_signed_value(struct usb_mixer_elem_info *cval, int val) { switch (cval->val_type) { case USB_MIXER_BOOLEAN: return !!val; case USB_MIXER_INV_BOOLEAN: return !val; case USB_MIXER_U8: val &= 0xff; break; case USB_MIXER_S8: val &= 0xff; if (val >= 0x80) val -= 0x100; break; case USB_MIXER_U16: val &= 0xffff; break; case USB_MIXER_S16: val &= 0xffff; if (val >= 0x8000) val -= 0x10000; break; } return val; } /* * convert from the zero-based int to the byte/word for usb descriptor */ static int convert_bytes_value(struct usb_mixer_elem_info *cval, int val) { switch (cval->val_type) { case USB_MIXER_BOOLEAN: return !!val; case USB_MIXER_INV_BOOLEAN: return !val; case USB_MIXER_S8: case USB_MIXER_U8: return val & 0xff; case USB_MIXER_S16: case USB_MIXER_U16: return val & 0xffff; } return 0; /* not reached */ } static int get_relative_value(struct usb_mixer_elem_info *cval, int val) { if (!cval->res) cval->res = 1; if (val < cval->min) return 0; else if (val >= cval->max) return DIV_ROUND_UP(cval->max - cval->min, cval->res); else return (val - cval->min) / cval->res; } static int get_abs_value(struct 
usb_mixer_elem_info *cval, int val)
{
	/*
	 * Inverse of get_relative_value(): turn a zero-based step index into
	 * a raw device value, clamped to [cval->min, cval->max].
	 */
	if (val < 0)
		return cval->min;
	if (!cval->res)
		cval->res = 1;
	val *= cval->res;
	val += cval->min;
	if (val > cval->max)
		return cval->max;
	return val;
}

/* size in bytes of one control value of the given type: 4, 2 or 1 */
static int uac2_ctl_value_size(int val_type)
{
	switch (val_type) {
	case USB_MIXER_S32:
	case USB_MIXER_U32:
		return 4;
	case USB_MIXER_S16:
	case USB_MIXER_U16:
		return 2;
	default:
		return 1;
	}
	return 0; /* unreachable */
}


/*
 * retrieve a mixer value
 */

/* interface number of the audio control interface this mixer belongs to */
static inline int mixer_ctrl_intf(struct usb_mixer_interface *mixer)
{
	return get_iface_desc(mixer->hostif)->bInterfaceNumber;
}

/*
 * Read one control value with the request code passed in @request.
 *
 * The request is retried up to 10 times.  A reply of at least val_len bytes
 * is converted and stored in *value_ret (return 0); -ETIMEDOUT aborts the
 * retries and is returned as is; when all retries are used up -EINVAL is
 * returned.  -EIO is returned if the shutdown lock cannot be taken.
 */
static int get_ctl_value_v1(struct usb_mixer_elem_info *cval, int request,
			    int validx, int *value_ret)
{
	struct snd_usb_audio *chip = cval->head.mixer->chip;
	unsigned char buf[2];
	/* types from USB_MIXER_S16 upwards are read as two bytes */
	int val_len = cval->val_type >= USB_MIXER_S16 ? 2 : 1;
	int timeout = 10;
	int idx = 0, err;

	err = snd_usb_lock_shutdown(chip);
	if (err < 0)
		return -EIO;

	while (timeout-- > 0) {
		/* wIndex: interface number in the low byte, unit ID above it */
		idx = mixer_ctrl_intf(cval->head.mixer) | (cval->head.id << 8);
		err = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), request,
				      USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
				      validx, idx, buf, val_len);
		if (err >= val_len) {
			*value_ret = convert_signed_value(cval, snd_usb_combine_bytes(buf, val_len));
			err = 0;
			goto out;
		} else if (err == -ETIMEDOUT) {
			goto out;
		}
	}
	usb_audio_dbg(chip,
		"cannot get ctl value: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n",
		request, validx, idx, cval->val_type);
	err = -EINVAL;

 out:
	snd_usb_unlock_shutdown(chip);
	return err;
}

/*
 * Read one control value using the CUR/RANGE request pair.
 *
 * UAC_GET_CUR is sent as a CUR request of val_size bytes.  Any other request
 * is sent as a RANGE request, and MIN, MAX or RES is then picked out of the
 * first triplet that follows the leading 16-bit field of the reply.
 */
static int get_ctl_value_v2(struct usb_mixer_elem_info *cval, int request,
			    int validx, int *value_ret)
{
	struct snd_usb_audio *chip = cval->head.mixer->chip;
	/* enough space for one range */
	unsigned char buf[sizeof(__u16) + 3 * sizeof(__u32)];
	unsigned char *val;
	int idx = 0, ret, val_size, size;
	__u8 bRequest;

	val_size = uac2_ctl_value_size(cval->val_type);

	if (request == UAC_GET_CUR) {
		bRequest = UAC2_CS_CUR;
		size = val_size;
	} else {
		bRequest = UAC2_CS_RANGE;
		size = sizeof(__u16) + 3 * val_size;
	}

	/* bytes the device does not fill in are read back as zero */
	memset(buf, 0, sizeof(buf));

	if (snd_usb_lock_shutdown(chip))
		return -EIO;

	idx = mixer_ctrl_intf(cval->head.mixer) | (cval->head.id << 8);
	ret = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), bRequest,
			      USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
			      validx, idx, buf, size);
	snd_usb_unlock_shutdown(chip);

	if (ret < 0) {
		usb_audio_dbg(chip,
			"cannot get ctl value: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n",
			request, validx, idx, cval->val_type);
		return ret;
	}

	/* FIXME: how should we handle multiple triplets here? */

	switch (request) {
	case UAC_GET_CUR:
		val = buf;
		break;
	case UAC_GET_MIN:
		val = buf + sizeof(__u16);
		break;
	case UAC_GET_MAX:
		val = buf + sizeof(__u16) + val_size;
		break;
	case UAC_GET_RES:
		val = buf + sizeof(__u16) + val_size * 2;
		break;
	default:
		return -EINVAL;
	}

	*value_ret = convert_signed_value(cval,
					  snd_usb_combine_bytes(val, val_size));

	return 0;
}

static int get_ctl_value(struct usb_mixer_elem_info *cval, int request,
			 int validx, int *value_ret)
{
	/* apply the per-element index offset before dispatching by protocol */
	validx += cval->idx_off;

	return (cval->head.mixer->protocol == UAC_VERSION_1) ?
get_ctl_value_v1(cval, request, validx, value_ret) :
		get_ctl_value_v2(cval, request, validx, value_ret);
}

/* read the current value of the control addressed by @validx */
static int get_cur_ctl_value(struct usb_mixer_elem_info *cval,
			     int validx, int *value)
{
	return get_ctl_value(cval, UAC_GET_CUR, validx, value);
}

/*
 * Read the current value of one channel straight from the device.
 * channel = 0: master, 1 = first channel
 */
static inline int get_cur_mix_raw(struct usb_mixer_elem_info *cval,
				  int channel, int *value)
{
	int validx = (cval->control << 8) | channel;

	return get_ctl_value(cval, UAC_GET_CUR, validx, value);
}

/*
 * Get the current value of a channel, preferring the cache.
 *
 * @channel selects the bit in cval->cached and @index the slot in
 * cval->cache_val.  A successful device read refreshes both.  A failed read
 * is returned to the caller, and logged unless the mixer ignores control
 * errors.
 */
int snd_usb_get_cur_mix_value(struct usb_mixer_elem_info *cval,
			      int channel, int index, int *value)
{
	const int chan_bit = 1 << channel;
	int ret;

	if (cval->cached & chan_bit) {
		*value = cval->cache_val[index];
		return 0;
	}

	ret = get_cur_mix_raw(cval, channel, value);
	if (ret >= 0) {
		cval->cached |= chan_bit;
		cval->cache_val[index] = *value;
		return 0;
	}

	if (!cval->head.mixer->ignore_ctl_error)
		usb_audio_dbg(cval->head.mixer->chip,
			      "cannot get current value for control %d ch %d: err = %d\n",
			      cval->control, channel, ret);
	return ret;
}

/*
 * set a mixer value
 */

int snd_usb_mixer_set_ctl_value(struct usb_mixer_elem_info *cval,
				int request, int validx, int value_set)
{
	struct snd_usb_audio *chip = cval->head.mixer->chip;
	unsigned char buf[4];
	int idx = 0, val_len, err, timeout = 10;

	validx += cval->idx_off;


	if (cval->head.mixer->protocol == UAC_VERSION_1) {
		val_len = cval->val_type >= USB_MIXER_S16 ?
2 : 1;
	} else { /* UAC_VERSION_2/3 */
		val_len = uac2_ctl_value_size(cval->val_type);

		/* FIXME */
		if (request != UAC_SET_CUR) {
			usb_audio_dbg(chip, "RANGE setting not yet supported\n");
			return -EINVAL;
		}

		/* only SET_CUR gets this far; it is sent as a CUR request */
		request = UAC2_CS_CUR;
	}

	/* serialise the converted value in little-endian byte order */
	value_set = convert_bytes_value(cval, value_set);
	buf[0] = value_set & 0xff;
	buf[1] = (value_set >> 8) & 0xff;
	buf[2] = (value_set >> 16) & 0xff;
	buf[3] = (value_set >> 24) & 0xff;

	err = snd_usb_lock_shutdown(chip);
	if (err < 0)
		return -EIO;

	/*
	 * Retry up to 10 times: any non-negative result counts as success,
	 * -ETIMEDOUT gives up immediately, other errors are retried.
	 */
	while (timeout-- > 0) {
		/* wIndex: interface number in the low byte, unit ID above it */
		idx = mixer_ctrl_intf(cval->head.mixer) | (cval->head.id << 8);
		err = snd_usb_ctl_msg(chip->dev,
				      usb_sndctrlpipe(chip->dev, 0), request,
				      USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_OUT,
				      validx, idx, buf, val_len);
		if (err >= 0) {
			err = 0;
			goto out;
		} else if (err == -ETIMEDOUT) {
			goto out;
		}
	}
	usb_audio_dbg(chip, "cannot set ctl value: req = %#x, wValue = %#x, wIndex = %#x, type = %d, data = %#x/%#x\n",
		      request, validx, idx, cval->val_type, buf[0], buf[1]);
	err = -EINVAL;

 out:
	snd_usb_unlock_shutdown(chip);
	return err;
}

/* write @value as the current value of the control addressed by @validx */
static int set_cur_ctl_value(struct usb_mixer_elem_info *cval,
			     int validx, int value)
{
	return snd_usb_mixer_set_ctl_value(cval, UAC_SET_CUR, validx, value);
}

int snd_usb_set_cur_mix_value(struct usb_mixer_elem_info *cval, int channel,
			      int index, int value)
{
	int err;
	/* channel 0 is the master; other channels use bit (channel - 1) */
	unsigned int read_only = (channel == 0) ?
cval->master_readonly : cval->ch_readonly & (1 << (channel - 1)); if (read_only) { usb_audio_dbg(cval->head.mixer->chip, "%s(): channel %d of control %d is read_only\n", __func__, channel, cval->control); return 0; } err = snd_usb_mixer_set_ctl_value(cval, UAC_SET_CUR, (cval->control << 8) | channel, value); if (err < 0) return err; cval->cached |= 1 << channel; cval->cache_val[index] = value; return 0; } /* * TLV callback for mixer volume controls */ int snd_usb_mixer_vol_tlv(struct snd_kcontrol *kcontrol, int op_flag, unsigned int size, unsigned int __user *_tlv) { struct usb_mixer_elem_info *cval = kcontrol->private_data; DECLARE_TLV_DB_MINMAX(scale, 0, 0); if (size < sizeof(scale)) return -ENOMEM; if (cval->min_mute) scale[0] = SNDRV_CTL_TLVT_DB_MINMAX_MUTE; scale[2] = cval->dBmin; scale[3] = cval->dBmax; if (copy_to_user(_tlv, scale, sizeof(scale))) return -EFAULT; return 0; } /* * parser routines begin here... */ static int parse_audio_unit(struct mixer_build *state, int unitid); /* * check if the input/output channel routing is enabled on the given bitmap. * used for mixer unit parser */ static int check_matrix_bitmap(unsigned char *bmap, int ich, int och, int num_outs) { int idx = ich * num_outs + och; return bmap[idx >> 3] & (0x80 >> (idx & 7)); } /* * add an alsa control element * search and increment the index until an empty slot is found. * * if failed, give up and free the control instance. 
*/ int snd_usb_mixer_add_list(struct usb_mixer_elem_list *list, struct snd_kcontrol *kctl, bool is_std_info) { struct usb_mixer_interface *mixer = list->mixer; int err; while (snd_ctl_find_id(mixer->chip->card, &kctl->id)) kctl->id.index++; err = snd_ctl_add(mixer->chip->card, kctl); if (err < 0) { usb_audio_dbg(mixer->chip, "cannot add control (err = %d)\n", err); return err; } list->kctl = kctl; list->is_std_info = is_std_info; list->next_id_elem = mixer->id_elems[list->id]; mixer->id_elems[list->id] = list; return 0; } /* * get a terminal name string */ static struct iterm_name_combo { int type; char *name; } iterm_names[] = { { 0x0300, "Output" }, { 0x0301, "Speaker" }, { 0x0302, "Headphone" }, { 0x0303, "HMD Audio" }, { 0x0304, "Desktop Speaker" }, { 0x0305, "Room Speaker" }, { 0x0306, "Com Speaker" }, { 0x0307, "LFE" }, { 0x0600, "External In" }, { 0x0601, "Analog In" }, { 0x0602, "Digital In" }, { 0x0603, "Line" }, { 0x0604, "Legacy In" }, { 0x0605, "IEC958 In" }, { 0x0606, "1394 DA Stream" }, { 0x0607, "1394 DV Stream" }, { 0x0700, "Embedded" }, { 0x0701, "Noise Source" }, { 0x0702, "Equalization Noise" }, { 0x0703, "CD" }, { 0x0704, "DAT" }, { 0x0705, "DCC" }, { 0x0706, "MiniDisk" }, { 0x0707, "Analog Tape" }, { 0x0708, "Phonograph" }, { 0x0709, "VCR Audio" }, { 0x070a, "Video Disk Audio" }, { 0x070b, "DVD Audio" }, { 0x070c, "TV Tuner Audio" }, { 0x070d, "Satellite Rec Audio" }, { 0x070e, "Cable Tuner Audio" }, { 0x070f, "DSS Audio" }, { 0x0710, "Radio Receiver" }, { 0x0711, "Radio Transmitter" }, { 0x0712, "Multi-Track Recorder" }, { 0x0713, "Synthesizer" }, { 0 }, }; static int get_term_name(struct snd_usb_audio *chip, struct usb_audio_term *iterm, unsigned char *name, int maxlen, int term_only) { struct iterm_name_combo *names; int len; if (iterm->name) { len = snd_usb_copy_string_desc(chip, iterm->name, name, maxlen); if (len) return len; } /* virtual type - not a real terminal */ if (iterm->type >> 16) { if (term_only) return 0; switch (iterm->type 
>> 16) { case UAC3_SELECTOR_UNIT: strcpy(name, "Selector"); return 8; case UAC3_PROCESSING_UNIT: strcpy(name, "Process Unit"); return 12; case UAC3_EXTENSION_UNIT: strcpy(name, "Ext Unit"); return 8; case UAC3_MIXER_UNIT: strcpy(name, "Mixer"); return 5; default: return sprintf(name, "Unit %d", iterm->id); } } switch (iterm->type & 0xff00) { case 0x0100: strcpy(name, "PCM"); return 3; case 0x0200: strcpy(name, "Mic"); return 3; case 0x0400: strcpy(name, "Headset"); return 7; case 0x0500: strcpy(name, "Phone"); return 5; } for (names = iterm_names; names->type; names++) { if (names->type == iterm->type) { strcpy(name, names->name); return strlen(names->name); } } return 0; } /* * Get logical cluster information for UAC3 devices. */ static int get_cluster_channels_v3(struct mixer_build *state, unsigned int cluster_id) { struct uac3_cluster_header_descriptor c_header; int err; err = snd_usb_ctl_msg(state->chip->dev, usb_rcvctrlpipe(state->chip->dev, 0), UAC3_CS_REQ_HIGH_CAPABILITY_DESCRIPTOR, USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, cluster_id, snd_usb_ctrl_intf(state->chip), &c_header, sizeof(c_header)); if (err < 0) goto error; if (err != sizeof(c_header)) { err = -EIO; goto error; } return c_header.bNrChannels; error: usb_audio_err(state->chip, "cannot request logical cluster ID: %d (err: %d)\n", cluster_id, err); return err; } /* * Get number of channels for a Mixer Unit. 
*/ static int uac_mixer_unit_get_channels(struct mixer_build *state, struct uac_mixer_unit_descriptor *desc) { int mu_channels; switch (state->mixer->protocol) { case UAC_VERSION_1: case UAC_VERSION_2: default: if (desc->bLength < sizeof(*desc) + desc->bNrInPins + 1) return 0; /* no bmControls -> skip */ mu_channels = uac_mixer_unit_bNrChannels(desc); break; case UAC_VERSION_3: mu_channels = get_cluster_channels_v3(state, uac3_mixer_unit_wClusterDescrID(desc)); break; } return mu_channels; } /* * Parse Input Terminal Unit */ static int __check_input_term(struct mixer_build *state, int id, struct usb_audio_term *term); static int parse_term_uac1_iterm_unit(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac_input_terminal_descriptor *d = p1; term->type = le16_to_cpu(d->wTerminalType); term->channels = d->bNrChannels; term->chconfig = le16_to_cpu(d->wChannelConfig); term->name = d->iTerminal; return 0; } static int parse_term_uac2_iterm_unit(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac2_input_terminal_descriptor *d = p1; int err; /* call recursively to verify the referenced clock entity */ err = __check_input_term(state, d->bCSourceID, term); if (err < 0) return err; /* save input term properties after recursion, * to ensure they are not overriden by the recursion calls */ term->id = id; term->type = le16_to_cpu(d->wTerminalType); term->channels = d->bNrChannels; term->chconfig = le32_to_cpu(d->bmChannelConfig); term->name = d->iTerminal; return 0; } static int parse_term_uac3_iterm_unit(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac3_input_terminal_descriptor *d = p1; int err; /* call recursively to verify the referenced clock entity */ err = __check_input_term(state, d->bCSourceID, term); if (err < 0) return err; /* save input term properties after recursion, * to ensure they are not overriden by the recursion calls */ term->id = id; term->type = 
le16_to_cpu(d->wTerminalType); err = get_cluster_channels_v3(state, le16_to_cpu(d->wClusterDescrID)); if (err < 0) return err; term->channels = err; /* REVISIT: UAC3 IT doesn't have channels cfg */ term->chconfig = 0; term->name = le16_to_cpu(d->wTerminalDescrStr); return 0; } static int parse_term_mixer_unit(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac_mixer_unit_descriptor *d = p1; int protocol = state->mixer->protocol; int err; err = uac_mixer_unit_get_channels(state, d); if (err <= 0) return err; term->type = UAC3_MIXER_UNIT << 16; /* virtual type */ term->channels = err; if (protocol != UAC_VERSION_3) { term->chconfig = uac_mixer_unit_wChannelConfig(d, protocol); term->name = uac_mixer_unit_iMixer(d); } return 0; } static int parse_term_selector_unit(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac_selector_unit_descriptor *d = p1; int err; /* call recursively to retrieve the channel info */ err = __check_input_term(state, d->baSourceID[0], term); if (err < 0) return err; term->type = UAC3_SELECTOR_UNIT << 16; /* virtual type */ term->id = id; if (state->mixer->protocol != UAC_VERSION_3) term->name = uac_selector_unit_iSelector(d); return 0; } static int parse_term_proc_unit(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id, int vtype) { struct uac_processing_unit_descriptor *d = p1; int protocol = state->mixer->protocol; int err; if (d->bNrInPins) { /* call recursively to retrieve the channel info */ err = __check_input_term(state, d->baSourceID[0], term); if (err < 0) return err; } term->type = vtype << 16; /* virtual type */ term->id = id; if (protocol == UAC_VERSION_3) return 0; if (!term->channels) { term->channels = uac_processing_unit_bNrChannels(d); term->chconfig = uac_processing_unit_wChannelConfig(d, protocol); } term->name = uac_processing_unit_iProcessing(d, protocol); return 0; } static int parse_term_effect_unit(struct mixer_build *state, 
struct usb_audio_term *term, void *p1, int id) { struct uac2_effect_unit_descriptor *d = p1; int err; err = __check_input_term(state, d->bSourceID, term); if (err < 0) return err; term->type = UAC3_EFFECT_UNIT << 16; /* virtual type */ term->id = id; return 0; } static int parse_term_uac2_clock_source(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac_clock_source_descriptor *d = p1; term->type = UAC3_CLOCK_SOURCE << 16; /* virtual type */ term->id = id; term->name = d->iClockSource; return 0; } static int parse_term_uac3_clock_source(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac3_clock_source_descriptor *d = p1; term->type = UAC3_CLOCK_SOURCE << 16; /* virtual type */ term->id = id; term->name = le16_to_cpu(d->wClockSourceStr); return 0; } #define PTYPE(a, b) ((a) << 8 | (b)) /* * parse the source unit recursively until it reaches to a terminal * or a branched unit. */ static int __check_input_term(struct mixer_build *state, int id, struct usb_audio_term *term) { int protocol = state->mixer->protocol; void *p1; unsigned char *hdr; for (;;) { /* a loop in the terminal chain? 
*/
		if (test_and_set_bit(id, state->termbitmap))
			return -EINVAL;

		p1 = find_audio_control_unit(state, id);
		if (!p1)
			break;
		if (!snd_usb_validate_audio_desc(p1, protocol))
			break; /* bad descriptor */

		hdr = p1;
		term->id = id;

		/* hdr[2] is the descriptor subtype; dispatch on (protocol, subtype) */
		switch (PTYPE(protocol, hdr[2])) {
		case PTYPE(UAC_VERSION_1, UAC_FEATURE_UNIT):
		case PTYPE(UAC_VERSION_2, UAC_FEATURE_UNIT):
		case PTYPE(UAC_VERSION_3, UAC3_FEATURE_UNIT): {
			/* the header is the same for all versions */
			struct uac_feature_unit_descriptor *d = p1;

			/* feature units are transparent: follow their source */
			id = d->bSourceID;
			break; /* continue to parse */
		}
		case PTYPE(UAC_VERSION_1, UAC_INPUT_TERMINAL):
			return parse_term_uac1_iterm_unit(state, term, p1, id);
		case PTYPE(UAC_VERSION_2, UAC_INPUT_TERMINAL):
			return parse_term_uac2_iterm_unit(state, term, p1, id);
		case PTYPE(UAC_VERSION_3, UAC_INPUT_TERMINAL):
			return parse_term_uac3_iterm_unit(state, term, p1, id);
		case PTYPE(UAC_VERSION_1, UAC_MIXER_UNIT):
		case PTYPE(UAC_VERSION_2, UAC_MIXER_UNIT):
		case PTYPE(UAC_VERSION_3, UAC3_MIXER_UNIT):
			return parse_term_mixer_unit(state, term, p1, id);
		case PTYPE(UAC_VERSION_1, UAC_SELECTOR_UNIT):
		case PTYPE(UAC_VERSION_2, UAC_SELECTOR_UNIT):
		case PTYPE(UAC_VERSION_2, UAC2_CLOCK_SELECTOR):
		case PTYPE(UAC_VERSION_3, UAC3_SELECTOR_UNIT):
		case PTYPE(UAC_VERSION_3, UAC3_CLOCK_SELECTOR):
			return parse_term_selector_unit(state, term, p1, id);
		case PTYPE(UAC_VERSION_1, UAC1_PROCESSING_UNIT):
		case PTYPE(UAC_VERSION_2, UAC2_PROCESSING_UNIT_V2):
		case PTYPE(UAC_VERSION_3, UAC3_PROCESSING_UNIT):
			return parse_term_proc_unit(state, term, p1, id,
						    UAC3_PROCESSING_UNIT);
		case PTYPE(UAC_VERSION_2, UAC2_EFFECT_UNIT):
		case PTYPE(UAC_VERSION_3, UAC3_EFFECT_UNIT):
			return parse_term_effect_unit(state, term, p1, id);
		case PTYPE(UAC_VERSION_1, UAC1_EXTENSION_UNIT):
		case PTYPE(UAC_VERSION_2, UAC2_EXTENSION_UNIT_V2):
		case PTYPE(UAC_VERSION_3, UAC3_EXTENSION_UNIT):
			return parse_term_proc_unit(state, term, p1, id,
						    UAC3_EXTENSION_UNIT);
		case PTYPE(UAC_VERSION_2, UAC2_CLOCK_SOURCE):
			return parse_term_uac2_clock_source(state, term, p1, id);
		case PTYPE(UAC_VERSION_3, UAC3_CLOCK_SOURCE):
			return parse_term_uac3_clock_source(state, term, p1, id);
		default:
			return -ENODEV;
		}
	}
	/* reached when the unit is missing or its descriptor is invalid */
	return -ENODEV;
}


/*
 * Entry point for the input-chain walk: clears @term and the visited-ID
 * bitmap, then follows the chain starting at @id.
 */
static int check_input_term(struct mixer_build *state, int id,
			    struct usb_audio_term *term)
{
	memset(term, 0, sizeof(*term));
	memset(state->termbitmap, 0, sizeof(state->termbitmap));
	return __check_input_term(state, id, term);
}

/*
 * Feature Unit
 */

/* feature unit control information */
struct usb_feature_control_info {
	int control;		/* feature unit control selector */
	const char *name;	/* control name */
	int type;		/* data type for uac1 */
	int type_uac2;		/* data type for uac2 if different from uac1, else -1 */
};

static const struct usb_feature_control_info audio_feature_info[] = {
	{ UAC_FU_MUTE,			"Mute",			USB_MIXER_INV_BOOLEAN, -1 },
	{ UAC_FU_VOLUME,		"Volume",		USB_MIXER_S16, -1 },
	{ UAC_FU_BASS,			"Tone Control - Bass",	USB_MIXER_S8, -1 },
	{ UAC_FU_MID,			"Tone Control - Mid",	USB_MIXER_S8, -1 },
	{ UAC_FU_TREBLE,		"Tone Control - Treble", USB_MIXER_S8, -1 },
	{ UAC_FU_GRAPHIC_EQUALIZER,	"Graphic Equalizer",	USB_MIXER_S8, -1 }, /* FIXME: not implemented yet */
	{ UAC_FU_AUTOMATIC_GAIN,	"Auto Gain Control",	USB_MIXER_BOOLEAN, -1 },
	{ UAC_FU_DELAY,			"Delay Control",	USB_MIXER_U16, USB_MIXER_U32 },
	{ UAC_FU_BASS_BOOST,		"Bass Boost",		USB_MIXER_BOOLEAN, -1 },
	{ UAC_FU_LOUDNESS,		"Loudness",		USB_MIXER_BOOLEAN, -1 },
	/* UAC2 specific */
	{ UAC2_FU_INPUT_GAIN,		"Input Gain Control",	USB_MIXER_S16, -1 },
	{ UAC2_FU_INPUT_GAIN_PAD,	"Input Gain Pad Control", USB_MIXER_S16, -1 },
	{ UAC2_FU_PHASE_INVERTER,	"Phase Inverter Control", USB_MIXER_BOOLEAN, -1 },
};

/* release a mixer element info structure */
static void usb_mixer_elem_info_free(struct usb_mixer_elem_info *cval)
{
	kfree(cval);
}

/* private_free callback */
void snd_usb_mixer_elem_free(struct snd_kcontrol *kctl)
{
	usb_mixer_elem_info_free(kctl->private_data);
	/* clear the pointer so the freed element cannot be reached again */
	kctl->private_data = NULL;
}

/*
 * interface to ALSA control for feature/mixer units
 */

/* volume control quirks */
static void volume_control_quirks(struct usb_mixer_elem_info *cval,
				  struct snd_kcontrol *kctl)
{
	struct
snd_usb_audio *chip = cval->head.mixer->chip;

	/*
	 * Override min/max/res for specific devices, selected by USB ID and
	 * then by the name of the control being set up.
	 */
	switch (chip->usb_id) {
	case USB_ID(0x0763, 0x2030): /* M-Audio Fast Track C400 */
	case USB_ID(0x0763, 0x2031): /* M-Audio Fast Track C600 */
		if (strcmp(kctl->id.name, "Effect Duration") == 0) {
			cval->min = 0x0000;
			cval->max = 0xffff;
			cval->res = 0x00e6;
			break;
		}
		if (strcmp(kctl->id.name, "Effect Volume") == 0 ||
		    strcmp(kctl->id.name, "Effect Feedback Volume") == 0) {
			cval->min = 0x00;
			cval->max = 0xff;
			break;
		}
		if (strstr(kctl->id.name, "Effect Return") != NULL) {
			cval->min = 0xb706;
			cval->max = 0xff7b;
			cval->res = 0x0073;
			break;
		}
		if ((strstr(kctl->id.name, "Playback Volume") != NULL) ||
			(strstr(kctl->id.name, "Effect Send") != NULL)) {
			cval->min = 0xb5fb; /* -73 dB = 0xb6ff */
			cval->max = 0xfcfe;
			cval->res = 0x0073;
		}
		break;

	case USB_ID(0x0763, 0x2081): /* M-Audio Fast Track Ultra 8R */
	case USB_ID(0x0763, 0x2080): /* M-Audio Fast Track Ultra */
		if (strcmp(kctl->id.name, "Effect Duration") == 0) {
			usb_audio_info(chip,
				       "set quirk for FTU Effect Duration\n");
			cval->min = 0x0000;
			cval->max = 0x7f00;
			cval->res = 0x0100;
			break;
		}
		if (strcmp(kctl->id.name, "Effect Volume") == 0 ||
		    strcmp(kctl->id.name, "Effect Feedback Volume") == 0) {
			usb_audio_info(chip,
				       "set quirks for FTU Effect Feedback/Volume\n");
			cval->min = 0x00;
			cval->max = 0x7f;
			break;
		}
		break;

	case USB_ID(0x0d8c, 0x0103):
		if (!strcmp(kctl->id.name, "PCM Playback Volume")) {
			usb_audio_info(chip,
				 "set volume quirk for CM102-A+/102S+\n");
			cval->min = -256;
		}
		break;

	case USB_ID(0x0471, 0x0101):
	case USB_ID(0x0471, 0x0104):
	case USB_ID(0x0471, 0x0105):
	case USB_ID(0x0672, 0x1041):
	/* quirk for UDA1321/N101.
	 * note that detection between firmware 2.1.1.7 (N101)
	 * and later 2.1.1.21 is not very clear from datasheets.
	 * I hope that the min value is -15360 for newer firmware --jk
	 */
		if (!strcmp(kctl->id.name, "PCM Playback Volume") &&
		    cval->min == -15616) {
			usb_audio_info(chip,
				 "set volume quirk for UDA1321/N101 chip\n");
			cval->max = -256;
		}
		break;

	case USB_ID(0x046d, 0x09a4):
		if (!strcmp(kctl->id.name, "Mic Capture Volume")) {
			usb_audio_info(chip,
				"set volume quirk for QuickCam E3500\n");
			cval->min = 6080;
			cval->max = 8768;
			cval->res = 192;
		}
		break;

	case USB_ID(0x046d, 0x0807): /* Logitech Webcam C500 */
	case USB_ID(0x046d, 0x0808):
	case USB_ID(0x046d, 0x0809):
	case USB_ID(0x046d, 0x0819): /* Logitech Webcam C210 */
	case USB_ID(0x046d, 0x081b): /* HD Webcam c310 */
	case USB_ID(0x046d, 0x081d): /* HD Webcam c510 */
	case USB_ID(0x046d, 0x0825): /* HD Webcam c270 */
	case USB_ID(0x046d, 0x0826): /* HD Webcam c525 */
	case USB_ID(0x046d, 0x08ca): /* Logitech Quickcam Fusion */
	case USB_ID(0x046d, 0x0991):
	case USB_ID(0x046d, 0x09a2): /* QuickCam Communicate Deluxe/S7500 */
	/* Most audio usb devices lie about volume resolution.
	 * Most Logitech webcams have res = 384.
	 * Probably there is some logitech magic behind this number --fishor
	 */
		if (!strcmp(kctl->id.name, "Mic Capture Volume")) {
			usb_audio_info(chip,
				"set resolution quirk: cval->res = 384\n");
			cval->res = 384;
		}
		break;
	case USB_ID(0x0495, 0x3042): /* ESS Technology Asus USB DAC */
		if ((strstr(kctl->id.name, "Playback Volume") != NULL) ||
			strstr(kctl->id.name, "Capture Volume") != NULL) {
			cval->min >>= 8;
			cval->max = 0;
			cval->res = 1;
		}
		break;
	case USB_ID(0x1224, 0x2a25): /* Jieli Technology USB PHY 2.0 */
		if (!strcmp(kctl->id.name, "Mic Capture Volume")) {
			usb_audio_info(chip,
				"set resolution quirk: cval->res = 16\n");
			cval->res = 16;
		}
		break;
	case USB_ID(0x1bcf, 0x2283): /* NexiGo N930AF FHD Webcam */
		if (!strcmp(kctl->id.name, "Mic Capture Volume")) {
			usb_audio_info(chip,
				"set resolution quirk: cval->res = 16\n");
			cval->res = 16;
		}
		break;
	}
}

/* forcibly initialize the current mixer value; if GET_CUR fails, set to
 * the minimum as default
 */
static void init_cur_mix_raw(struct usb_mixer_elem_info *cval, int ch, int idx)
{
	int val, err;

	/* a successful read also fills the value cache; nothing more to do */
	err = snd_usb_get_cur_mix_value(cval, ch, idx, &val);
	if (!err)
		return;
	if (!cval->head.mixer->ignore_ctl_error)
		usb_audio_warn(cval->head.mixer->chip,
			       "%d:%d: failed to get current value for ch %d (%d)\n",
			       cval->head.id, mixer_ctrl_intf(cval->head.mixer),
			       ch, err);
	snd_usb_set_cur_mix_value(cval, ch, idx, cval->min);
}

/*
 * retrieve the minimum and maximum values for the specified control
 */
static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval,
				   int default_min, struct snd_kcontrol *kctl)
{
	int i, idx;

	/* for failsafe */
	cval->min = default_min;
	cval->max = cval->min + 1;
	cval->res = 1;
	cval->dBmin = cval->dBmax = 0;

	if (cval->val_type == USB_MIXER_BOOLEAN ||
	    cval->val_type == USB_MIXER_INV_BOOLEAN) {
		/* boolean controls keep the failsafe range set above */
		cval->initialized = 1;
	} else {
		/* channel to query: the lowest one in cmask, else 0 (master) */
		int minchn = 0;
		if (cval->cmask) {
			for (i = 0; i < MAX_CHANNELS; i++)
				if (cval->cmask & (1 << i)) {
					minchn = i + 1;
					break;
				}
		}
		if (get_ctl_value(cval, UAC_GET_MAX, (cval->control << 8) |
minchn, &cval->max) < 0 ||
		    get_ctl_value(cval, UAC_GET_MIN, (cval->control << 8) | minchn, &cval->min) < 0) {
			usb_audio_err(cval->head.mixer->chip,
				      "%d:%d: cannot get min/max values for control %d (id %d)\n",
				   cval->head.id, mixer_ctrl_intf(cval->head.mixer),
							       cval->control, cval->head.id);
			return -EINVAL;
		}
		if (get_ctl_value(cval, UAC_GET_RES,
				  (cval->control << 8) | minchn,
				  &cval->res) < 0) {
			cval->res = 1;
		} else if (cval->head.mixer->protocol == UAC_VERSION_1) {
			int last_valid_res = cval->res;

			/*
			 * UAC1 only: keep halving the resolution for as long
			 * as the device accepts SET_RES, then read back what
			 * it actually settled on.
			 */
			while (cval->res > 1) {
				if (snd_usb_mixer_set_ctl_value(cval, UAC_SET_RES,
								(cval->control << 8) | minchn,
								cval->res / 2) < 0)
					break;
				cval->res /= 2;
			}
			if (get_ctl_value(cval, UAC_GET_RES,
					  (cval->control << 8) | minchn, &cval->res) < 0)
				cval->res = last_valid_res;
		}
		if (cval->res == 0)
			cval->res = 1;

		/* Additional checks for the proper resolution
		 *
		 * Some devices report smaller resolutions than actually
		 * reacting.  They don't return errors but simply clip
		 * to the lower aligned value.
		 */
		if (cval->min + cval->res < cval->max) {
			int last_valid_res = cval->res;
			int saved, test, check;
			if (get_cur_mix_raw(cval, minchn, &saved) < 0)
				goto no_res_check;
			/*
			 * Write a value one step away from the saved one and
			 * read it back; double the step until the read-back
			 * matches.  If a step leaves the range or the I/O
			 * fails, fall back to the reported resolution.
			 */
			for (;;) {
				test = saved;
				if (test < cval->max)
					test += cval->res;
				else
					test -= cval->res;
				if (test < cval->min || test > cval->max ||
				    snd_usb_set_cur_mix_value(cval, minchn, 0, test) ||
				    get_cur_mix_raw(cval, minchn, &check)) {
					cval->res = last_valid_res;
					break;
				}
				if (test == check)
					break;
				cval->res *= 2;
			}
			/* put back the value that was there before probing */
			snd_usb_set_cur_mix_value(cval, minchn, 0, saved);
		}

no_res_check:
		cval->initialized = 1;
	}

	if (kctl)
		volume_control_quirks(cval, kctl);

	/* USB descriptions contain the dB scale in 1/256 dB unit
	 * while ALSA TLV contains in 1/100 dB unit
	 */
	cval->dBmin = (convert_signed_value(cval, cval->min) * 100) / 256;
	cval->dBmax = (convert_signed_value(cval, cval->max) * 100) / 256;
	if (cval->dBmin > cval->dBmax) {
		/* something is wrong; assume it's either from/to 0dB */
		if (cval->dBmin < 0)
			cval->dBmax = 0;
		else if (cval->dBmin > 0)
			cval->dBmin = 0;
		if (cval->dBmin > cval->dBmax) {
			/* totally crap, return an error */
			return -EINVAL;
		}
	} else {
		/* if the max volume is too low, it's likely a bogus range;
		 * here we use -96dB as the threshold
		 */
		if (cval->dBmax <= -9600) {
			usb_audio_info(cval->head.mixer->chip,
				       "%d:%d: bogus dB values (%d/%d), disabling dB reporting\n",
				       cval->head.id, mixer_ctrl_intf(cval->head.mixer),
				       cval->dBmin, cval->dBmax);
			cval->dBmin = cval->dBmax = 0;
		}
	}

	/* initialize all elements */
	if (!cval->cmask) {
		init_cur_mix_raw(cval, 0, 0);
	} else {
		/* idx counts only the channels that are present in cmask */
		idx = 0;
		for (i = 0; i < MAX_CHANNELS; i++) {
			if (cval->cmask & (1 << i)) {
				init_cur_mix_raw(cval, i + 1, idx);
				idx++;
			}
		}
	}

	return 0;
}

/* same as get_min_max_with_quirks() but without a kcontrol, so no quirks */
#define get_min_max(cval, def)	get_min_max_with_quirks(cval, def, NULL)

/* get a feature/mixer unit info */
static int mixer_ctl_feature_info(struct snd_kcontrol *kcontrol,
				  struct snd_ctl_elem_info *uinfo)
{
	struct usb_mixer_elem_info *cval = kcontrol->private_data;

	if (cval->val_type == USB_MIXER_BOOLEAN ||
	    cval->val_type == USB_MIXER_INV_BOOLEAN)
		uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
	else
		uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
	uinfo->count = cval->channels;
	if (cval->val_type == USB_MIXER_BOOLEAN ||
	    cval->val_type == USB_MIXER_INV_BOOLEAN) {
		uinfo->value.integer.min = 0;
		uinfo->value.integer.max = 1;
	} else {
		/* the range is probed on first use */
		if (!cval->initialized) {
			get_min_max_with_quirks(cval, 0, kcontrol);
			/* no usable dB range: drop the TLV access bits and notify */
			if (cval->initialized && cval->dBmin >= cval->dBmax) {
				kcontrol->vd[0].access &=
					~(SNDRV_CTL_ELEM_ACCESS_TLV_READ |
					  SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK);
				snd_ctl_notify(cval->head.mixer->chip->card,
					       SNDRV_CTL_EVENT_MASK_INFO,
					       &kcontrol->id);
			}
		}
		/* expose a zero-based range counted in resolution steps */
		uinfo->value.integer.min = 0;
		uinfo->value.integer.max =
			DIV_ROUND_UP(cval->max - cval->min, cval->res);
	}
	return 0;
}

/* get the current value from feature/mixer unit */
static int mixer_ctl_feature_get(struct snd_kcontrol *kcontrol,
				 struct snd_ctl_elem_value *ucontrol)
{
	struct usb_mixer_elem_info *cval = kcontrol->private_data;
	int c, cnt, val, err;
ucontrol->value.integer.value[0] = cval->min; if (cval->cmask) { cnt = 0; for (c = 0; c < MAX_CHANNELS; c++) { if (!(cval->cmask & (1 << c))) continue; err = snd_usb_get_cur_mix_value(cval, c + 1, cnt, &val); if (err < 0) return filter_error(cval, err); val = get_relative_value(cval, val); ucontrol->value.integer.value[cnt] = val; cnt++; } return 0; } else { /* master channel */ err = snd_usb_get_cur_mix_value(cval, 0, 0, &val); if (err < 0) return filter_error(cval, err); val = get_relative_value(cval, val); ucontrol->value.integer.value[0] = val; } return 0; } /* put the current value to feature/mixer unit */ static int mixer_ctl_feature_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct usb_mixer_elem_info *cval = kcontrol->private_data; int c, cnt, val, oval, err; int changed = 0; if (cval->cmask) { cnt = 0; for (c = 0; c < MAX_CHANNELS; c++) { if (!(cval->cmask & (1 << c))) continue; err = snd_usb_get_cur_mix_value(cval, c + 1, cnt, &oval); if (err < 0) return filter_error(cval, err); val = ucontrol->value.integer.value[cnt]; val = get_abs_value(cval, val); if (oval != val) { snd_usb_set_cur_mix_value(cval, c + 1, cnt, val); changed = 1; } cnt++; } } else { /* master channel */ err = snd_usb_get_cur_mix_value(cval, 0, 0, &oval); if (err < 0) return filter_error(cval, err); val = ucontrol->value.integer.value[0]; val = get_abs_value(cval, val); if (val != oval) { snd_usb_set_cur_mix_value(cval, 0, 0, val); changed = 1; } } return changed; } /* get the boolean value from the master channel of a UAC control */ static int mixer_ctl_master_bool_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct usb_mixer_elem_info *cval = kcontrol->private_data; int val, err; err = snd_usb_get_cur_mix_value(cval, 0, 0, &val); if (err < 0) return filter_error(cval, err); val = (val != 0); ucontrol->value.integer.value[0] = val; return 0; } static int get_connector_value(struct usb_mixer_elem_info *cval, char *name, int 
*val) { struct snd_usb_audio *chip = cval->head.mixer->chip; int idx = 0, validx, ret; validx = cval->control << 8 | 0; ret = snd_usb_lock_shutdown(chip) ? -EIO : 0; if (ret) goto error; idx = mixer_ctrl_intf(cval->head.mixer) | (cval->head.id << 8); if (cval->head.mixer->protocol == UAC_VERSION_2) { struct uac2_connectors_ctl_blk uac2_conn; ret = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), UAC2_CS_CUR, USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, validx, idx, &uac2_conn, sizeof(uac2_conn)); if (val) *val = !!uac2_conn.bNrChannels; } else { /* UAC_VERSION_3 */ struct uac3_insertion_ctl_blk uac3_conn; ret = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), UAC2_CS_CUR, USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, validx, idx, &uac3_conn, sizeof(uac3_conn)); if (val) *val = !!uac3_conn.bmConInserted; } snd_usb_unlock_shutdown(chip); if (ret < 0) { if (name && strstr(name, "Speaker")) { if (val) *val = 1; return 0; } error: usb_audio_err(chip, "cannot get connectors status: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n", UAC_GET_CUR, validx, idx, cval->val_type); if (val) *val = 0; return filter_error(cval, ret); } return ret; } /* get the connectors status and report it as boolean type */ static int mixer_ctl_connector_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct usb_mixer_elem_info *cval = kcontrol->private_data; int ret, val; ret = get_connector_value(cval, kcontrol->id.name, &val); if (ret < 0) return ret; ucontrol->value.integer.value[0] = val; return 0; } static const struct snd_kcontrol_new usb_feature_unit_ctl = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "", /* will be filled later manually */ .info = mixer_ctl_feature_info, .get = mixer_ctl_feature_get, .put = mixer_ctl_feature_put, }; /* the read-only variant */ static const struct snd_kcontrol_new usb_feature_unit_ctl_ro = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "", /* will be filled later manually */ .info = 
mixer_ctl_feature_info,
	.get = mixer_ctl_feature_get,
	.put = NULL,
};

/*
 * A control which shows the boolean value from reading a UAC control on
 * the master channel.
 */
static const struct snd_kcontrol_new usb_bool_master_control_ctl_ro = {
	.iface = SNDRV_CTL_ELEM_IFACE_CARD,
	.name = "", /* will be filled later manually */
	.access = SNDRV_CTL_ELEM_ACCESS_READ,
	.info = snd_ctl_boolean_mono_info,
	.get = mixer_ctl_master_bool_get,
	.put = NULL,
};

/* read-only boolean control reporting a terminal's connector state */
static const struct snd_kcontrol_new usb_connector_ctl_ro = {
	.iface = SNDRV_CTL_ELEM_IFACE_CARD,
	.name = "", /* will be filled later manually */
	.access = SNDRV_CTL_ELEM_ACCESS_READ,
	.info = snd_ctl_boolean_mono_info,
	.get = mixer_ctl_connector_get,
	.put = NULL,
};

/*
 * This symbol is exported in order to allow the mixer quirks to
 * hook up to the standard feature unit control mechanism
 */
const struct snd_kcontrol_new *snd_usb_feature_unit_ctl = &usb_feature_unit_ctl;

/*
 * build a feature control
 */

/*
 * Append @str to the control's name, bounded by the size of the name
 * buffer; returns what strlcat() returns.
 */
static size_t append_ctl_name(struct snd_kcontrol *kctl, const char *str)
{
	return strlcat(kctl->id.name, str, sizeof(kctl->id.name));
}

/*
 * A lot of headsets/headphones have a "Speaker" mixer. Make sure we
 * rename it to "Headphone". We determine if something is a headphone
 * similar to how udev determines form factor.
*/ static void check_no_speaker_on_headset(struct snd_kcontrol *kctl, struct snd_card *card) { static const char * const names_to_check[] = { "Headset", "headset", "Headphone", "headphone", NULL}; const char * const *s; bool found = false; if (strcmp("Speaker", kctl->id.name)) return; for (s = names_to_check; *s; s++) if (strstr(card->shortname, *s)) { found = true; break; } if (!found) return; snd_ctl_rename(card, kctl, "Headphone"); } static const struct usb_feature_control_info *get_feature_control_info(int control) { int i; for (i = 0; i < ARRAY_SIZE(audio_feature_info); ++i) { if (audio_feature_info[i].control == control) return &audio_feature_info[i]; } return NULL; } static void __build_feature_ctl(struct usb_mixer_interface *mixer, const struct usbmix_name_map *imap, unsigned int ctl_mask, int control, struct usb_audio_term *iterm, struct usb_audio_term *oterm, int unitid, int nameid, int readonly_mask) { const struct usb_feature_control_info *ctl_info; unsigned int len = 0; int mapped_name = 0; struct snd_kcontrol *kctl; struct usb_mixer_elem_info *cval; const struct usbmix_name_map *map; unsigned int range; if (control == UAC_FU_GRAPHIC_EQUALIZER) { /* FIXME: not supported yet */ return; } map = find_map(imap, unitid, control); if (check_ignored_ctl(map)) return; cval = kzalloc(sizeof(*cval), GFP_KERNEL); if (!cval) return; snd_usb_mixer_elem_init_std(&cval->head, mixer, unitid); cval->control = control; cval->cmask = ctl_mask; ctl_info = get_feature_control_info(control); if (!ctl_info) { usb_mixer_elem_info_free(cval); return; } if (mixer->protocol == UAC_VERSION_1) cval->val_type = ctl_info->type; else /* UAC_VERSION_2 */ cval->val_type = ctl_info->type_uac2 >= 0 ? 
ctl_info->type_uac2 : ctl_info->type; if (ctl_mask == 0) { cval->channels = 1; /* master channel */ cval->master_readonly = readonly_mask; } else { int i, c = 0; for (i = 0; i < 16; i++) if (ctl_mask & (1 << i)) c++; cval->channels = c; cval->ch_readonly = readonly_mask; } /* * If all channels in the mask are marked read-only, make the control * read-only. snd_usb_set_cur_mix_value() will check the mask again and won't * issue write commands to read-only channels. */ if (cval->channels == readonly_mask) kctl = snd_ctl_new1(&usb_feature_unit_ctl_ro, cval); else kctl = snd_ctl_new1(&usb_feature_unit_ctl, cval); if (!kctl) { usb_audio_err(mixer->chip, "cannot malloc kcontrol\n"); usb_mixer_elem_info_free(cval); return; } kctl->private_free = snd_usb_mixer_elem_free; len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name)); mapped_name = len != 0; if (!len && nameid) len = snd_usb_copy_string_desc(mixer->chip, nameid, kctl->id.name, sizeof(kctl->id.name)); switch (control) { case UAC_FU_MUTE: case UAC_FU_VOLUME: /* * determine the control name. the rule is: * - if a name id is given in descriptor, use it. * - if the connected input can be determined, then use the name * of terminal type. * - if the connected output can be determined, use it. * - otherwise, anonymous name. */ if (!len) { if (iterm) len = get_term_name(mixer->chip, iterm, kctl->id.name, sizeof(kctl->id.name), 1); if (!len && oterm) len = get_term_name(mixer->chip, oterm, kctl->id.name, sizeof(kctl->id.name), 1); if (!len) snprintf(kctl->id.name, sizeof(kctl->id.name), "Feature %d", unitid); } if (!mapped_name) check_no_speaker_on_headset(kctl, mixer->chip->card); /* * determine the stream direction: * if the connected output is USB stream, then it's likely a * capture stream. 
otherwise it should be playback (hopefully :) */ if (!mapped_name && oterm && !(oterm->type >> 16)) { if ((oterm->type & 0xff00) == 0x0100) append_ctl_name(kctl, " Capture"); else append_ctl_name(kctl, " Playback"); } append_ctl_name(kctl, control == UAC_FU_MUTE ? " Switch" : " Volume"); break; default: if (!len) strscpy(kctl->id.name, audio_feature_info[control-1].name, sizeof(kctl->id.name)); break; } /* get min/max values */ get_min_max_with_quirks(cval, 0, kctl); /* skip a bogus volume range */ if (cval->max <= cval->min) { usb_audio_dbg(mixer->chip, "[%d] FU [%s] skipped due to invalid volume\n", cval->head.id, kctl->id.name); snd_ctl_free_one(kctl); return; } if (control == UAC_FU_VOLUME) { check_mapped_dB(map, cval); if (cval->dBmin < cval->dBmax || !cval->initialized) { kctl->tlv.c = snd_usb_mixer_vol_tlv; kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ | SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK; } } snd_usb_mixer_fu_apply_quirk(mixer, cval, unitid, kctl); range = (cval->max - cval->min) / cval->res; /* * Are there devices with volume range more than 255? I use a bit more * to be sure. 384 is a resolution magic number found on Logitech * devices. It will definitively catch all buggy Logitech devices. */ if (range > 384) { usb_audio_warn(mixer->chip, "Warning! 
Unlikely big volume range (=%u), cval->res is probably wrong.", range); usb_audio_warn(mixer->chip, "[%d] FU [%s] ch = %d, val = %d/%d/%d", cval->head.id, kctl->id.name, cval->channels, cval->min, cval->max, cval->res); } usb_audio_dbg(mixer->chip, "[%d] FU [%s] ch = %d, val = %d/%d/%d\n", cval->head.id, kctl->id.name, cval->channels, cval->min, cval->max, cval->res); snd_usb_mixer_add_control(&cval->head, kctl); } static void build_feature_ctl(struct mixer_build *state, void *raw_desc, unsigned int ctl_mask, int control, struct usb_audio_term *iterm, int unitid, int readonly_mask) { struct uac_feature_unit_descriptor *desc = raw_desc; int nameid = uac_feature_unit_iFeature(desc); __build_feature_ctl(state->mixer, state->map, ctl_mask, control, iterm, &state->oterm, unitid, nameid, readonly_mask); } static void build_feature_ctl_badd(struct usb_mixer_interface *mixer, unsigned int ctl_mask, int control, int unitid, const struct usbmix_name_map *badd_map) { __build_feature_ctl(mixer, badd_map, ctl_mask, control, NULL, NULL, unitid, 0, 0); } static void get_connector_control_name(struct usb_mixer_interface *mixer, struct usb_audio_term *term, bool is_input, char *name, int name_size) { int name_len = get_term_name(mixer->chip, term, name, name_size, 0); if (name_len == 0) strscpy(name, "Unknown", name_size); /* * sound/core/ctljack.c has a convention of naming jack controls * by ending in " Jack". Make it slightly more useful by * indicating Input or Output after the terminal name. 
*/
	if (is_input)
		strlcat(name, " - Input Jack", name_size);
	else
		strlcat(name, " - Output Jack", name_size);
}

/*
 * Resume callback for connector elements: get connector value to
 * "wake up" the USB audio.  The value itself and any error are discarded.
 */
static int connector_mixer_resume(struct usb_mixer_elem_list *list)
{
	struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list);

	get_connector_value(cval, NULL, NULL);
	return 0;
}

/*
 * Build a mixer control for a UAC connector control (jack-detect).
 *
 * @mixer:    mixer interface the control is added to
 * @imap:     name map searched for the terminal id
 * @term:     terminal whose connector state is exposed
 * @is_input: selects the "Input"/"Output" part of a generated name
 *
 * Failures are not reported; the control is simply not created.
 */
static void build_connector_control(struct usb_mixer_interface *mixer,
				    const struct usbmix_name_map *imap,
				    struct usb_audio_term *term, bool is_input)
{
	struct snd_kcontrol *kctl;
	struct usb_mixer_elem_info *cval;
	const struct usbmix_name_map *map;

	map = find_map(imap, term->id, 0);
	if (check_ignored_ctl(map))
		return;

	cval = kzalloc(sizeof(*cval), GFP_KERNEL);
	if (!cval)
		return;
	snd_usb_mixer_elem_init_std(&cval->head, mixer, term->id);

	/* set up a specific resume callback */
	cval->head.resume = connector_mixer_resume;

	/*
	 * UAC2: The first byte from reading the UAC2_TE_CONNECTOR control returns the
	 * number of channels connected.
	 *
	 * UAC3: The first byte specifies size of bitmap for the inserted controls. The
	 * following byte(s) specifies which connectors are inserted.
	 *
	 * This boolean ctl will simply report if any channels are connected
	 * or not.
	 */
	if (mixer->protocol == UAC_VERSION_2)
		cval->control = UAC2_TE_CONNECTOR;
	else /* UAC_VERSION_3 */
		cval->control = UAC3_TE_INSERTION;

	cval->val_type = USB_MIXER_BOOLEAN;
	cval->channels = 1; /* report true if any channel is connected */
	cval->min = 0;
	cval->max = 1;
	kctl = snd_ctl_new1(&usb_connector_ctl_ro, cval);
	if (!kctl) {
		usb_audio_err(mixer->chip, "cannot malloc kcontrol\n");
		usb_mixer_elem_info_free(cval);
		return;
	}

	/* a mapped name only gets the " Jack" suffix appended */
	if (check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name)))
		strlcat(kctl->id.name, " Jack", sizeof(kctl->id.name));
	else
		get_connector_control_name(mixer, term, is_input, kctl->id.name,
					   sizeof(kctl->id.name));
	kctl->private_free = snd_usb_mixer_elem_free;
	snd_usb_mixer_add_control(&cval->head, kctl);
}

/*
 * Parse a clock source unit and expose its clock validity as a read-only
 * boolean control.
 *
 * Returns -EINVAL for any protocol other than UAC2, 0 when the validity
 * control is not readable, -ENOMEM on allocation failure, otherwise the
 * result of adding the control.
 */
static int parse_clock_source_unit(struct mixer_build *state, int unitid,
				   void *_ftr)
{
	struct uac_clock_source_descriptor *hdr = _ftr;
	struct usb_mixer_elem_info *cval;
	struct snd_kcontrol *kctl;
	int ret;

	if (state->mixer->protocol != UAC_VERSION_2)
		return -EINVAL;

	/*
	 * The only property of this unit we are interested in is the
	 * clock source validity. If that isn't readable, just bail out.
	 */
	if (!uac_v2v3_control_is_readable(hdr->bmControls,
				      UAC2_CS_CONTROL_CLOCK_VALID))
		return 0;

	cval = kzalloc(sizeof(*cval), GFP_KERNEL);
	if (!cval)
		return -ENOMEM;

	snd_usb_mixer_elem_init_std(&cval->head, state->mixer, hdr->bClockID);

	cval->min = 0;
	cval->max = 1;
	cval->channels = 1;
	cval->val_type = USB_MIXER_BOOLEAN;
	cval->control = UAC2_CS_CONTROL_CLOCK_VALID;

	cval->master_readonly = 1;
	/* From UAC2 5.2.5.1.2 "Only the get request is supported."
*/
	kctl = snd_ctl_new1(&usb_bool_master_control_ctl_ro, cval);

	if (!kctl) {
		usb_mixer_elem_info_free(cval);
		return -ENOMEM;
	}

	kctl->private_free = snd_usb_mixer_elem_free;
	/* name: "<clock source string> Validity", else a numbered fallback */
	ret = snd_usb_copy_string_desc(state->chip, hdr->iClockSource,
				       kctl->id.name, sizeof(kctl->id.name));
	if (ret > 0)
		append_ctl_name(kctl, " Validity");
	else
		snprintf(kctl->id.name, sizeof(kctl->id.name),
			 "Clock Source %d Validity", hdr->bClockID);

	return snd_usb_mixer_add_control(&cval->head, kctl);
}

/*
 * parse a feature unit
 *
 * most of controls are defined here.
 *
 * The source unit is parsed first, then one control is built per control
 * type for the per-channel bits (only if the first channel has it) and one
 * for the master bits.  Returns 0, or the error from parsing the source
 * unit or from resolving the input terminal.
 */
static int parse_audio_feature_unit(struct mixer_build *state, int unitid,
				    void *_ftr)
{
	int channels, i, j;
	struct usb_audio_term iterm;
	unsigned int master_bits;
	int err, csize;
	struct uac_feature_unit_descriptor *hdr = _ftr;
	__u8 *bmaControls;

	/*
	 * csize is the size of one bmaControls entry; the channel count is
	 * derived from the descriptor length, minus the master entry.
	 * NOTE(review): the UAC1 branch divides by bControlSize with no
	 * zero check here - presumably descriptors are validated before
	 * this is called; confirm.
	 */
	if (state->mixer->protocol == UAC_VERSION_1) {
		csize = hdr->bControlSize;
		channels = (hdr->bLength - 7) / csize - 1;
		bmaControls = hdr->bmaControls;
	} else if (state->mixer->protocol == UAC_VERSION_2) {
		struct uac2_feature_unit_descriptor *ftr = _ftr;
		csize = 4;
		channels = (hdr->bLength - 6) / 4 - 1;
		bmaControls = ftr->bmaControls;
	} else { /* UAC_VERSION_3 */
		struct uac3_feature_unit_descriptor *ftr = _ftr;

		csize = 4;
		channels = (ftr->bLength - 7) / 4 - 1;
		bmaControls = ftr->bmaControls;
	}

	/* parse the source unit */
	err = parse_audio_unit(state, hdr->bSourceID);
	if (err < 0)
		return err;

	/* determine the input source type and name */
	err = check_input_term(state, hdr->bSourceID, &iterm);
	if (err < 0)
		return err;

	master_bits = snd_usb_combine_bytes(bmaControls, csize);
	/* master configuration quirks */
	switch (state->chip->usb_id) {
	case USB_ID(0x08bb, 0x2702):
		usb_audio_info(state->chip,
			       "usbmixer: master volume quirk for PCM2702 chip\n");
		/* disable non-functional volume control */
		master_bits &= ~UAC_CONTROL_BIT(UAC_FU_VOLUME);
		break;
	case USB_ID(0x1130, 0xf211):
		usb_audio_info(state->chip,
			       "usbmixer: volume control quirk for Tenx TP6911 Audio Headset\n");
		/* disable non-functional volume control */
		channels = 0;
		break;

	}

	if (state->mixer->protocol == UAC_VERSION_1) {
		/* check all control types */
		for (i = 0; i < 10; i++) {
			unsigned int ch_bits = 0;
			int control = audio_feature_info[i].control;

			/* collect the channels that have control bit i set */
			for (j = 0; j < channels; j++) {
				unsigned int mask;

				mask = snd_usb_combine_bytes(bmaControls +
							     csize * (j+1), csize);
				if (mask & (1 << i))
					ch_bits |= (1 << j);
			}
			/* audio class v1 controls are never read-only */

			/*
			 * The first channel must be set
			 * (for ease of programming).
			 */
			if (ch_bits & 1)
				build_feature_ctl(state, _ftr, ch_bits, control,
						  &iterm, unitid, 0);
			if (master_bits & (1 << i))
				build_feature_ctl(state, _ftr, 0, control,
						  &iterm, unitid, 0);
		}
	} else { /* UAC_VERSION_2/3 */
		for (i = 0; i < ARRAY_SIZE(audio_feature_info); i++) {
			unsigned int ch_bits = 0;
			unsigned int ch_read_only = 0;
			int control = audio_feature_info[i].control;

			/* collect readable channels and the read-only subset */
			for (j = 0; j < channels; j++) {
				unsigned int mask;

				mask = snd_usb_combine_bytes(bmaControls +
							     csize * (j+1), csize);
				if (uac_v2v3_control_is_readable(mask, control)) {
					ch_bits |= (1 << j);
					if (!uac_v2v3_control_is_writeable(mask, control))
						ch_read_only |= (1 << j);
				}
			}

			/*
			 * NOTE: build_feature_ctl() will mark the control
			 * read-only if all channels are marked read-only in
			 * the descriptors. Otherwise, the control will be
			 * reported as writeable, but the driver will not
			 * actually issue a write command for read-only
			 * channels.
			 */

			/*
			 * The first channel must be set
			 * (for ease of programming).
			 */
			if (ch_bits & 1)
				build_feature_ctl(state, _ftr, ch_bits, control,
						  &iterm, unitid, ch_read_only);
			if (uac_v2v3_control_is_readable(master_bits, control))
				build_feature_ctl(state, _ftr, 0, control,
						  &iterm, unitid,
						  !uac_v2v3_control_is_writeable(master_bits,
										 control));
		}
	}

	return 0;
}

/*
 * Mixer Unit
 */

/*
 * check whether the given in/out overflows bmMixerControls matrix
 *
 * The matrix needs one bit per input/output pair, rounded up to bytes;
 * returns true when that, plus the protocol-specific fields following it,
 * would extend past the end of the descriptor (its length is byte 0).
 */
static bool mixer_bitmap_overflow(struct uac_mixer_unit_descriptor *desc,
				  int protocol, int num_ins, int num_outs)
{
	u8 *hdr = (u8 *)desc;
	u8 *c = uac_mixer_unit_bmControls(desc, protocol);
	size_t rest; /* remaining bytes after bmMixerControls */

	switch (protocol) {
	case UAC_VERSION_1:
	default:
		rest = 1; /* iMixer */
		break;
	case UAC_VERSION_2:
		rest = 2; /* bmControls + iMixer */
		break;
	case UAC_VERSION_3:
		rest = 6; /* bmControls + wMixerDescrStr */
		break;
	}

	/* overflow? */
	return c + (num_ins * num_outs + 7) / 8 + rest > hdr + hdr[0];
}

/*
 * build a mixer unit control
 *
 * the callbacks are identical with feature unit.
 * input channel number (zero based) is given in control field instead.
*/ static void build_mixer_unit_ctl(struct mixer_build *state, struct uac_mixer_unit_descriptor *desc, int in_pin, int in_ch, int num_outs, int unitid, struct usb_audio_term *iterm) { struct usb_mixer_elem_info *cval; unsigned int i, len; struct snd_kcontrol *kctl; const struct usbmix_name_map *map; map = find_map(state->map, unitid, 0); if (check_ignored_ctl(map)) return; cval = kzalloc(sizeof(*cval), GFP_KERNEL); if (!cval) return; snd_usb_mixer_elem_init_std(&cval->head, state->mixer, unitid); cval->control = in_ch + 1; /* based on 1 */ cval->val_type = USB_MIXER_S16; for (i = 0; i < num_outs; i++) { __u8 *c = uac_mixer_unit_bmControls(desc, state->mixer->protocol); if (check_matrix_bitmap(c, in_ch, i, num_outs)) { cval->cmask |= (1 << i); cval->channels++; } } /* get min/max values */ get_min_max(cval, 0); kctl = snd_ctl_new1(&usb_feature_unit_ctl, cval); if (!kctl) { usb_audio_err(state->chip, "cannot malloc kcontrol\n"); usb_mixer_elem_info_free(cval); return; } kctl->private_free = snd_usb_mixer_elem_free; len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name)); if (!len) len = get_term_name(state->chip, iterm, kctl->id.name, sizeof(kctl->id.name), 0); if (!len) len = sprintf(kctl->id.name, "Mixer Source %d", in_ch + 1); append_ctl_name(kctl, " Volume"); usb_audio_dbg(state->chip, "[%d] MU [%s] ch = %d, val = %d/%d\n", cval->head.id, kctl->id.name, cval->channels, cval->min, cval->max); snd_usb_mixer_add_control(&cval->head, kctl); } static int parse_audio_input_terminal(struct mixer_build *state, int unitid, void *raw_desc) { struct usb_audio_term iterm; unsigned int control, bmctls, term_id; if (state->mixer->protocol == UAC_VERSION_2) { struct uac2_input_terminal_descriptor *d_v2 = raw_desc; control = UAC2_TE_CONNECTOR; term_id = d_v2->bTerminalID; bmctls = le16_to_cpu(d_v2->bmControls); } else if (state->mixer->protocol == UAC_VERSION_3) { struct uac3_input_terminal_descriptor *d_v3 = raw_desc; control = UAC3_TE_INSERTION; term_id = 
d_v3->bTerminalID; bmctls = le32_to_cpu(d_v3->bmControls); } else { return 0; /* UAC1. No Insertion control */ } check_input_term(state, term_id, &iterm); /* Check for jack detection. */ if ((iterm.type & 0xff00) != 0x0100 && uac_v2v3_control_is_readable(bmctls, control)) build_connector_control(state->mixer, state->map, &iterm, true); return 0; } /* * parse a mixer unit */ static int parse_audio_mixer_unit(struct mixer_build *state, int unitid, void *raw_desc) { struct uac_mixer_unit_descriptor *desc = raw_desc; struct usb_audio_term iterm; int input_pins, num_ins, num_outs; int pin, ich, err; err = uac_mixer_unit_get_channels(state, desc); if (err < 0) { usb_audio_err(state->chip, "invalid MIXER UNIT descriptor %d\n", unitid); return err; } num_outs = err; input_pins = desc->bNrInPins; num_ins = 0; ich = 0; for (pin = 0; pin < input_pins; pin++) { err = parse_audio_unit(state, desc->baSourceID[pin]); if (err < 0) continue; /* no bmControls field (e.g. Maya44) -> ignore */ if (!num_outs) continue; err = check_input_term(state, desc->baSourceID[pin], &iterm); if (err < 0) return err; num_ins += iterm.channels; if (mixer_bitmap_overflow(desc, state->mixer->protocol, num_ins, num_outs)) break; for (; ich < num_ins; ich++) { int och, ich_has_controls = 0; for (och = 0; och < num_outs; och++) { __u8 *c = uac_mixer_unit_bmControls(desc, state->mixer->protocol); if (check_matrix_bitmap(c, ich, och, num_outs)) { ich_has_controls = 1; break; } } if (ich_has_controls) build_mixer_unit_ctl(state, desc, pin, ich, num_outs, unitid, &iterm); } } return 0; } /* * Processing Unit / Extension Unit */ /* get callback for processing/extension unit */ static int mixer_ctl_procunit_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct usb_mixer_elem_info *cval = kcontrol->private_data; int err, val; err = get_cur_ctl_value(cval, cval->control << 8, &val); if (err < 0) { ucontrol->value.integer.value[0] = cval->min; return filter_error(cval, err); } val = 
get_relative_value(cval, val);
	ucontrol->value.integer.value[0] = val;
	return 0;
}

/*
 * put callback for processing/extension unit
 *
 * Returns 1 if a new value was written, 0 if it already matched, or the
 * filtered error from reading the current value.
 */
static int mixer_ctl_procunit_put(struct snd_kcontrol *kcontrol,
				  struct snd_ctl_elem_value *ucontrol)
{
	struct usb_mixer_elem_info *cval = kcontrol->private_data;
	int val, oval, err;

	err = get_cur_ctl_value(cval, cval->control << 8, &oval);
	if (err < 0)
		return filter_error(cval, err);
	val = ucontrol->value.integer.value[0];
	val = get_abs_value(cval, val);
	if (val != oval) {
		set_cur_ctl_value(cval, cval->control << 8, val);
		return 1;
	}
	return 0;
}

/* alsa control interface for processing/extension unit */
static const struct snd_kcontrol_new mixer_procunit_ctl = {
	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
	.name = "", /* will be filled later */
	.info = mixer_ctl_feature_info,
	.get = mixer_ctl_procunit_get,
	.put = mixer_ctl_procunit_put,
};

/*
 * predefined data for processing units
 */

/* one control of a unit; lists of these end with a zero control */
struct procunit_value_info {
	int control;		/* control selector */
	const char *suffix;	/* appended to the control name */
	int val_type;		/* USB_MIXER_* value type */
	int min_value;		/* default minimum passed to get_min_max() */
};

/* one unit type; lists of these end with a zero type */
struct procunit_info {
	int type;		/* matched against the descriptor's wProcessType */
	char *name;		/* control name prefix */
	const struct procunit_value_info *values;
};

static const struct procunit_value_info undefined_proc_info[] = {
	{ 0x00, "Control Undefined", 0 },
	{ 0 }
};

static const struct procunit_value_info updown_proc_info[] = {
	{ UAC_UD_ENABLE, "Switch", USB_MIXER_BOOLEAN },
	{ UAC_UD_MODE_SELECT, "Mode Select", USB_MIXER_U8, 1 },
	{ 0 }
};
static const struct procunit_value_info prologic_proc_info[] = {
	{ UAC_DP_ENABLE, "Switch", USB_MIXER_BOOLEAN },
	{ UAC_DP_MODE_SELECT, "Mode Select", USB_MIXER_U8, 1 },
	{ 0 }
};
static const struct procunit_value_info threed_enh_proc_info[] = {
	{ UAC_3D_ENABLE, "Switch", USB_MIXER_BOOLEAN },
	{ UAC_3D_SPACE, "Spaciousness", USB_MIXER_U8 },
	{ 0 }
};
static const struct procunit_value_info reverb_proc_info[] = {
	{ UAC_REVERB_ENABLE, "Switch", USB_MIXER_BOOLEAN },
	{ UAC_REVERB_LEVEL, "Level", USB_MIXER_U8 },
	{ UAC_REVERB_TIME, "Time", USB_MIXER_U16 },
	{ UAC_REVERB_FEEDBACK, "Feedback", USB_MIXER_U8 },
	{ 0 }
};
static const struct procunit_value_info chorus_proc_info[] = {
	{ UAC_CHORUS_ENABLE, "Switch", USB_MIXER_BOOLEAN },
	{ UAC_CHORUS_LEVEL, "Level", USB_MIXER_U8 },
	{ UAC_CHORUS_RATE, "Rate", USB_MIXER_U16 },
	{ UAC_CHORUS_DEPTH, "Depth", USB_MIXER_U16 },
	{ 0 }
};
static const struct procunit_value_info dcr_proc_info[] = {
	{ UAC_DCR_ENABLE, "Switch", USB_MIXER_BOOLEAN },
	{ UAC_DCR_RATE, "Ratio", USB_MIXER_U16 },
	{ UAC_DCR_MAXAMPL, "Max Amp", USB_MIXER_S16 },
	{ UAC_DCR_THRESHOLD, "Threshold", USB_MIXER_S16 },
	{ UAC_DCR_ATTACK_TIME, "Attack Time", USB_MIXER_U16 },
	{ UAC_DCR_RELEASE_TIME, "Release Time", USB_MIXER_U16 },
	{ 0 }
};

/* processing unit types searched for UAC1 and UAC2 */
static const struct procunit_info procunits[] = {
	{ UAC_PROCESS_UP_DOWNMIX, "Up Down", updown_proc_info },
	{ UAC_PROCESS_DOLBY_PROLOGIC, "Dolby Prologic", prologic_proc_info },
	{ UAC_PROCESS_STEREO_EXTENDER, "3D Stereo Extender", threed_enh_proc_info },
	{ UAC_PROCESS_REVERB, "Reverb", reverb_proc_info },
	{ UAC_PROCESS_CHORUS, "Chorus", chorus_proc_info },
	{ UAC_PROCESS_DYN_RANGE_COMP, "DCR", dcr_proc_info },
	{ 0 },
};

static const struct procunit_value_info uac3_updown_proc_info[] = {
	{ UAC3_UD_MODE_SELECT, "Mode Select", USB_MIXER_U8, 1 },
	{ 0 }
};
static const struct procunit_value_info uac3_stereo_ext_proc_info[] = {
	{ UAC3_EXT_WIDTH_CONTROL, "Width Control", USB_MIXER_U8 },
	{ 0 }
};

/* processing unit types searched for UAC3 */
static const struct procunit_info uac3_procunits[] = {
	{ UAC3_PROCESS_UP_DOWNMIX, "Up Down", uac3_updown_proc_info },
	{ UAC3_PROCESS_STEREO_EXTENDER, "3D Stereo Extender", uac3_stereo_ext_proc_info },
	{ UAC3_PROCESS_MULTI_FUNCTION, "Multi-Function", undefined_proc_info },
	{ 0 },
};

/*
 * predefined data for extension units
 */
static const struct procunit_value_info clock_rate_xu_info[] = {
	{ USB_XU_CLOCK_RATE_SELECTOR, "Selector", USB_MIXER_U8, 0 },
	{ 0 }
};
static const struct procunit_value_info clock_source_xu_info[] = {
	{ USB_XU_CLOCK_SOURCE_SELECTOR, "External", USB_MIXER_BOOLEAN },
	{ 0 }
};
static const struct procunit_value_info spdif_format_xu_info[] = {
	{
USB_XU_DIGITAL_FORMAT_SELECTOR, "SPDIF/AC3", USB_MIXER_BOOLEAN },
	{ 0 }
};
static const struct procunit_value_info soft_limit_xu_info[] = {
	{ USB_XU_SOFT_LIMIT_SELECTOR, " ", USB_MIXER_BOOLEAN },
	{ 0 }
};

/* extension unit types, searched by parse_audio_extension_unit() */
static const struct procunit_info extunits[] = {
	{ USB_XU_CLOCK_RATE, "Clock rate", clock_rate_xu_info },
	{ USB_XU_CLOCK_SOURCE, "DigitalIn CLK source", clock_source_xu_info },
	{ USB_XU_DIGITAL_IO_STATUS, "DigitalOut format:", spdif_format_xu_info },
	{ USB_XU_DEVICE_OPTIONS, "AnalogueIn Soft Limit", soft_limit_xu_info },
	{ 0 }
};

/*
 * build a processing/extension unit
 *
 * @state:          parser state
 * @unitid:         id of the unit being parsed
 * @raw_desc:       unit descriptor, read as a processing unit descriptor
 * @list:           unit types to match wProcessType against (zero-type
 *                  terminated); a single "Switch" control is assumed when
 *                  nothing matches
 * @extension_unit: selects the extension unit name id and fallback name
 *
 * All source units are parsed first, then one control is created for each
 * listed value whose bit is set in the unit's bmControls.
 *
 * Returns 0 on success or a negative error; controls added before a
 * failure stay registered.
 */
static int build_audio_procunit(struct mixer_build *state, int unitid,
				void *raw_desc, const struct procunit_info *list,
				bool extension_unit)
{
	struct uac_processing_unit_descriptor *desc = raw_desc;
	int num_ins;
	struct usb_mixer_elem_info *cval;
	struct snd_kcontrol *kctl;
	int i, err, nameid, type, len, val;
	const struct procunit_info *info;
	const struct procunit_value_info *valinfo;
	const struct usbmix_name_map *map;
	static const struct procunit_value_info default_value_info[] = {
		{ 0x01, "Switch", USB_MIXER_BOOLEAN },
		{ 0 }
	};
	static const struct procunit_info default_info = {
		0, NULL, default_value_info
	};
	const char *name = extension_unit ?
		"Extension Unit" : "Processing Unit";

	num_ins = desc->bNrInPins;
	for (i = 0; i < num_ins; i++) {
		err = parse_audio_unit(state, desc->baSourceID[i]);
		if (err < 0)
			return err;
	}

	type = le16_to_cpu(desc->wProcessType);
	for (info = list; info && info->type; info++)
		if (info->type == type)
			break;
	/* unknown type (or no list): fall back to the generic switch */
	if (!info || !info->type)
		info = &default_info;

	for (valinfo = info->values; valinfo->control; valinfo++) {
		__u8 *controls = uac_processing_unit_bmControls(desc, state->mixer->protocol);

		/*
		 * Skip controls the unit does not advertise.
		 * NOTE(review): the UAC1 test shifts by (control % 8) - 1,
		 * which assumes control % 8 is never 0 - holds for the
		 * selectors in the tables visible here; confirm before
		 * adding new entries.
		 */
		if (state->mixer->protocol == UAC_VERSION_1) {
			if (!(controls[valinfo->control / 8] &
					(1 << ((valinfo->control % 8) - 1))))
				continue;
		} else { /* UAC_VERSION_2/3 */
			if (!uac_v2v3_control_is_readable(controls[valinfo->control / 8],
							  valinfo->control))
				continue;
		}

		map = find_map(state->map, unitid, valinfo->control);
		if (check_ignored_ctl(map))
			continue;
		cval = kzalloc(sizeof(*cval), GFP_KERNEL);
		if (!cval)
			return -ENOMEM;
		snd_usb_mixer_elem_init_std(&cval->head, state->mixer, unitid);
		cval->control = valinfo->control;
		cval->val_type = valinfo->val_type;
		cval->channels = 1;

		if (state->mixer->protocol > UAC_VERSION_1 &&
		    !uac_v2v3_control_is_writeable(controls[valinfo->control / 8],
						   valinfo->control))
			cval->master_readonly = 1;

		/* get min/max values */
		switch (type) {
		case UAC_PROCESS_UP_DOWNMIX: {
			bool mode_sel = false;

			switch (state->mixer->protocol) {
			case UAC_VERSION_1:
			case UAC_VERSION_2:
			default:
				if (cval->control == UAC_UD_MODE_SELECT)
					mode_sel = true;
				break;
			case UAC_VERSION_3:
				if (cval->control == UAC3_UD_MODE_SELECT)
					mode_sel = true;
				break;
			}

			/*
			 * The mode selector's range is 1 .. the first byte
			 * of the unit-specific descriptor part, not a value
			 * queried from the device.
			 */
			if (mode_sel) {
				__u8 *control_spec = uac_processing_unit_specific(desc,
								state->mixer->protocol);
				cval->min = 1;
				cval->max = control_spec[0];
				cval->res = 1;
				cval->initialized = 1;
				break;
			}

			get_min_max(cval, valinfo->min_value);
			break;
		}
		case USB_XU_CLOCK_RATE:
			/*
			 * E-Mu USB 0404/0202/TrackerPre/0204
			 * samplerate control quirk
			 */
			cval->min = 0;
			cval->max = 5;
			cval->res = 1;
			cval->initialized = 1;
			break;
		default:
			get_min_max(cval, valinfo->min_value);
			break;
		}

		/* a control whose current value cannot be read is fatal */
		err = get_cur_ctl_value(cval, cval->control << 8, &val);
		if (err < 0) {
			usb_mixer_elem_info_free(cval);
			return -EINVAL;
		}

		kctl = snd_ctl_new1(&mixer_procunit_ctl, cval);
		if (!kctl) {
			usb_mixer_elem_info_free(cval);
			return -ENOMEM;
		}
		kctl->private_free = snd_usb_mixer_elem_free;

		/*
		 * name: mapped name, else the table's name, else the unit's
		 * string descriptor, else the generic unit name
		 */
		if (check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name))) {
			/* nothing */ ;
		} else if (info->name) {
			strscpy(kctl->id.name, info->name, sizeof(kctl->id.name));
		} else {
			if (extension_unit)
				nameid = uac_extension_unit_iExtension(desc, state->mixer->protocol);
			else
				nameid = uac_processing_unit_iProcessing(desc, state->mixer->protocol);
			len = 0;
			if (nameid)
				len = snd_usb_copy_string_desc(state->chip,
							       nameid,
							       kctl->id.name,
							       sizeof(kctl->id.name));
			if (!len)
				strscpy(kctl->id.name, name, sizeof(kctl->id.name));
		}
		append_ctl_name(kctl, " ");
		append_ctl_name(kctl, valinfo->suffix);

		usb_audio_dbg(state->chip,
			      "[%d] PU [%s] ch = %d, val = %d/%d\n",
			      cval->head.id, kctl->id.name, cval->channels,
			      cval->min, cval->max);

		err = snd_usb_mixer_add_control(&cval->head, kctl);
		if (err < 0)
			return err;
	}
	return 0;
}

/*
 * Parse a processing unit, using the UAC3 type table for UAC3 devices and
 * the common table for every other protocol version.
 */
static int parse_audio_processing_unit(struct mixer_build *state, int unitid,
				       void *raw_desc)
{
	switch (state->mixer->protocol) {
	case UAC_VERSION_1:
	case UAC_VERSION_2:
	default:
		return build_audio_procunit(state, unitid, raw_desc,
					    procunits, false);
	case UAC_VERSION_3:
		return build_audio_procunit(state, unitid, raw_desc,
					    uac3_procunits, false);
	}
}

/* Parse an extension unit using the extunits[] table. */
static int parse_audio_extension_unit(struct mixer_build *state, int unitid,
				      void *raw_desc)
{
	/*
	 * Note that we parse extension units with processing unit descriptors.
	 * That's ok as the layout is the same.
*/ return build_audio_procunit(state, unitid, raw_desc, extunits, true); } /* * Selector Unit */ /* * info callback for selector unit * use an enumerator type for routing */ static int mixer_ctl_selector_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { struct usb_mixer_elem_info *cval = kcontrol->private_data; const char **itemlist = (const char **)kcontrol->private_value; if (snd_BUG_ON(!itemlist)) return -EINVAL; return snd_ctl_enum_info(uinfo, 1, cval->max, itemlist); } /* get callback for selector unit */ static int mixer_ctl_selector_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct usb_mixer_elem_info *cval = kcontrol->private_data; int val, err; err = get_cur_ctl_value(cval, cval->control << 8, &val); if (err < 0) { ucontrol->value.enumerated.item[0] = 0; return filter_error(cval, err); } val = get_relative_value(cval, val); ucontrol->value.enumerated.item[0] = val; return 0; } /* put callback for selector unit */ static int mixer_ctl_selector_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct usb_mixer_elem_info *cval = kcontrol->private_data; int val, oval, err; err = get_cur_ctl_value(cval, cval->control << 8, &oval); if (err < 0) return filter_error(cval, err); val = ucontrol->value.enumerated.item[0]; val = get_abs_value(cval, val); if (val != oval) { set_cur_ctl_value(cval, cval->control << 8, val); return 1; } return 0; } /* alsa control interface for selector unit */ static const struct snd_kcontrol_new mixer_selectunit_ctl = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "", /* will be filled later */ .info = mixer_ctl_selector_info, .get = mixer_ctl_selector_get, .put = mixer_ctl_selector_put, }; /* * private free callback. 
* free both private_data and private_value */ static void usb_mixer_selector_elem_free(struct snd_kcontrol *kctl) { int i, num_ins = 0; if (kctl->private_data) { struct usb_mixer_elem_info *cval = kctl->private_data; num_ins = cval->max; usb_mixer_elem_info_free(cval); kctl->private_data = NULL; } if (kctl->private_value) { char **itemlist = (char **)kctl->private_value; for (i = 0; i < num_ins; i++) kfree(itemlist[i]); kfree(itemlist); kctl->private_value = 0; } } /* * parse a selector unit */ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, void *raw_desc) { struct uac_selector_unit_descriptor *desc = raw_desc; unsigned int i, nameid, len; int err; struct usb_mixer_elem_info *cval; struct snd_kcontrol *kctl; const struct usbmix_name_map *map; char **namelist; for (i = 0; i < desc->bNrInPins; i++) { err = parse_audio_unit(state, desc->baSourceID[i]); if (err < 0) return err; } if (desc->bNrInPins == 1) /* only one ? nonsense! */ return 0; map = find_map(state->map, unitid, 0); if (check_ignored_ctl(map)) return 0; cval = kzalloc(sizeof(*cval), GFP_KERNEL); if (!cval) return -ENOMEM; snd_usb_mixer_elem_init_std(&cval->head, state->mixer, unitid); cval->val_type = USB_MIXER_U8; cval->channels = 1; cval->min = 1; cval->max = desc->bNrInPins; cval->res = 1; cval->initialized = 1; switch (state->mixer->protocol) { case UAC_VERSION_1: default: cval->control = 0; break; case UAC_VERSION_2: case UAC_VERSION_3: if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR || desc->bDescriptorSubtype == UAC3_CLOCK_SELECTOR) cval->control = UAC2_CX_CLOCK_SELECTOR; else /* UAC2/3_SELECTOR_UNIT */ cval->control = UAC2_SU_SELECTOR; break; } namelist = kcalloc(desc->bNrInPins, sizeof(char *), GFP_KERNEL); if (!namelist) { err = -ENOMEM; goto error_cval; } #define MAX_ITEM_NAME_LEN 64 for (i = 0; i < desc->bNrInPins; i++) { struct usb_audio_term iterm; namelist[i] = kmalloc(MAX_ITEM_NAME_LEN, GFP_KERNEL); if (!namelist[i]) { err = -ENOMEM; goto error_name; } len 
= check_mapped_selector_name(state, unitid, i, namelist[i], MAX_ITEM_NAME_LEN); if (! len && check_input_term(state, desc->baSourceID[i], &iterm) >= 0) len = get_term_name(state->chip, &iterm, namelist[i], MAX_ITEM_NAME_LEN, 0); if (! len) sprintf(namelist[i], "Input %u", i); } kctl = snd_ctl_new1(&mixer_selectunit_ctl, cval); if (! kctl) { usb_audio_err(state->chip, "cannot malloc kcontrol\n"); err = -ENOMEM; goto error_name; } kctl->private_value = (unsigned long)namelist; kctl->private_free = usb_mixer_selector_elem_free; /* check the static mapping table at first */ len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name)); if (!len) { /* no mapping ? */ switch (state->mixer->protocol) { case UAC_VERSION_1: case UAC_VERSION_2: default: /* if iSelector is given, use it */ nameid = uac_selector_unit_iSelector(desc); if (nameid) len = snd_usb_copy_string_desc(state->chip, nameid, kctl->id.name, sizeof(kctl->id.name)); break; case UAC_VERSION_3: /* TODO: Class-Specific strings not yet supported */ break; } /* ... or pick up the terminal name at next */ if (!len) len = get_term_name(state->chip, &state->oterm, kctl->id.name, sizeof(kctl->id.name), 0); /* ... 
or use the fixed string "USB" as the last resort */ if (!len) strscpy(kctl->id.name, "USB", sizeof(kctl->id.name)); /* and add the proper suffix */ if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR || desc->bDescriptorSubtype == UAC3_CLOCK_SELECTOR) append_ctl_name(kctl, " Clock Source"); else if ((state->oterm.type & 0xff00) == 0x0100) append_ctl_name(kctl, " Capture Source"); else append_ctl_name(kctl, " Playback Source"); } usb_audio_dbg(state->chip, "[%d] SU [%s] items = %d\n", cval->head.id, kctl->id.name, desc->bNrInPins); return snd_usb_mixer_add_control(&cval->head, kctl); error_name: for (i = 0; i < desc->bNrInPins; i++) kfree(namelist[i]); kfree(namelist); error_cval: usb_mixer_elem_info_free(cval); return err; } /* * parse an audio unit recursively */ static int parse_audio_unit(struct mixer_build *state, int unitid) { unsigned char *p1; int protocol = state->mixer->protocol; if (test_and_set_bit(unitid, state->unitbitmap)) return 0; /* the unit already visited */ p1 = find_audio_control_unit(state, unitid); if (!p1) { usb_audio_err(state->chip, "unit %d not found!\n", unitid); return -EINVAL; } if (!snd_usb_validate_audio_desc(p1, protocol)) { usb_audio_dbg(state->chip, "invalid unit %d\n", unitid); return 0; /* skip invalid unit */ } switch (PTYPE(protocol, p1[2])) { case PTYPE(UAC_VERSION_1, UAC_INPUT_TERMINAL): case PTYPE(UAC_VERSION_2, UAC_INPUT_TERMINAL): case PTYPE(UAC_VERSION_3, UAC_INPUT_TERMINAL): return parse_audio_input_terminal(state, unitid, p1); case PTYPE(UAC_VERSION_1, UAC_MIXER_UNIT): case PTYPE(UAC_VERSION_2, UAC_MIXER_UNIT): case PTYPE(UAC_VERSION_3, UAC3_MIXER_UNIT): return parse_audio_mixer_unit(state, unitid, p1); case PTYPE(UAC_VERSION_2, UAC2_CLOCK_SOURCE): case PTYPE(UAC_VERSION_3, UAC3_CLOCK_SOURCE): return parse_clock_source_unit(state, unitid, p1); case PTYPE(UAC_VERSION_1, UAC_SELECTOR_UNIT): case PTYPE(UAC_VERSION_2, UAC_SELECTOR_UNIT): case PTYPE(UAC_VERSION_3, UAC3_SELECTOR_UNIT): case PTYPE(UAC_VERSION_2, 
UAC2_CLOCK_SELECTOR): case PTYPE(UAC_VERSION_3, UAC3_CLOCK_SELECTOR): return parse_audio_selector_unit(state, unitid, p1); case PTYPE(UAC_VERSION_1, UAC_FEATURE_UNIT): case PTYPE(UAC_VERSION_2, UAC_FEATURE_UNIT): case PTYPE(UAC_VERSION_3, UAC3_FEATURE_UNIT): return parse_audio_feature_unit(state, unitid, p1); case PTYPE(UAC_VERSION_1, UAC1_PROCESSING_UNIT): case PTYPE(UAC_VERSION_2, UAC2_PROCESSING_UNIT_V2): case PTYPE(UAC_VERSION_3, UAC3_PROCESSING_UNIT): return parse_audio_processing_unit(state, unitid, p1); case PTYPE(UAC_VERSION_1, UAC1_EXTENSION_UNIT): case PTYPE(UAC_VERSION_2, UAC2_EXTENSION_UNIT_V2): case PTYPE(UAC_VERSION_3, UAC3_EXTENSION_UNIT): return parse_audio_extension_unit(state, unitid, p1); case PTYPE(UAC_VERSION_2, UAC2_EFFECT_UNIT): case PTYPE(UAC_VERSION_3, UAC3_EFFECT_UNIT): return 0; /* FIXME - effect units not implemented yet */ default: usb_audio_err(state->chip, "unit %u: unexpected type 0x%02x\n", unitid, p1[2]); return -EINVAL; } } static void snd_usb_mixer_free(struct usb_mixer_interface *mixer) { /* kill pending URBs */ snd_usb_mixer_disconnect(mixer); kfree(mixer->id_elems); if (mixer->urb) { kfree(mixer->urb->transfer_buffer); usb_free_urb(mixer->urb); } usb_free_urb(mixer->rc_urb); kfree(mixer->rc_setup_packet); kfree(mixer); } static int snd_usb_mixer_dev_free(struct snd_device *device) { struct usb_mixer_interface *mixer = device->device_data; snd_usb_mixer_free(mixer); return 0; } /* UAC3 predefined channels configuration */ struct uac3_badd_profile { int subclass; const char *name; int c_chmask; /* capture channels mask */ int p_chmask; /* playback channels mask */ int st_chmask; /* side tone mixing channel mask */ }; static const struct uac3_badd_profile uac3_badd_profiles[] = { { /* * BAIF, BAOF or combination of both * IN: Mono or Stereo cfg, Mono alt possible * OUT: Mono or Stereo cfg, Mono alt possible */ .subclass = UAC3_FUNCTION_SUBCLASS_GENERIC_IO, .name = "GENERIC IO", .c_chmask = -1, /* dynamic channels */ .p_chmask = 
-1, /* dynamic channels */ }, { /* BAOF; Stereo only cfg, Mono alt possible */ .subclass = UAC3_FUNCTION_SUBCLASS_HEADPHONE, .name = "HEADPHONE", .p_chmask = 3, }, { /* BAOF; Mono or Stereo cfg, Mono alt possible */ .subclass = UAC3_FUNCTION_SUBCLASS_SPEAKER, .name = "SPEAKER", .p_chmask = -1, /* dynamic channels */ }, { /* BAIF; Mono or Stereo cfg, Mono alt possible */ .subclass = UAC3_FUNCTION_SUBCLASS_MICROPHONE, .name = "MICROPHONE", .c_chmask = -1, /* dynamic channels */ }, { /* * BAIOF topology * IN: Mono only * OUT: Mono or Stereo cfg, Mono alt possible */ .subclass = UAC3_FUNCTION_SUBCLASS_HEADSET, .name = "HEADSET", .c_chmask = 1, .p_chmask = -1, /* dynamic channels */ .st_chmask = 1, }, { /* BAIOF; IN: Mono only; OUT: Stereo only, Mono alt possible */ .subclass = UAC3_FUNCTION_SUBCLASS_HEADSET_ADAPTER, .name = "HEADSET ADAPTER", .c_chmask = 1, .p_chmask = 3, .st_chmask = 1, }, { /* BAIF + BAOF; IN: Mono only; OUT: Mono only */ .subclass = UAC3_FUNCTION_SUBCLASS_SPEAKERPHONE, .name = "SPEAKERPHONE", .c_chmask = 1, .p_chmask = 1, }, { 0 } /* terminator */ }; static bool uac3_badd_func_has_valid_channels(struct usb_mixer_interface *mixer, const struct uac3_badd_profile *f, int c_chmask, int p_chmask) { /* * If both playback/capture channels are dynamic, make sure * at least one channel is present */ if (f->c_chmask < 0 && f->p_chmask < 0) { if (!c_chmask && !p_chmask) { usb_audio_warn(mixer->chip, "BAAD %s: no channels?", f->name); return false; } return true; } if ((f->c_chmask < 0 && !c_chmask) || (f->c_chmask >= 0 && f->c_chmask != c_chmask)) { usb_audio_warn(mixer->chip, "BAAD %s c_chmask mismatch", f->name); return false; } if ((f->p_chmask < 0 && !p_chmask) || (f->p_chmask >= 0 && f->p_chmask != p_chmask)) { usb_audio_warn(mixer->chip, "BAAD %s p_chmask mismatch", f->name); return false; } return true; } /* * create mixer controls for UAC3 BADD profiles * * UAC3 BADD device doesn't contain CS descriptors thus we will guess everything * * BADD device 
may contain Mixer Unit, which doesn't have any controls, skip it */ static int snd_usb_mixer_controls_badd(struct usb_mixer_interface *mixer, int ctrlif) { struct usb_device *dev = mixer->chip->dev; struct usb_interface_assoc_descriptor *assoc; int badd_profile = mixer->chip->badd_profile; const struct uac3_badd_profile *f; const struct usbmix_ctl_map *map; int p_chmask = 0, c_chmask = 0, st_chmask = 0; int i; assoc = usb_ifnum_to_if(dev, ctrlif)->intf_assoc; /* Detect BADD capture/playback channels from AS EP descriptors */ for (i = 0; i < assoc->bInterfaceCount; i++) { int intf = assoc->bFirstInterface + i; struct usb_interface *iface; struct usb_host_interface *alts; struct usb_interface_descriptor *altsd; unsigned int maxpacksize; char dir_in; int chmask, num; if (intf == ctrlif) continue; iface = usb_ifnum_to_if(dev, intf); if (!iface) continue; num = iface->num_altsetting; if (num < 2) return -EINVAL; /* * The number of Channels in an AudioStreaming interface * and the audio sample bit resolution (16 bits or 24 * bits) can be derived from the wMaxPacketSize field in * the Standard AS Audio Data Endpoint descriptor in * Alternate Setting 1 */ alts = &iface->altsetting[1]; altsd = get_iface_desc(alts); if (altsd->bNumEndpoints < 1) return -EINVAL; /* check direction */ dir_in = (get_endpoint(alts, 0)->bEndpointAddress & USB_DIR_IN); maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize); switch (maxpacksize) { default: usb_audio_err(mixer->chip, "incorrect wMaxPacketSize 0x%x for BADD profile\n", maxpacksize); return -EINVAL; case UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_16: case UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_16: case UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_24: case UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_24: chmask = 1; break; case UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_16: case UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_16: case UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_24: case UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_24: chmask = 3; break; } if (dir_in) c_chmask = chmask; else p_chmask = 
chmask; } usb_audio_dbg(mixer->chip, "UAC3 BADD profile 0x%x: detected c_chmask=%d p_chmask=%d\n", badd_profile, c_chmask, p_chmask); /* check the mapping table */ for (map = uac3_badd_usbmix_ctl_maps; map->id; map++) { if (map->id == badd_profile) break; } if (!map->id) return -EINVAL; for (f = uac3_badd_profiles; f->name; f++) { if (badd_profile == f->subclass) break; } if (!f->name) return -EINVAL; if (!uac3_badd_func_has_valid_channels(mixer, f, c_chmask, p_chmask)) return -EINVAL; st_chmask = f->st_chmask; /* Playback */ if (p_chmask) { /* Master channel, always writable */ build_feature_ctl_badd(mixer, 0, UAC_FU_MUTE, UAC3_BADD_FU_ID2, map->map); /* Mono/Stereo volume channels, always writable */ build_feature_ctl_badd(mixer, p_chmask, UAC_FU_VOLUME, UAC3_BADD_FU_ID2, map->map); } /* Capture */ if (c_chmask) { /* Master channel, always writable */ build_feature_ctl_badd(mixer, 0, UAC_FU_MUTE, UAC3_BADD_FU_ID5, map->map); /* Mono/Stereo volume channels, always writable */ build_feature_ctl_badd(mixer, c_chmask, UAC_FU_VOLUME, UAC3_BADD_FU_ID5, map->map); } /* Side tone-mixing */ if (st_chmask) { /* Master channel, always writable */ build_feature_ctl_badd(mixer, 0, UAC_FU_MUTE, UAC3_BADD_FU_ID7, map->map); /* Mono volume channel, always writable */ build_feature_ctl_badd(mixer, 1, UAC_FU_VOLUME, UAC3_BADD_FU_ID7, map->map); } /* Insertion Control */ if (f->subclass == UAC3_FUNCTION_SUBCLASS_HEADSET_ADAPTER) { struct usb_audio_term iterm, oterm; /* Input Term - Insertion control */ memset(&iterm, 0, sizeof(iterm)); iterm.id = UAC3_BADD_IT_ID4; iterm.type = UAC_BIDIR_TERMINAL_HEADSET; build_connector_control(mixer, map->map, &iterm, true); /* Output Term - Insertion control */ memset(&oterm, 0, sizeof(oterm)); oterm.id = UAC3_BADD_OT_ID3; oterm.type = UAC_BIDIR_TERMINAL_HEADSET; build_connector_control(mixer, map->map, &oterm, false); } return 0; } /* * create mixer controls * * walk through all UAC_OUTPUT_TERMINAL descriptors to search for mixers */ static int 
snd_usb_mixer_controls(struct usb_mixer_interface *mixer)
{
	struct mixer_build state;
	int err;
	const struct usbmix_ctl_map *map;
	void *p;

	/* the parser state starts zeroed and points at the class-specific extra bytes */
	memset(&state, 0, sizeof(state));
	state.chip = mixer->chip;
	state.mixer = mixer;
	state.buffer = mixer->hostif->extra;
	state.buflen = mixer->hostif->extralen;

	/* check the mapping table */
	for (map = usbmix_ctl_maps; map->id; map++) {
		if (map->id == state.chip->usb_id) {
			state.map = map->map;
			state.selector_map = map->selector_map;
			mixer->connector_map = map->connector_map;
			break;
		}
	}

	/* visit each output terminal and parse the unit chain feeding it */
	p = NULL;
	while ((p = snd_usb_find_csint_desc(mixer->hostif->extra,
					    mixer->hostif->extralen,
					    p, UAC_OUTPUT_TERMINAL)) != NULL) {
		if (!snd_usb_validate_audio_desc(p, mixer->protocol))
			continue; /* skip invalid descriptor */

		if (mixer->protocol == UAC_VERSION_1) {
			struct uac1_output_terminal_descriptor *desc = p;

			/* mark terminal ID as visited */
			set_bit(desc->bTerminalID, state.unitbitmap);
			state.oterm.id = desc->bTerminalID;
			state.oterm.type = le16_to_cpu(desc->wTerminalType);
			state.oterm.name = desc->iTerminal;
			err = parse_audio_unit(&state, desc->bSourceID);
			/* -EINVAL from a unit is tolerated; other errors abort */
			if (err < 0 && err != -EINVAL)
				return err;
		} else if (mixer->protocol == UAC_VERSION_2) {
			struct uac2_output_terminal_descriptor *desc = p;

			/* mark terminal ID as visited */
			set_bit(desc->bTerminalID, state.unitbitmap);
			state.oterm.id = desc->bTerminalID;
			state.oterm.type = le16_to_cpu(desc->wTerminalType);
			state.oterm.name = desc->iTerminal;
			err = parse_audio_unit(&state, desc->bSourceID);
			if (err < 0 && err != -EINVAL)
				return err;

			/*
			 * For UAC2, use the same approach to also add the
			 * clock selectors
			 */
			err = parse_audio_unit(&state, desc->bCSourceID);
			if (err < 0 && err != -EINVAL)
				return err;

			/* connector control only for terminal types outside 0x01xx */
			if ((state.oterm.type & 0xff00) != 0x0100 &&
			    uac_v2v3_control_is_readable(le16_to_cpu(desc->bmControls),
							 UAC2_TE_CONNECTOR)) {
				build_connector_control(state.mixer, state.map,
							&state.oterm, false);
			}
		} else {  /* UAC_VERSION_3 */
			struct uac3_output_terminal_descriptor *desc = p;

			/* mark terminal ID as
visited */ set_bit(desc->bTerminalID, state.unitbitmap); state.oterm.id = desc->bTerminalID; state.oterm.type = le16_to_cpu(desc->wTerminalType); state.oterm.name = le16_to_cpu(desc->wTerminalDescrStr); err = parse_audio_unit(&state, desc->bSourceID); if (err < 0 && err != -EINVAL) return err; /* * For UAC3, use the same approach to also add the * clock selectors */ err = parse_audio_unit(&state, desc->bCSourceID); if (err < 0 && err != -EINVAL) return err; if ((state.oterm.type & 0xff00) != 0x0100 && uac_v2v3_control_is_readable(le32_to_cpu(desc->bmControls), UAC3_TE_INSERTION)) { build_connector_control(state.mixer, state.map, &state.oterm, false); } } } return 0; } static int delegate_notify(struct usb_mixer_interface *mixer, int unitid, u8 *control, u8 *channel) { const struct usbmix_connector_map *map = mixer->connector_map; if (!map) return unitid; for (; map->id; map++) { if (map->id == unitid) { if (control && map->control) *control = map->control; if (channel && map->channel) *channel = map->channel; return map->delegated_id; } } return unitid; } void snd_usb_mixer_notify_id(struct usb_mixer_interface *mixer, int unitid) { struct usb_mixer_elem_list *list; unitid = delegate_notify(mixer, unitid, NULL, NULL); for_each_mixer_elem(list, mixer, unitid) { struct usb_mixer_elem_info *info; if (!list->is_std_info) continue; info = mixer_elem_list_to_info(list); /* invalidate cache, so the value is read from the device */ info->cached = 0; snd_ctl_notify(mixer->chip->card, SNDRV_CTL_EVENT_MASK_VALUE, &list->kctl->id); } } static void snd_usb_mixer_dump_cval(struct snd_info_buffer *buffer, struct usb_mixer_elem_list *list) { struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list); static const char * const val_types[] = { [USB_MIXER_BOOLEAN] = "BOOLEAN", [USB_MIXER_INV_BOOLEAN] = "INV_BOOLEAN", [USB_MIXER_S8] = "S8", [USB_MIXER_U8] = "U8", [USB_MIXER_S16] = "S16", [USB_MIXER_U16] = "U16", [USB_MIXER_S32] = "S32", [USB_MIXER_U32] = "U32", 
[USB_MIXER_BESPOKEN] = "BESPOKEN",
	};
	snd_iprintf(buffer, " Info: id=%i, control=%i, cmask=0x%x, "
			    "channels=%i, type=\"%s\"\n", cval->head.id,
			    cval->control, cval->cmask, cval->channels,
			    val_types[cval->val_type]);
	snd_iprintf(buffer, " Volume: min=%i, max=%i, dBmin=%i, dBmax=%i\n",
			    cval->min, cval->max, cval->dBmin, cval->dBmax);
}

/* proc read callback: print every mixer of the chip and each of its elements */
static void snd_usb_mixer_proc_read(struct snd_info_entry *entry,
				    struct snd_info_buffer *buffer)
{
	struct snd_usb_audio *chip = entry->private_data;
	struct usb_mixer_interface *mixer;
	struct usb_mixer_elem_list *list;
	int unitid;

	list_for_each_entry(mixer, &chip->mixer_list, list) {
		snd_iprintf(buffer,
			"USB Mixer: usb_id=0x%08x, ctrlif=%i, ctlerr=%i\n",
				chip->usb_id, mixer_ctrl_intf(mixer),
				mixer->ignore_ctl_error);
		snd_iprintf(buffer, "Card: %s\n", chip->card->longname);
		for (unitid = 0; unitid < MAX_ID_ELEMS; unitid++) {
			for_each_mixer_elem(list, mixer, unitid) {
				snd_iprintf(buffer, " Unit: %i\n", list->id);
				if (list->kctl)
					snd_iprintf(buffer,
						    " Control: name=\"%s\", index=%i\n",
						    list->kctl->id.name,
						    list->kctl->id.index);
				/* element-specific details, if the element provides a dumper */
				if (list->dump)
					list->dump(buffer, list);
			}
		}
	}
}

/*
 * Handle one UAC2-style interrupt message: the unit id is taken from the
 * high byte of @index, the control selector and channel from the high and
 * low byte of @value.
 */
static void snd_usb_mixer_interrupt_v2(struct usb_mixer_interface *mixer,
				       int attribute, int value, int index)
{
	struct usb_mixer_elem_list *list;
	__u8 unitid = (index >> 8) & 0xff;
	__u8 control = (value >> 8) & 0xff;
	__u8 channel = value & 0xff;
	unsigned int count = 0;

	if (channel >= MAX_CHANNELS) {
		usb_audio_dbg(mixer->chip,
			"%s(): bogus channel number %d\n",
			__func__, channel);
		return;
	}

	unitid = delegate_notify(mixer, unitid, &control, &channel);

	/* first pass: count the elements registered for this unit */
	for_each_mixer_elem(list, mixer, unitid)
		count++;

	if (count == 0)
		return;

	for_each_mixer_elem(list, mixer, unitid) {
		struct usb_mixer_elem_info *info;

		if (!list->kctl)
			continue;
		if (!list->is_std_info)
			continue;

		info = mixer_elem_list_to_info(list);
		/* with several elements on the unit, only the matching control is touched */
		if (count > 1 && info->control != control)
			continue;

		switch (attribute) {
		case UAC2_CS_CUR:
			/* invalidate cache, so the value is read from the device */
			if (channel)
				info->cached &= ~(1 << channel);
			else /* master channel */
				info->cached = 0;

			snd_ctl_notify(mixer->chip->card, SNDRV_CTL_EVENT_MASK_VALUE,
				       &info->head.kctl->id);
			break;

		case UAC2_CS_RANGE:
			/* TODO */
			break;

		case UAC2_CS_MEM:
			/* TODO */
			break;

		default:
			usb_audio_dbg(mixer->chip,
				"unknown attribute %d in interrupt\n",
				attribute);
			break;
		} /* switch */
	}
}

/*
 * Completion handler of the status interrupt URB: decode the received
 * status words/messages and resubmit the URB unless it was unlinked or
 * the device is shutting down.
 */
static void snd_usb_mixer_interrupt(struct urb *urb)
{
	struct usb_mixer_interface *mixer = urb->context;
	int len = urb->actual_length;
	int ustatus = urb->status;

	if (ustatus != 0)
		goto requeue;

	if (mixer->protocol == UAC_VERSION_1) {
		struct uac1_status_word *status;

		for (status = urb->transfer_buffer;
		     len >= sizeof(*status);
		     len -= sizeof(*status), status++) {
			dev_dbg(&urb->dev->dev, "status interrupt: %02x %02x\n",
						status->bStatusType,
						status->bOriginator);

			/* ignore any notifications not from the control interface */
			if ((status->bStatusType & UAC1_STATUS_TYPE_ORIG_MASK) !=
				UAC1_STATUS_TYPE_ORIG_AUDIO_CONTROL_IF)
				continue;

			if (status->bStatusType & UAC1_STATUS_TYPE_MEM_CHANGED)
				snd_usb_mixer_rc_memory_change(mixer, status->bOriginator);
			else
				snd_usb_mixer_notify_id(mixer, status->bOriginator);
		}
	} else { /* UAC_VERSION_2 */
		struct uac2_interrupt_data_msg *msg;

		for (msg = urb->transfer_buffer;
		     len >= sizeof(*msg);
		     len -= sizeof(*msg), msg++) {
			/* drop vendor specific and endpoint requests */
			if ((msg->bInfo & UAC2_INTERRUPT_DATA_MSG_VENDOR) ||
			    (msg->bInfo & UAC2_INTERRUPT_DATA_MSG_EP))
				continue;

			snd_usb_mixer_interrupt_v2(mixer, msg->bAttribute,
						   le16_to_cpu(msg->wValue),
						   le16_to_cpu(msg->wIndex));
		}
	}

requeue:
	/* these three statuses end the polling; anything else is retried */
	if (ustatus != -ENOENT &&
	    ustatus != -ECONNRESET &&
	    ustatus != -ESHUTDOWN) {
		urb->dev = mixer->chip->dev;
		usb_submit_urb(urb, GFP_ATOMIC);
	}
}

/* create the handler for the optional status interrupt endpoint */
static int snd_usb_mixer_status_create(struct usb_mixer_interface *mixer)
{
	struct usb_endpoint_descriptor *ep;
	void *transfer_buffer;
	int buffer_length;
	unsigned int epnum;

	/* we need one interrupt
input endpoint */ if (get_iface_desc(mixer->hostif)->bNumEndpoints < 1) return 0; ep = get_endpoint(mixer->hostif, 0); if (!usb_endpoint_dir_in(ep) || !usb_endpoint_xfer_int(ep)) return 0; epnum = usb_endpoint_num(ep); buffer_length = le16_to_cpu(ep->wMaxPacketSize); transfer_buffer = kmalloc(buffer_length, GFP_KERNEL); if (!transfer_buffer) return -ENOMEM; mixer->urb = usb_alloc_urb(0, GFP_KERNEL); if (!mixer->urb) { kfree(transfer_buffer); return -ENOMEM; } usb_fill_int_urb(mixer->urb, mixer->chip->dev, usb_rcvintpipe(mixer->chip->dev, epnum), transfer_buffer, buffer_length, snd_usb_mixer_interrupt, mixer, ep->bInterval); usb_submit_urb(mixer->urb, GFP_KERNEL); return 0; } int snd_usb_create_mixer(struct snd_usb_audio *chip, int ctrlif) { static const struct snd_device_ops dev_ops = { .dev_free = snd_usb_mixer_dev_free }; struct usb_mixer_interface *mixer; int err; strcpy(chip->card->mixername, "USB Mixer"); mixer = kzalloc(sizeof(*mixer), GFP_KERNEL); if (!mixer) return -ENOMEM; mixer->chip = chip; mixer->ignore_ctl_error = !!(chip->quirk_flags & QUIRK_FLAG_IGNORE_CTL_ERROR); mixer->id_elems = kcalloc(MAX_ID_ELEMS, sizeof(*mixer->id_elems), GFP_KERNEL); if (!mixer->id_elems) { kfree(mixer); return -ENOMEM; } mixer->hostif = &usb_ifnum_to_if(chip->dev, ctrlif)->altsetting[0]; switch (get_iface_desc(mixer->hostif)->bInterfaceProtocol) { case UAC_VERSION_1: default: mixer->protocol = UAC_VERSION_1; break; case UAC_VERSION_2: mixer->protocol = UAC_VERSION_2; break; case UAC_VERSION_3: mixer->protocol = UAC_VERSION_3; break; } if (mixer->protocol == UAC_VERSION_3 && chip->badd_profile >= UAC3_FUNCTION_SUBCLASS_GENERIC_IO) { err = snd_usb_mixer_controls_badd(mixer, ctrlif); if (err < 0) goto _error; } else { err = snd_usb_mixer_controls(mixer); if (err < 0) goto _error; } err = snd_usb_mixer_status_create(mixer); if (err < 0) goto _error; err = snd_usb_mixer_apply_create_quirk(mixer); if (err < 0) goto _error; err = snd_device_new(chip->card, SNDRV_DEV_CODEC, mixer, 
&dev_ops); if (err < 0) goto _error; if (list_empty(&chip->mixer_list)) snd_card_ro_proc_new(chip->card, "usbmixer", chip, snd_usb_mixer_proc_read); list_add(&mixer->list, &chip->mixer_list); return 0; _error: snd_usb_mixer_free(mixer); return err; } void snd_usb_mixer_disconnect(struct usb_mixer_interface *mixer) { if (mixer->disconnected) return; if (mixer->urb) usb_kill_urb(mixer->urb); if (mixer->rc_urb) usb_kill_urb(mixer->rc_urb); if (mixer->private_free) mixer->private_free(mixer); mixer->disconnected = true; } /* stop any bus activity of a mixer */ static void snd_usb_mixer_inactivate(struct usb_mixer_interface *mixer) { usb_kill_urb(mixer->urb); usb_kill_urb(mixer->rc_urb); } static int snd_usb_mixer_activate(struct usb_mixer_interface *mixer) { int err; if (mixer->urb) { err = usb_submit_urb(mixer->urb, GFP_NOIO); if (err < 0) return err; } return 0; } int snd_usb_mixer_suspend(struct usb_mixer_interface *mixer) { snd_usb_mixer_inactivate(mixer); if (mixer->private_suspend) mixer->private_suspend(mixer); return 0; } static int restore_mixer_value(struct usb_mixer_elem_list *list) { struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list); int c, err, idx; if (cval->val_type == USB_MIXER_BESPOKEN) return 0; if (cval->cmask) { idx = 0; for (c = 0; c < MAX_CHANNELS; c++) { if (!(cval->cmask & (1 << c))) continue; if (cval->cached & (1 << (c + 1))) { err = snd_usb_set_cur_mix_value(cval, c + 1, idx, cval->cache_val[idx]); if (err < 0) break; } idx++; } } else { /* master */ if (cval->cached) snd_usb_set_cur_mix_value(cval, 0, 0, *cval->cache_val); } return 0; } int snd_usb_mixer_resume(struct usb_mixer_interface *mixer) { struct usb_mixer_elem_list *list; int id, err; /* restore cached mixer values */ for (id = 0; id < MAX_ID_ELEMS; id++) { for_each_mixer_elem(list, mixer, id) { if (list->resume) { err = list->resume(list); if (err < 0) return err; } } } snd_usb_mixer_resume_quirk(mixer); return snd_usb_mixer_activate(mixer); } void 
snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list, struct usb_mixer_interface *mixer, int unitid) { list->mixer = mixer; list->id = unitid; list->dump = snd_usb_mixer_dump_cval; list->resume = restore_mixer_value; }
86 83 83 83 83 83 83 77 78 78 78 77 3 3 2 1 2 1 14 1 4 3 3 3 2 1 2 72 57 57 2 8 8 61 92 92 9 34 11 23 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 
501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 /* * net/tipc/bcast.c: TIPC broadcast code * * Copyright (c) 2004-2006, 2014-2017, Ericsson AB * Copyright (c) 2004, Intel Corporation. * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include <linux/tipc_config.h> #include "socket.h" #include "msg.h" #include "bcast.h" #include "link.h" #include "name_table.h" #define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ #define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ const char tipc_bclink_name[] = "broadcast-link"; unsigned long sysctl_tipc_bc_retruni __read_mostly; /** * struct tipc_bc_base - base structure for keeping broadcast send state * @link: broadcast send link structure * @inputq: data input queue; will only carry SOCK_WAKEUP messages * @dests: array keeping number of reachable destinations per bearer * @primary_bearer: a bearer having links to all broadcast destinations, if any * @bcast_support: indicates if primary bearer, if any, supports broadcast * @force_bcast: forces broadcast for multicast traffic * @rcast_support: indicates if all peer nodes support replicast * @force_rcast: forces replicast for multicast traffic * @rc_ratio: dest count as percentage of cluster size where send method changes * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast */ struct tipc_bc_base { struct tipc_link *link; struct sk_buff_head inputq; int dests[MAX_BEARERS]; int primary_bearer; bool bcast_support; bool force_bcast; bool rcast_support; bool force_rcast; int rc_ratio; int bc_threshold; }; static struct tipc_bc_base *tipc_bc_base(struct net *net) { return tipc_net(net)->bcbase; } /* tipc_bcast_get_mtu(): -get the MTU currently used by broadcast link * Note: the MTU is decremented to give room for a tunnel header, in * case the message needs to be sent as replicast */ int tipc_bcast_get_mtu(struct net *net) { return tipc_link_mss(tipc_bc_sndlink(net)); } void tipc_bcast_toggle_rcast(struct net *net, bool supp) { tipc_bc_base(net)->rcast_support = supp; } static void tipc_bcbase_calc_bc_threshold(struct net *net) { struct tipc_bc_base *bb = tipc_bc_base(net); int cluster_size = tipc_link_bc_peers(tipc_bc_sndlink(net)); bb->bc_threshold = 1 + 
(cluster_size * bb->rc_ratio / 100); } /* tipc_bcbase_select_primary(): find a bearer with links to all destinations, * if any, and make it primary bearer */ static void tipc_bcbase_select_primary(struct net *net) { struct tipc_bc_base *bb = tipc_bc_base(net); int all_dests = tipc_link_bc_peers(bb->link); int max_win = tipc_link_max_win(bb->link); int min_win = tipc_link_min_win(bb->link); int i, mtu, prim; bb->primary_bearer = INVALID_BEARER_ID; bb->bcast_support = true; if (!all_dests) return; for (i = 0; i < MAX_BEARERS; i++) { if (!bb->dests[i]) continue; mtu = tipc_bearer_mtu(net, i); if (mtu < tipc_link_mtu(bb->link)) { tipc_link_set_mtu(bb->link, mtu); tipc_link_set_queue_limits(bb->link, min_win, max_win); } bb->bcast_support &= tipc_bearer_bcast_support(net, i); if (bb->dests[i] < all_dests) continue; bb->primary_bearer = i; /* Reduce risk that all nodes select same primary */ if ((i ^ tipc_own_addr(net)) & 1) break; } prim = bb->primary_bearer; if (prim != INVALID_BEARER_ID) bb->bcast_support = tipc_bearer_bcast_support(net, prim); } void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id) { struct tipc_bc_base *bb = tipc_bc_base(net); tipc_bcast_lock(net); bb->dests[bearer_id]++; tipc_bcbase_select_primary(net); tipc_bcast_unlock(net); } void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id) { struct tipc_bc_base *bb = tipc_bc_base(net); tipc_bcast_lock(net); bb->dests[bearer_id]--; tipc_bcbase_select_primary(net); tipc_bcast_unlock(net); } /* tipc_bcbase_xmit - broadcast a packet queue across one or more bearers * * Note that number of reachable destinations, as indicated in the dests[] * array, may transitionally differ from the number of destinations indicated * in each sent buffer. We can sustain this. 
Excess destination nodes will * drop and never acknowledge the unexpected packets, and missing destinations * will either require retransmission (if they are just about to be added to * the bearer), or be removed from the buffer's 'ackers' counter (if they * just went down) */ static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq) { int bearer_id; struct tipc_bc_base *bb = tipc_bc_base(net); struct sk_buff *skb, *_skb; struct sk_buff_head _xmitq; if (skb_queue_empty(xmitq)) return; /* The typical case: at least one bearer has links to all nodes */ bearer_id = bb->primary_bearer; if (bearer_id >= 0) { tipc_bearer_bc_xmit(net, bearer_id, xmitq); return; } /* We have to transmit across all bearers */ __skb_queue_head_init(&_xmitq); for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { if (!bb->dests[bearer_id]) continue; skb_queue_walk(xmitq, skb) { _skb = pskb_copy_for_clone(skb, GFP_ATOMIC); if (!_skb) break; __skb_queue_tail(&_xmitq, _skb); } tipc_bearer_bc_xmit(net, bearer_id, &_xmitq); } __skb_queue_purge(xmitq); __skb_queue_purge(&_xmitq); } static void tipc_bcast_select_xmit_method(struct net *net, int dests, struct tipc_mc_method *method) { struct tipc_bc_base *bb = tipc_bc_base(net); unsigned long exp = method->expires; /* Broadcast supported by used bearer/bearers? */ if (!bb->bcast_support) { method->rcast = true; return; } /* Any destinations which don't support replicast ? */ if (!bb->rcast_support) { method->rcast = false; return; } /* Can current method be changed ? 
*/ method->expires = jiffies + TIPC_METHOD_EXPIRE; if (method->mandatory) return; if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL) && time_before(jiffies, exp)) return; /* Configuration as force 'broadcast' method */ if (bb->force_bcast) { method->rcast = false; return; } /* Configuration as force 'replicast' method */ if (bb->force_rcast) { method->rcast = true; return; } /* Configuration as 'autoselect' or default method */ /* Determine method to use now */ method->rcast = dests <= bb->bc_threshold; } /* tipc_bcast_xmit - broadcast the buffer chain to all external nodes * @net: the applicable net namespace * @pkts: chain of buffers containing message * @cong_link_cnt: set to 1 if broadcast link is congested, otherwise 0 * Consumes the buffer chain. * Returns 0 if success, otherwise errno: -EHOSTUNREACH,-EMSGSIZE */ int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, u16 *cong_link_cnt) { struct tipc_link *l = tipc_bc_sndlink(net); struct sk_buff_head xmitq; int rc = 0; __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); if (tipc_link_bc_peers(l)) rc = tipc_link_xmit(l, pkts, &xmitq); tipc_bcast_unlock(net); tipc_bcbase_xmit(net, &xmitq); __skb_queue_purge(pkts); if (rc == -ELINKCONG) { *cong_link_cnt = 1; rc = 0; } return rc; } /* tipc_rcast_xmit - replicate and send a message to given destination nodes * @net: the applicable net namespace * @pkts: chain of buffers containing message * @dests: list of destination nodes * @cong_link_cnt: returns number of congested links * @cong_links: returns identities of congested links * Returns 0 if success, otherwise errno */ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, struct tipc_nlist *dests, u16 *cong_link_cnt) { struct tipc_dest *dst, *tmp; struct sk_buff_head _pkts; u32 dnode, selector; selector = msg_link_selector(buf_msg(skb_peek(pkts))); __skb_queue_head_init(&_pkts); list_for_each_entry_safe(dst, tmp, &dests->list, list) { dnode = dst->node; if (!tipc_msg_pskb_copy(dnode, 
pkts, &_pkts)) return -ENOMEM; /* Any other return value than -ELINKCONG is ignored */ if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG) (*cong_link_cnt)++; } return 0; } /* tipc_mcast_send_sync - deliver a dummy message with SYN bit * @net: the applicable net namespace * @skb: socket buffer to copy * @method: send method to be used * @dests: destination nodes for message. * Returns 0 if success, otherwise errno */ static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb, struct tipc_mc_method *method, struct tipc_nlist *dests) { struct tipc_msg *hdr, *_hdr; struct sk_buff_head tmpq; struct sk_buff *_skb; u16 cong_link_cnt; int rc = 0; /* Is a cluster supporting with new capabilities ? */ if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL)) return 0; hdr = buf_msg(skb); if (msg_user(hdr) == MSG_FRAGMENTER) hdr = msg_inner_hdr(hdr); if (msg_type(hdr) != TIPC_MCAST_MSG) return 0; /* Allocate dummy message */ _skb = tipc_buf_acquire(MCAST_H_SIZE, GFP_KERNEL); if (!_skb) return -ENOMEM; /* Preparing for 'synching' header */ msg_set_syn(hdr, 1); /* Copy skb's header into a dummy header */ skb_copy_to_linear_data(_skb, hdr, MCAST_H_SIZE); skb_orphan(_skb); /* Reverse method for dummy message */ _hdr = buf_msg(_skb); msg_set_size(_hdr, MCAST_H_SIZE); msg_set_is_rcast(_hdr, !msg_is_rcast(hdr)); msg_set_errcode(_hdr, TIPC_ERR_NO_PORT); __skb_queue_head_init(&tmpq); __skb_queue_tail(&tmpq, _skb); if (method->rcast) rc = tipc_bcast_xmit(net, &tmpq, &cong_link_cnt); else rc = tipc_rcast_xmit(net, &tmpq, dests, &cong_link_cnt); /* This queue should normally be empty by now */ __skb_queue_purge(&tmpq); return rc; } /* tipc_mcast_xmit - deliver message to indicated destination nodes * and to identified node local sockets * @net: the applicable net namespace * @pkts: chain of buffers containing message * @method: send method to be used * @dests: destination nodes for message. 
* @cong_link_cnt: returns number of encountered congested destination links * Consumes buffer chain. * Returns 0 if success, otherwise errno */ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, struct tipc_mc_method *method, struct tipc_nlist *dests, u16 *cong_link_cnt) { struct sk_buff_head inputq, localq; bool rcast = method->rcast; struct tipc_msg *hdr; struct sk_buff *skb; int rc = 0; skb_queue_head_init(&inputq); __skb_queue_head_init(&localq); /* Clone packets before they are consumed by next call */ if (dests->local && !tipc_msg_reassemble(pkts, &localq)) { rc = -ENOMEM; goto exit; } /* Send according to determined transmit method */ if (dests->remote) { tipc_bcast_select_xmit_method(net, dests->remote, method); skb = skb_peek(pkts); hdr = buf_msg(skb); if (msg_user(hdr) == MSG_FRAGMENTER) hdr = msg_inner_hdr(hdr); msg_set_is_rcast(hdr, method->rcast); /* Switch method ? */ if (rcast != method->rcast) { rc = tipc_mcast_send_sync(net, skb, method, dests); if (unlikely(rc)) { pr_err("Unable to send SYN: method %d, rc %d\n", rcast, rc); goto exit; } } if (method->rcast) rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt); else rc = tipc_bcast_xmit(net, pkts, cong_link_cnt); } if (dests->local) { tipc_loopback_trace(net, &localq); tipc_sk_mcast_rcv(net, &localq, &inputq); } exit: /* This queue should normally be empty by now */ __skb_queue_purge(pkts); return rc; } /* tipc_bcast_rcv - receive a broadcast packet, and deliver to rcv link * * RCU is locked, no other locks set */ int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb) { struct tipc_msg *hdr = buf_msg(skb); struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; struct sk_buff_head xmitq; int rc; __skb_queue_head_init(&xmitq); if (msg_mc_netid(hdr) != tipc_netid(net) || !tipc_link_is_up(l)) { kfree_skb(skb); return 0; } tipc_bcast_lock(net); if (msg_user(hdr) == BCAST_PROTOCOL) rc = tipc_link_bc_nack_rcv(l, skb, &xmitq); else rc = tipc_link_rcv(l, skb, NULL); 
tipc_bcast_unlock(net); tipc_bcbase_xmit(net, &xmitq); /* Any socket wakeup messages ? */ if (!skb_queue_empty(inputq)) tipc_sk_rcv(net, inputq); return rc; } /* tipc_bcast_ack_rcv - receive and handle a broadcast acknowledge * * RCU is locked, no other locks set */ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr) { struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; u16 acked = msg_bcast_ack(hdr); struct sk_buff_head xmitq; /* Ignore bc acks sent by peer before bcast synch point was received */ if (msg_bc_ack_invalid(hdr)) return; __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq, NULL); tipc_bcast_unlock(net); tipc_bcbase_xmit(net, &xmitq); /* Any socket wakeup messages ? */ if (!skb_queue_empty(inputq)) tipc_sk_rcv(net, inputq); } /* tipc_bcast_synch_rcv - check and update rcv link with peer's send state * * RCU is locked, no other locks set */ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr, struct sk_buff_head *retrq) { struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; struct tipc_gap_ack_blks *ga; struct sk_buff_head xmitq; int rc = 0; __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); if (msg_type(hdr) != STATE_MSG) { tipc_link_bc_init_rcv(l, hdr); } else if (!msg_bc_ack_invalid(hdr)) { tipc_get_gap_ack_blks(&ga, l, hdr, false); if (!sysctl_tipc_bc_retruni) retrq = &xmitq; rc = tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), msg_bc_gap(hdr), ga, &xmitq, retrq); rc |= tipc_link_bc_sync_rcv(l, hdr, &xmitq); } tipc_bcast_unlock(net); tipc_bcbase_xmit(net, &xmitq); /* Any socket wakeup messages ? 
*/ if (!skb_queue_empty(inputq)) tipc_sk_rcv(net, inputq); return rc; } /* tipc_bcast_add_peer - add a peer node to broadcast link and bearer * * RCU is locked, node lock is set */ void tipc_bcast_add_peer(struct net *net, struct tipc_link *uc_l, struct sk_buff_head *xmitq) { struct tipc_link *snd_l = tipc_bc_sndlink(net); tipc_bcast_lock(net); tipc_link_add_bc_peer(snd_l, uc_l, xmitq); tipc_bcbase_select_primary(net); tipc_bcbase_calc_bc_threshold(net); tipc_bcast_unlock(net); } /* tipc_bcast_remove_peer - remove a peer node from broadcast link and bearer * * RCU is locked, node lock is set */ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) { struct tipc_link *snd_l = tipc_bc_sndlink(net); struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; struct sk_buff_head xmitq; __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); tipc_link_remove_bc_peer(snd_l, rcv_l, &xmitq); tipc_bcbase_select_primary(net); tipc_bcbase_calc_bc_threshold(net); tipc_bcast_unlock(net); tipc_bcbase_xmit(net, &xmitq); /* Any socket wakeup messages ? 
*/ if (!skb_queue_empty(inputq)) tipc_sk_rcv(net, inputq); } int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l) { if (!l) return -ENOPROTOOPT; tipc_bcast_lock(net); tipc_link_reset_stats(l); tipc_bcast_unlock(net); return 0; } static int tipc_bc_link_set_queue_limits(struct net *net, u32 max_win) { struct tipc_link *l = tipc_bc_sndlink(net); if (!l) return -ENOPROTOOPT; if (max_win < BCLINK_WIN_MIN) max_win = BCLINK_WIN_MIN; if (max_win > TIPC_MAX_LINK_WIN) return -EINVAL; tipc_bcast_lock(net); tipc_link_set_queue_limits(l, tipc_link_min_win(l), max_win); tipc_bcast_unlock(net); return 0; } static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode) { struct tipc_bc_base *bb = tipc_bc_base(net); switch (bc_mode) { case BCLINK_MODE_BCAST: if (!bb->bcast_support) return -ENOPROTOOPT; bb->force_bcast = true; bb->force_rcast = false; break; case BCLINK_MODE_RCAST: if (!bb->rcast_support) return -ENOPROTOOPT; bb->force_bcast = false; bb->force_rcast = true; break; case BCLINK_MODE_SEL: if (!bb->bcast_support || !bb->rcast_support) return -ENOPROTOOPT; bb->force_bcast = false; bb->force_rcast = false; break; default: return -EINVAL; } return 0; } static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio) { struct tipc_bc_base *bb = tipc_bc_base(net); if (!bb->bcast_support || !bb->rcast_support) return -ENOPROTOOPT; if (bc_ratio > 100 || bc_ratio <= 0) return -EINVAL; bb->rc_ratio = bc_ratio; tipc_bcast_lock(net); tipc_bcbase_calc_bc_threshold(net); tipc_bcast_unlock(net); return 0; } int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) { int err; u32 win; u32 bc_mode; u32 bc_ratio; struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; if (!attrs[TIPC_NLA_LINK_PROP]) return -EINVAL; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props); if (err) return err; if (!props[TIPC_NLA_PROP_WIN] && !props[TIPC_NLA_PROP_BROADCAST] && !props[TIPC_NLA_PROP_BROADCAST_RATIO]) { return -EOPNOTSUPP; } if 
(props[TIPC_NLA_PROP_BROADCAST]) { bc_mode = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST]); err = tipc_bc_link_set_broadcast_mode(net, bc_mode); } if (!err && props[TIPC_NLA_PROP_BROADCAST_RATIO]) { bc_ratio = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST_RATIO]); err = tipc_bc_link_set_broadcast_ratio(net, bc_ratio); } if (!err && props[TIPC_NLA_PROP_WIN]) { win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); err = tipc_bc_link_set_queue_limits(net, win); } return err; } int tipc_bcast_init(struct net *net) { struct tipc_net *tn = tipc_net(net); struct tipc_bc_base *bb = NULL; struct tipc_link *l = NULL; bb = kzalloc(sizeof(*bb), GFP_KERNEL); if (!bb) goto enomem; tn->bcbase = bb; spin_lock_init(&tipc_net(net)->bclock); if (!tipc_link_bc_create(net, 0, 0, NULL, one_page_mtu, BCLINK_WIN_DEFAULT, BCLINK_WIN_DEFAULT, 0, &bb->inputq, NULL, NULL, &l)) goto enomem; bb->link = l; tn->bcl = l; bb->rc_ratio = 10; bb->rcast_support = true; return 0; enomem: kfree(bb); kfree(l); return -ENOMEM; } void tipc_bcast_stop(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); synchronize_net(); kfree(tn->bcbase); kfree(tn->bcl); } void tipc_nlist_init(struct tipc_nlist *nl, u32 self) { memset(nl, 0, sizeof(*nl)); INIT_LIST_HEAD(&nl->list); nl->self = self; } void tipc_nlist_add(struct tipc_nlist *nl, u32 node) { if (node == nl->self) nl->local = true; else if (tipc_dest_push(&nl->list, node, 0)) nl->remote++; } void tipc_nlist_del(struct tipc_nlist *nl, u32 node) { if (node == nl->self) nl->local = false; else if (tipc_dest_del(&nl->list, node, 0)) nl->remote--; } void tipc_nlist_purge(struct tipc_nlist *nl) { tipc_dest_list_purge(&nl->list); nl->remote = 0; nl->local = false; } u32 tipc_bcast_get_mode(struct net *net) { struct tipc_bc_base *bb = tipc_bc_base(net); if (bb->force_bcast) return BCLINK_MODE_BCAST; if (bb->force_rcast) return BCLINK_MODE_RCAST; if (bb->bcast_support && bb->rcast_support) return BCLINK_MODE_SEL; return 0; } u32 
tipc_bcast_get_broadcast_ratio(struct net *net) { struct tipc_bc_base *bb = tipc_bc_base(net); return bb->rc_ratio; } void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, struct sk_buff_head *inputq) { struct sk_buff *skb, *_skb, *tmp; struct tipc_msg *hdr, *_hdr; bool match = false; u32 node, port; skb = skb_peek(inputq); if (!skb) return; hdr = buf_msg(skb); if (likely(!msg_is_syn(hdr) && skb_queue_empty(defq))) return; node = msg_orignode(hdr); if (node == tipc_own_addr(net)) return; port = msg_origport(hdr); /* Has the twin SYN message already arrived ? */ skb_queue_walk(defq, _skb) { _hdr = buf_msg(_skb); if (msg_orignode(_hdr) != node) continue; if (msg_origport(_hdr) != port) continue; match = true; break; } if (!match) { if (!msg_is_syn(hdr)) return; __skb_dequeue(inputq); __skb_queue_tail(defq, skb); return; } /* Deliver non-SYN message from other link, otherwise queue it */ if (!msg_is_syn(hdr)) { if (msg_is_rcast(hdr) != msg_is_rcast(_hdr)) return; __skb_dequeue(inputq); __skb_queue_tail(defq, skb); return; } /* Queue non-SYN/SYN message from same link */ if (msg_is_rcast(hdr) == msg_is_rcast(_hdr)) { __skb_dequeue(inputq); __skb_queue_tail(defq, skb); return; } /* Matching SYN messages => return the one with data, if any */ __skb_unlink(_skb, defq); if (msg_data_sz(hdr)) { kfree_skb(_skb); } else { __skb_dequeue(inputq); kfree_skb(skb); __skb_queue_tail(inputq, _skb); } /* Deliver subsequent non-SYN messages from same peer */ skb_queue_walk_safe(defq, _skb, tmp) { _hdr = buf_msg(_skb); if (msg_orignode(_hdr) != node) continue; if (msg_origport(_hdr) != port) continue; if (msg_is_syn(_hdr)) break; __skb_unlink(_skb, defq); __skb_queue_tail(inputq, _skb); } }
3 13 20 9 26 1 2 22 6 17 16 18 3 11 10 11 4 3 10 10 8 8 1 4 8 29 9 25 25 10 25 2 1 8 18 10 9 3 18 17 17 17 17 1 1 1 1 6 21 2 1 1 2 1 2 29 9 15 10 2 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/isofs/rock.c * * (C) 1992, 1993 Eric Youngdale * * Rock Ridge Extensions to iso9660 */ #include <linux/slab.h> #include <linux/pagemap.h> #include "isofs.h" #include "rock.h" /* * These functions are designed to read the system areas of a directory record * and extract relevant information. There are different functions provided * depending upon what information we need at the time. One function fills * out an inode structure, a second one extracts a filename, a third one * returns a symbolic link name, and a fourth one returns the extent number * for the file. 
*/

/* Build a 16-bit signature from two characters, low byte first */
#define SIG(A,B) ((A) | ((B) << 8))	/* isonum_721() */

/*
 * Cursor over the system use area of a directory record and over any
 * continuation areas chained from it.
 */
struct rock_state {
	void *buffer;		/* kmalloc'ed copy of a continuation area, or NULL */
	unsigned char *chr;	/* current position in the area being scanned */
	int len;		/* bytes left from chr in that area */
	int cont_size;		/* size of the next continuation area (from CE) */
	int cont_extent;	/* block of the next continuation area; 0 = none */
	int cont_offset;	/* offset of that area within its block */
	int cont_loops;		/* number of continuation areas followed so far */
	struct inode *inode;	/* inode being parsed; gives sb and block size */
};

/*
 * This is a way of ensuring that we have something in the system
 * use fields that is compatible with Rock Ridge. Return zero on success.
 *
 * On success the SP record's skip value is stored as the superblock's
 * s_rock_offset.
 */
static int check_sp(struct rock_ridge *rr, struct inode *inode)
{
	if (rr->u.SP.magic[0] != 0xbe)
		return -1;
	if (rr->u.SP.magic[1] != 0xef)
		return -1;
	ISOFS_SB(inode->i_sb)->s_rock_offset = rr->u.SP.skip;
	return 0;
}

/*
 * Point rs->chr/rs->len at the bytes that follow the (even-padded) name in
 * directory record 'de', then skip s_rock_offset bytes if that offset is
 * known (!= -1). The length is clamped at zero.
 */
static void setup_rock_ridge(struct iso_directory_record *de,
			struct inode *inode, struct rock_state *rs)
{
	rs->len = sizeof(struct iso_directory_record) + de->name_len[0];
	if (rs->len & 1)
		(rs->len)++;
	rs->chr = (unsigned char *)de + rs->len;
	/* The first byte of the record is read as its total length */
	rs->len = *((unsigned char *)de) - rs->len;
	if (rs->len < 0)
		rs->len = 0;

	if (ISOFS_SB(inode->i_sb)->s_rock_offset != -1) {
		rs->len -= ISOFS_SB(inode->i_sb)->s_rock_offset;
		rs->chr += ISOFS_SB(inode->i_sb)->s_rock_offset;
		if (rs->len < 0)
			rs->len = 0;
	}
}

/* Zero the state and remember which inode is being parsed */
static void init_rock_state(struct rock_state *rs, struct inode *inode)
{
	memset(rs, 0, sizeof(*rs));
	rs->inode = inode;
}

/* Maximum number of Rock Ridge continuation entries */
#define RR_MAX_CE_ENTRIES 32

/*
 * Returns 0 if the caller should continue scanning, 1 if the scan must end
 * and -ve on error.
 *
 * Any previous continuation buffer is freed first. If a continuation extent
 * was recorded, that block is read and cont_size bytes at cont_offset are
 * copied into a fresh buffer which becomes the new scan area. On every
 * return other than 0, rs->buffer is NULL.
 */
static int rock_continue(struct rock_state *rs)
{
	int ret = 1;
	int blocksize = 1 << rs->inode->i_blkbits;
	const int min_de_size = offsetof(struct rock_ridge, u);

	kfree(rs->buffer);
	rs->buffer = NULL;

	/* Reject continuation areas that do not fit inside one block */
	if ((unsigned)rs->cont_offset > blocksize - min_de_size ||
	    (unsigned)rs->cont_size > blocksize ||
	    (unsigned)(rs->cont_offset + rs->cont_size) > blocksize) {
		printk(KERN_NOTICE "rock: corrupted directory entry. "
			"extent=%d, offset=%d, size=%d\n",
			rs->cont_extent, rs->cont_offset, rs->cont_size);
		ret = -EIO;
		goto out;
	}

	if (rs->cont_extent) {
		struct buffer_head *bh;

		rs->buffer = kmalloc(rs->cont_size, GFP_KERNEL);
		if (!rs->buffer) {
			ret = -ENOMEM;
			goto out;
		}
		ret = -EIO;
		/* Bound the number of continuation areas followed */
		if (++rs->cont_loops >= RR_MAX_CE_ENTRIES)
			goto out;
		bh = sb_bread(rs->inode->i_sb, rs->cont_extent);
		if (bh) {
			memcpy(rs->buffer, bh->b_data + rs->cont_offset,
					rs->cont_size);
			put_bh(bh);
			rs->chr = rs->buffer;
			rs->len = rs->cont_size;
			/* Consumed; a new CE record must set these again */
			rs->cont_extent = 0;
			rs->cont_size = 0;
			rs->cont_offset = 0;
			return 0;
		}
		printk("Unable to read rock-ridge attributes\n");
	}
out:
	kfree(rs->buffer);
	rs->buffer = NULL;
	return ret;
}

/*
 * We think there's a record of type `sig' at rs->chr. Parse the signature
 * and make sure that there's really room for a record of that type.
 *
 * Returns 0 if the record header plus the structure for `sig' fits in the
 * remaining rs->len bytes (unknown signatures only need the header),
 * -EIO otherwise.
 */
static int rock_check_overflow(struct rock_state *rs, int sig)
{
	int len;

	switch (sig) {
	case SIG('S', 'P'):
		len = sizeof(struct SU_SP_s);
		break;
	case SIG('C', 'E'):
		len = sizeof(struct SU_CE_s);
		break;
	case SIG('E', 'R'):
		len = sizeof(struct SU_ER_s);
		break;
	case SIG('R', 'R'):
		len = sizeof(struct RR_RR_s);
		break;
	case SIG('P', 'X'):
		len = sizeof(struct RR_PX_s);
		break;
	case SIG('P', 'N'):
		len = sizeof(struct RR_PN_s);
		break;
	case SIG('S', 'L'):
		len = sizeof(struct RR_SL_s);
		break;
	case SIG('N', 'M'):
		len = sizeof(struct RR_NM_s);
		break;
	case SIG('C', 'L'):
		len = sizeof(struct RR_CL_s);
		break;
	case SIG('P', 'L'):
		len = sizeof(struct RR_PL_s);
		break;
	case SIG('T', 'F'):
		len = sizeof(struct RR_TF_s);
		break;
	case SIG('Z', 'F'):
		len = sizeof(struct RR_ZF_s);
		break;
	default:
		len = 0;
		break;
	}
	len += offsetof(struct rock_ridge, u);
	if (len > rs->len) {
		printk(KERN_NOTICE "rock: directory entry would overflow "
				"storage\n");
		printk(KERN_NOTICE "rock: sig=0x%02x, size=%d, remaining=%d\n",
				sig, len, rs->len);
		return -EIO;
	}
	return 0;
}

/*
 * return length of name field; 0: not found, -1: to be ignored
 *
 * The name is assembled into 'retname' from the NM records found in the
 * system use area of 'de' and in any continuation areas. A negative errno
 * from rock_continue() or -EIO from the overflow check may also be returned.
 */
int get_rock_ridge_filename(struct iso_directory_record *de,
			    char *retname, struct inode *inode)
{
	struct rock_state rs;
	struct rock_ridge *rr;
	int sig;
	int retnamlen = 0;
	int truncate = 0;
	int ret = 0;
	char *p;
	int len;

	if (!ISOFS_SB(inode->i_sb)->s_rock)
		return 0;
	*retname = 0;

	init_rock_state(&rs, inode);
	setup_rock_ridge(de, inode, &rs);
repeat:

	while (rs.len > 2) { /* There may be one byte for padding somewhere */
		rr = (struct rock_ridge *)rs.chr;
		/*
		 * Ignore rock ridge info if rr->len is out of range, but
		 * don't return -EIO because that would make the file
		 * invisible.
		 */
		if (rr->len < 3)
			goto out;	/* Something got screwed up here */
		sig = isonum_721(rs.chr);
		if (rock_check_overflow(&rs, sig))
			goto eio;
		/* Advance the cursor; rr still points at the current record */
		rs.chr += rr->len;
		rs.len -= rr->len;
		/*
		 * As above, just ignore the rock ridge info if rr->len
		 * is bogus.
		 */
		if (rs.len < 0)
			goto out;	/* Something got screwed up here */

		switch (sig) {
		case SIG('R', 'R'):
			if ((rr->u.RR.flags[0] & RR_NM) == 0)
				goto out;
			break;
		case SIG('S', 'P'):
			if (check_sp(rr, inode))
				goto out;
			break;
		case SIG('C', 'E'):
			/* Remember the continuation area for rock_continue() */
			rs.cont_extent = isonum_733(rr->u.CE.extent);
			rs.cont_offset = isonum_733(rr->u.CE.offset);
			rs.cont_size = isonum_733(rr->u.CE.size);
			break;
		case SIG('N', 'M'):
			if (truncate)
				break;
			if (rr->len < 5)
				break;
			/*
			 * If the flags are 2 or 4, this indicates '.' or '..'.
			 * We don't want to do anything with this, because it
			 * screws up the code that calls us.  We don't really
			 * care anyways, since we can just use the non-RR
			 * name.
			 */
			if (rr->u.NM.flags & 6)
				break;

			if (rr->u.NM.flags & ~1) {
				printk("Unsupported NM flag settings (%d)\n",
					rr->u.NM.flags);
				break;
			}
			len = rr->len - 5;
			/* Stop appending once the name would reach 254 bytes */
			if (retnamlen + len >= 254) {
				truncate = 1;
				break;
			}
			/* Cut this name component at an embedded NUL, if any */
			p = memchr(rr->u.NM.name, '\0', len);
			if (unlikely(p))
				len = p - rr->u.NM.name;
			memcpy(retname + retnamlen, rr->u.NM.name, len);
			retnamlen += len;
			retname[retnamlen] = '\0';
			break;
		case SIG('R', 'E'):
			kfree(rs.buffer);
			return -1;
		default:
			break;
		}
	}
	ret = rock_continue(&rs);
	if (ret == 0)
		goto repeat;
	if (ret == 1)
		return retnamlen; /* If 0, this file did not have a NM field */
out:
	kfree(rs.buffer);
	return ret;
eio:
	ret = -EIO;
	goto out;
}

#define RR_REGARD_XA 1
#define RR_RELOC_DE 2

static int
parse_rock_ridge_inode_internal(struct iso_directory_record *de,
				struct inode *inode, int flags)
{
	int symlink_len = 0;
	int cnt, sig;
	unsigned int reloc_block;
	struct inode *reloc;
	struct rock_ridge *rr;
	int rootflag;
	struct rock_state rs;
	int ret = 0;

	if (!ISOFS_SB(inode->i_sb)->s_rock)
		return 0;

	init_rock_state(&rs, inode);
	setup_rock_ridge(de, inode, &rs);
	if (flags & RR_REGARD_XA) {
		rs.chr += 14;
		rs.len -= 14;
		if (rs.len < 0)
			rs.len = 0;
	}

repeat:
	while (rs.len > 2) { /* There may be one byte for padding somewhere */
		rr = (struct rock_ridge *)rs.chr;
		/*
		 * Ignore rock ridge info if rr->len is out of range, but
		 * don't return -EIO because that would make the file
		 * invisible.
		 */
		if (rr->len < 3)
			goto out;	/* Something got screwed up here */
		sig = isonum_721(rs.chr);
		if (rock_check_overflow(&rs, sig))
			goto eio;
		rs.chr += rr->len;
		rs.len -= rr->len;
		/*
		 * As above, just ignore the rock ridge info if rr->len
		 * is bogus.
*/ if (rs.len < 0) goto out; /* Something got screwed up here */ switch (sig) { #ifndef CONFIG_ZISOFS /* No flag for SF or ZF */ case SIG('R', 'R'): if ((rr->u.RR.flags[0] & (RR_PX | RR_TF | RR_SL | RR_CL)) == 0) goto out; break; #endif case SIG('S', 'P'): if (check_sp(rr, inode)) goto out; break; case SIG('C', 'E'): rs.cont_extent = isonum_733(rr->u.CE.extent); rs.cont_offset = isonum_733(rr->u.CE.offset); rs.cont_size = isonum_733(rr->u.CE.size); break; case SIG('E', 'R'): /* Invalid length of ER tag id? */ if (rr->u.ER.len_id + offsetof(struct rock_ridge, u.ER.data) > rr->len) goto out; ISOFS_SB(inode->i_sb)->s_rock = 1; printk(KERN_DEBUG "ISO 9660 Extensions: "); { int p; for (p = 0; p < rr->u.ER.len_id; p++) printk(KERN_CONT "%c", rr->u.ER.data[p]); } printk(KERN_CONT "\n"); break; case SIG('P', 'X'): inode->i_mode = isonum_733(rr->u.PX.mode); set_nlink(inode, isonum_733(rr->u.PX.n_links)); i_uid_write(inode, isonum_733(rr->u.PX.uid)); i_gid_write(inode, isonum_733(rr->u.PX.gid)); break; case SIG('P', 'N'): { int high, low; high = isonum_733(rr->u.PN.dev_high); low = isonum_733(rr->u.PN.dev_low); /* * The Rock Ridge standard specifies that if * sizeof(dev_t) <= 4, then the high field is * unused, and the device number is completely * stored in the low field. Some writers may * ignore this subtlety, * and as a result we test to see if the entire * device number is * stored in the low field, and use that. */ if ((low & ~0xff) && high == 0) { inode->i_rdev = MKDEV(low >> 8, low & 0xff); } else { inode->i_rdev = MKDEV(high, low); } } break; case SIG('T', 'F'): /* * Some RRIP writers incorrectly place ctime in the * TF_CREATE field. Try to handle this correctly for * either case. 
*/ /* Rock ridge never appears on a High Sierra disk */ cnt = 0; if (rr->u.TF.flags & TF_CREATE) { inode_set_ctime(inode, iso_date(rr->u.TF.times[cnt++].time, 0), 0); } if (rr->u.TF.flags & TF_MODIFY) { inode_set_mtime(inode, iso_date(rr->u.TF.times[cnt++].time, 0), 0); } if (rr->u.TF.flags & TF_ACCESS) { inode_set_atime(inode, iso_date(rr->u.TF.times[cnt++].time, 0), 0); } if (rr->u.TF.flags & TF_ATTRIBUTES) { inode_set_ctime(inode, iso_date(rr->u.TF.times[cnt++].time, 0), 0); } break; case SIG('S', 'L'): { int slen; struct SL_component *slp; struct SL_component *oldslp; slen = rr->len - 5; slp = &rr->u.SL.link; inode->i_size = symlink_len; while (slen > 1) { rootflag = 0; switch (slp->flags & ~1) { case 0: inode->i_size += slp->len; break; case 2: inode->i_size += 1; break; case 4: inode->i_size += 2; break; case 8: rootflag = 1; inode->i_size += 1; break; default: printk("Symlink component flag " "not implemented\n"); } slen -= slp->len + 2; oldslp = slp; slp = (struct SL_component *) (((char *)slp) + slp->len + 2); if (slen < 2) { if (((rr->u.SL. flags & 1) != 0) && ((oldslp-> flags & 1) == 0)) inode->i_size += 1; break; } /* * If this component record isn't * continued, then append a '/'. 
*/ if (!rootflag && (oldslp->flags & 1) == 0) inode->i_size += 1; } } symlink_len = inode->i_size; break; case SIG('R', 'E'): printk(KERN_WARNING "Attempt to read inode for " "relocated directory\n"); goto out; case SIG('C', 'L'): if (flags & RR_RELOC_DE) { printk(KERN_ERR "ISOFS: Recursive directory relocation " "is not supported\n"); goto eio; } reloc_block = isonum_733(rr->u.CL.location); if (reloc_block == ISOFS_I(inode)->i_iget5_block && ISOFS_I(inode)->i_iget5_offset == 0) { printk(KERN_ERR "ISOFS: Directory relocation points to " "itself\n"); goto eio; } ISOFS_I(inode)->i_first_extent = reloc_block; reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0); if (IS_ERR(reloc)) { ret = PTR_ERR(reloc); goto out; } inode->i_mode = reloc->i_mode; set_nlink(inode, reloc->i_nlink); inode->i_uid = reloc->i_uid; inode->i_gid = reloc->i_gid; inode->i_rdev = reloc->i_rdev; inode->i_size = reloc->i_size; inode->i_blocks = reloc->i_blocks; inode_set_atime_to_ts(inode, inode_get_atime(reloc)); inode_set_ctime_to_ts(inode, inode_get_ctime(reloc)); inode_set_mtime_to_ts(inode, inode_get_mtime(reloc)); iput(reloc); break; #ifdef CONFIG_ZISOFS case SIG('Z', 'F'): { int algo; if (ISOFS_SB(inode->i_sb)->s_nocompress) break; algo = isonum_721(rr->u.ZF.algorithm); if (algo == SIG('p', 'z')) { int block_shift = isonum_711(&rr->u.ZF.parms[1]); if (block_shift > 17) { printk(KERN_WARNING "isofs: " "Can't handle ZF block " "size of 2^%d\n", block_shift); } else { /* * Note: we don't change * i_blocks here */ ISOFS_I(inode)->i_file_format = isofs_file_compressed; /* * Parameters to compression * algorithm (header size, * block size) */ ISOFS_I(inode)->i_format_parm[0] = isonum_711(&rr->u.ZF.parms[0]); ISOFS_I(inode)->i_format_parm[1] = isonum_711(&rr->u.ZF.parms[1]); inode->i_size = isonum_733(rr->u.ZF. 
real_size); } } else { printk(KERN_WARNING "isofs: Unknown ZF compression " "algorithm: %c%c\n", rr->u.ZF.algorithm[0], rr->u.ZF.algorithm[1]); } break; } #endif default: break; } } ret = rock_continue(&rs); if (ret == 0) goto repeat; if (ret == 1) ret = 0; out: kfree(rs.buffer); return ret; eio: ret = -EIO; goto out; } static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) { int slen; int rootflag; struct SL_component *oldslp; struct SL_component *slp; slen = rr->len - 5; slp = &rr->u.SL.link; while (slen > 1) { rootflag = 0; switch (slp->flags & ~1) { case 0: if (slp->len > plimit - rpnt) return NULL; memcpy(rpnt, slp->text, slp->len); rpnt += slp->len; break; case 2: if (rpnt >= plimit) return NULL; *rpnt++ = '.'; break; case 4: if (2 > plimit - rpnt) return NULL; *rpnt++ = '.'; *rpnt++ = '.'; break; case 8: if (rpnt >= plimit) return NULL; rootflag = 1; *rpnt++ = '/'; break; default: printk("Symlink component flag not implemented (%d)\n", slp->flags); } slen -= slp->len + 2; oldslp = slp; slp = (struct SL_component *)((char *)slp + slp->len + 2); if (slen < 2) { /* * If there is another SL record, and this component * record isn't continued, then add a slash. */ if ((!rootflag) && (rr->u.SL.flags & 1) && !(oldslp->flags & 1)) { if (rpnt >= plimit) return NULL; *rpnt++ = '/'; } break; } /* * If this component record isn't continued, then append a '/'. */ if (!rootflag && !(oldslp->flags & 1)) { if (rpnt >= plimit) return NULL; *rpnt++ = '/'; } } return rpnt; } int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode, int relocated) { int flags = relocated ? 
RR_RELOC_DE : 0; int result = parse_rock_ridge_inode_internal(de, inode, flags); /* * if rockridge flag was reset and we didn't look for attributes * behind eventual XA attributes, have a look there */ if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1) && (ISOFS_SB(inode->i_sb)->s_rock == 2)) { result = parse_rock_ridge_inode_internal(de, inode, flags | RR_REGARD_XA); } return result; } /* * read_folio() for symlinks: reads symlink contents into the folio and either * makes it uptodate and returns 0 or returns error (-EIO) */ static int rock_ridge_symlink_read_folio(struct file *file, struct folio *folio) { struct page *page = &folio->page; struct inode *inode = page->mapping->host; struct iso_inode_info *ei = ISOFS_I(inode); struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb); char *link = page_address(page); unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); struct buffer_head *bh; char *rpnt = link; unsigned char *pnt; struct iso_directory_record *raw_de; unsigned long block, offset; int sig; struct rock_ridge *rr; struct rock_state rs; int ret; if (!sbi->s_rock) goto error; init_rock_state(&rs, inode); block = ei->i_iget5_block; bh = sb_bread(inode->i_sb, block); if (!bh) goto out_noread; offset = ei->i_iget5_offset; pnt = (unsigned char *)bh->b_data + offset; raw_de = (struct iso_directory_record *)pnt; /* * If we go past the end of the buffer, there is some sort of error. */ if (offset + *pnt > bufsize) goto out_bad_span; /* * Now test for possible Rock Ridge extensions which will override * some of these numbers in the inode structure. 
*/ setup_rock_ridge(raw_de, inode, &rs); repeat: while (rs.len > 2) { /* There may be one byte for padding somewhere */ rr = (struct rock_ridge *)rs.chr; if (rr->len < 3) goto out; /* Something got screwed up here */ sig = isonum_721(rs.chr); if (rock_check_overflow(&rs, sig)) goto out; rs.chr += rr->len; rs.len -= rr->len; if (rs.len < 0) goto out; /* corrupted isofs */ switch (sig) { case SIG('R', 'R'): if ((rr->u.RR.flags[0] & RR_SL) == 0) goto out; break; case SIG('S', 'P'): if (check_sp(rr, inode)) goto out; break; case SIG('S', 'L'): rpnt = get_symlink_chunk(rpnt, rr, link + (PAGE_SIZE - 1)); if (rpnt == NULL) goto out; break; case SIG('C', 'E'): /* This tells is if there is a continuation record */ rs.cont_extent = isonum_733(rr->u.CE.extent); rs.cont_offset = isonum_733(rr->u.CE.offset); rs.cont_size = isonum_733(rr->u.CE.size); break; default: break; } } ret = rock_continue(&rs); if (ret == 0) goto repeat; if (ret < 0) goto fail; if (rpnt == link) goto fail; brelse(bh); *rpnt = '\0'; SetPageUptodate(page); unlock_page(page); return 0; /* error exit from macro */ out: kfree(rs.buffer); goto fail; out_noread: printk("unable to read i-node block"); goto fail; out_bad_span: printk("symlink spans iso9660 blocks\n"); fail: brelse(bh); error: SetPageError(page); unlock_page(page); return -EIO; } const struct address_space_operations isofs_symlink_aops = { .read_folio = rock_ridge_symlink_read_folio };
47 48 47 47 47 27 27 47 47 47 47 27 27 27 47 45 27 2 44 2 44 44 44 44 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 
510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 // SPDX-License-Identifier: GPL-2.0-or-later /* * Sound core. This file is composed of two parts. sound_class * which is common to both OSS and ALSA and OSS sound core which * is used OSS or emulation of it. */ /* * First, the common part. */ #include <linux/module.h> #include <linux/device.h> #include <linux/err.h> #include <linux/kdev_t.h> #include <linux/major.h> #include <sound/core.h> #ifdef CONFIG_SOUND_OSS_CORE static int __init init_oss_soundcore(void); static void cleanup_oss_soundcore(void); #else static inline int init_oss_soundcore(void) { return 0; } static inline void cleanup_oss_soundcore(void) { } #endif MODULE_DESCRIPTION("Core sound module"); MODULE_AUTHOR("Alan Cox"); MODULE_LICENSE("GPL"); static char *sound_devnode(const struct device *dev, umode_t *mode) { if (MAJOR(dev->devt) == SOUND_MAJOR) return NULL; return kasprintf(GFP_KERNEL, "snd/%s", dev_name(dev)); } const struct class sound_class = { .name = "sound", .devnode = sound_devnode, }; EXPORT_SYMBOL(sound_class); static int __init init_soundcore(void) { int rc; rc = init_oss_soundcore(); if (rc) return rc; rc = class_register(&sound_class); if (rc) { cleanup_oss_soundcore(); return rc; } return 0; } static void __exit cleanup_soundcore(void) { cleanup_oss_soundcore(); class_unregister(&sound_class); } subsys_initcall(init_soundcore); module_exit(cleanup_soundcore); #ifdef CONFIG_SOUND_OSS_CORE /* * OSS sound core handling. 
Breaks out sound functions to submodules * * Author: Alan Cox <alan@lxorguk.ukuu.org.uk> * * Fixes: * * -------------------- * * Top level handler for the sound subsystem. Various devices can * plug into this. The fact they don't all go via OSS doesn't mean * they don't have to implement the OSS API. There is a lot of logic * to keeping much of the OSS weight out of the code in a compatibility * module, but it's up to the driver to rember to load it... * * The code provides a set of functions for registration of devices * by type. This is done rather than providing a single call so that * we can hide any future changes in the internals (eg when we go to * 32bit dev_t) from the modules and their interface. * * Secondly we need to allocate the dsp, dsp16 and audio devices as * one. Thus we misuse the chains a bit to simplify this. * * Thirdly to make it more fun and for 2.3.x and above we do all * of this using fine grained locking. * * FIXME: we have to resolve modules and fine grained load/unload * locking at some point in 2.3.x. */ #include <linux/init.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sound.h> #include <linux/kmod.h> #define SOUND_STEP 16 struct sound_unit { int unit_minor; const struct file_operations *unit_fops; struct sound_unit *next; char name[32]; }; /* * By default, OSS sound_core claims full legacy minor range (0-255) * of SOUND_MAJOR to trap open attempts to any sound minor and * requests modules using custom sound-slot/service-* module aliases. * The only benefit of doing this is allowing use of custom module * aliases instead of the standard char-major-* ones. This behavior * prevents alternative OSS implementation and is scheduled to be * removed. * * CONFIG_SOUND_OSS_CORE_PRECLAIM and soundcore.preclaim_oss kernel * parameter are added to allow distros and developers to try and * switch to alternative implementations without needing to rebuild * the kernel in the meantime. 
If preclaim_oss is non-zero, the * kernel will behave the same as before. All SOUND_MAJOR minors are * preclaimed and the custom module aliases along with standard chrdev * ones are emitted if a missing device is opened. If preclaim_oss is * zero, sound_core only grabs what's actually in use and for missing * devices only the standard chrdev aliases are requested. * * All these clutters are scheduled to be removed along with * sound-slot/service-* module aliases. */ static int preclaim_oss = IS_ENABLED(CONFIG_SOUND_OSS_CORE_PRECLAIM); module_param(preclaim_oss, int, 0444); static int soundcore_open(struct inode *, struct file *); static const struct file_operations soundcore_fops = { /* We must have an owner or the module locking fails */ .owner = THIS_MODULE, .open = soundcore_open, .llseek = noop_llseek, }; /* * Low level list operator. Scan the ordered list, find a hole and * join into it. Called with the lock asserted */ static int __sound_insert_unit(struct sound_unit * s, struct sound_unit **list, const struct file_operations *fops, int index, int low, int top) { int n=low; if (index < 0) { /* first free */ while (*list && (*list)->unit_minor<n) list=&((*list)->next); while(n<top) { /* Found a hole ? */ if(*list==NULL || (*list)->unit_minor>n) break; list=&((*list)->next); n+=SOUND_STEP; } if(n>=top) return -ENOENT; } else { n = low+(index*16); while (*list) { if ((*list)->unit_minor==n) return -EBUSY; if ((*list)->unit_minor>n) break; list=&((*list)->next); } } /* * Fill it in */ s->unit_minor=n; s->unit_fops=fops; /* * Link it */ s->next=*list; *list=s; return n; } /* * Remove a node from the chain. Called with the lock asserted */ static struct sound_unit *__sound_remove_unit(struct sound_unit **list, int unit) { while(*list) { struct sound_unit *p=*list; if(p->unit_minor==unit) { *list=p->next; return p; } list=&(p->next); } printk(KERN_ERR "Sound device %d went missing!\n", unit); return NULL; } /* * This lock guards the sound loader list. 
*/ static DEFINE_SPINLOCK(sound_loader_lock); /* * Allocate the controlling structure and add it to the sound driver * list. Acquires locks as needed */ static int sound_insert_unit(struct sound_unit **list, const struct file_operations *fops, int index, int low, int top, const char *name, umode_t mode, struct device *dev) { struct sound_unit *s = kmalloc(sizeof(*s), GFP_KERNEL); int r; if (!s) return -ENOMEM; spin_lock(&sound_loader_lock); retry: r = __sound_insert_unit(s, list, fops, index, low, top); spin_unlock(&sound_loader_lock); if (r < 0) goto fail; else if (r < SOUND_STEP) sprintf(s->name, "sound/%s", name); else sprintf(s->name, "sound/%s%d", name, r / SOUND_STEP); if (!preclaim_oss) { /* * Something else might have grabbed the minor. If * first free slot is requested, rescan with @low set * to the next unit; otherwise, -EBUSY. */ r = __register_chrdev(SOUND_MAJOR, s->unit_minor, 1, s->name, &soundcore_fops); if (r < 0) { spin_lock(&sound_loader_lock); __sound_remove_unit(list, s->unit_minor); if (index < 0) { low = s->unit_minor + SOUND_STEP; goto retry; } spin_unlock(&sound_loader_lock); r = -EBUSY; goto fail; } } device_create(&sound_class, dev, MKDEV(SOUND_MAJOR, s->unit_minor), NULL, "%s", s->name+6); return s->unit_minor; fail: kfree(s); return r; } /* * Remove a unit. Acquires locks as needed. The drivers MUST have * completed the removal before their file operations become * invalid. 
*/ static void sound_remove_unit(struct sound_unit **list, int unit) { struct sound_unit *p; spin_lock(&sound_loader_lock); p = __sound_remove_unit(list, unit); spin_unlock(&sound_loader_lock); if (p) { if (!preclaim_oss) __unregister_chrdev(SOUND_MAJOR, p->unit_minor, 1, p->name); device_destroy(&sound_class, MKDEV(SOUND_MAJOR, p->unit_minor)); kfree(p); } } /* * Allocations * * 0 *16 Mixers * 1 *8 Sequencers * 2 *16 Midi * 3 *16 DSP * 4 *16 SunDSP * 5 *16 DSP16 * 6 -- sndstat (obsolete) * 7 *16 unused * 8 -- alternate sequencer (see above) * 9 *16 raw synthesizer access * 10 *16 unused * 11 *16 unused * 12 *16 unused * 13 *16 unused * 14 *16 unused * 15 *16 unused */ static struct sound_unit *chains[SOUND_STEP]; /** * register_sound_special_device - register a special sound node * @fops: File operations for the driver * @unit: Unit number to allocate * @dev: device pointer * * Allocate a special sound device by minor number from the sound * subsystem. * * Return: The allocated number is returned on success. On failure, * a negative error code is returned. 
*/ int register_sound_special_device(const struct file_operations *fops, int unit, struct device *dev) { const int chain = unit % SOUND_STEP; int max_unit = 256; const char *name; char _name[16]; switch (chain) { case 0: name = "mixer"; break; case 1: name = "sequencer"; if (unit >= SOUND_STEP) goto __unknown; max_unit = unit + 1; break; case 2: name = "midi"; break; case 3: name = "dsp"; break; case 4: name = "audio"; break; case 5: name = "dspW"; break; case 8: name = "sequencer2"; if (unit >= SOUND_STEP) goto __unknown; max_unit = unit + 1; break; case 9: name = "dmmidi"; break; case 10: name = "dmfm"; break; case 12: name = "adsp"; break; case 13: name = "amidi"; break; case 14: name = "admmidi"; break; default: { __unknown: sprintf(_name, "unknown%d", chain); if (unit >= SOUND_STEP) strcat(_name, "-"); name = _name; } break; } return sound_insert_unit(&chains[chain], fops, -1, unit, max_unit, name, 0600, dev); } EXPORT_SYMBOL(register_sound_special_device); int register_sound_special(const struct file_operations *fops, int unit) { return register_sound_special_device(fops, unit, NULL); } EXPORT_SYMBOL(register_sound_special); /** * register_sound_mixer - register a mixer device * @fops: File operations for the driver * @dev: Unit number to allocate * * Allocate a mixer device. Unit is the number of the mixer requested. * Pass -1 to request the next free mixer unit. * * Return: On success, the allocated number is returned. On failure, * a negative error code is returned. */ int register_sound_mixer(const struct file_operations *fops, int dev) { return sound_insert_unit(&chains[0], fops, dev, 0, 128, "mixer", 0600, NULL); } EXPORT_SYMBOL(register_sound_mixer); /* * DSP's are registered as a triple. Register only one and cheat * in open - see below. */ /** * register_sound_dsp - register a DSP device * @fops: File operations for the driver * @dev: Unit number to allocate * * Allocate a DSP device. Unit is the number of the DSP requested. 
* Pass -1 to request the next free DSP unit. * * This function allocates both the audio and dsp device entries together * and will always allocate them as a matching pair - eg dsp3/audio3 * * Return: On success, the allocated number is returned. On failure, * a negative error code is returned. */ int register_sound_dsp(const struct file_operations *fops, int dev) { return sound_insert_unit(&chains[3], fops, dev, 3, 131, "dsp", 0600, NULL); } EXPORT_SYMBOL(register_sound_dsp); /** * unregister_sound_special - unregister a special sound device * @unit: unit number to allocate * * Release a sound device that was allocated with * register_sound_special(). The unit passed is the return value from * the register function. */ void unregister_sound_special(int unit) { sound_remove_unit(&chains[unit % SOUND_STEP], unit); } EXPORT_SYMBOL(unregister_sound_special); /** * unregister_sound_mixer - unregister a mixer * @unit: unit number to allocate * * Release a sound device that was allocated with register_sound_mixer(). * The unit passed is the return value from the register function. */ void unregister_sound_mixer(int unit) { sound_remove_unit(&chains[0], unit); } EXPORT_SYMBOL(unregister_sound_mixer); /** * unregister_sound_dsp - unregister a DSP device * @unit: unit number to allocate * * Release a sound device that was allocated with register_sound_dsp(). * The unit passed is the return value from the register function. * * Both of the allocated units are released together automatically. 
*/ void unregister_sound_dsp(int unit) { sound_remove_unit(&chains[3], unit); } EXPORT_SYMBOL(unregister_sound_dsp); static struct sound_unit *__look_for_unit(int chain, int unit) { struct sound_unit *s; s=chains[chain]; while(s && s->unit_minor <= unit) { if(s->unit_minor==unit) return s; s=s->next; } return NULL; } static int soundcore_open(struct inode *inode, struct file *file) { int chain; int unit = iminor(inode); struct sound_unit *s; const struct file_operations *new_fops = NULL; chain=unit&0x0F; if(chain==4 || chain==5) /* dsp/audio/dsp16 */ { unit&=0xF0; unit|=3; chain=3; } spin_lock(&sound_loader_lock); s = __look_for_unit(chain, unit); if (s) new_fops = fops_get(s->unit_fops); if (preclaim_oss && !new_fops) { spin_unlock(&sound_loader_lock); /* * Please, don't change this order or code. * For ALSA slot means soundcard and OSS emulation code * comes as add-on modules which aren't depend on * ALSA toplevel modules for soundcards, thus we need * load them at first. [Jaroslav Kysela <perex@jcu.cz>] */ request_module("sound-slot-%i", unit>>4); request_module("sound-service-%i-%i", unit>>4, chain); /* * sound-slot/service-* module aliases are scheduled * for removal in favor of the standard char-major-* * module aliases. For the time being, generate both * the legacy and standard module aliases to ease * transition. */ if (request_module("char-major-%d-%d", SOUND_MAJOR, unit) > 0) request_module("char-major-%d", SOUND_MAJOR); spin_lock(&sound_loader_lock); s = __look_for_unit(chain, unit); if (s) new_fops = fops_get(s->unit_fops); } spin_unlock(&sound_loader_lock); if (!new_fops) return -ENODEV; /* * We rely upon the fact that we can't be unloaded while the * subdriver is there. 
*/ replace_fops(file, new_fops); if (!file->f_op->open) return -ENODEV; return file->f_op->open(inode, file); } MODULE_ALIAS_CHARDEV_MAJOR(SOUND_MAJOR); static void cleanup_oss_soundcore(void) { /* We have nothing to really do here - we know the lists must be empty */ unregister_chrdev(SOUND_MAJOR, "sound"); } static int __init init_oss_soundcore(void) { if (preclaim_oss && register_chrdev(SOUND_MAJOR, "sound", &soundcore_fops) < 0) { printk(KERN_ERR "soundcore: sound device already in use.\n"); return -EBUSY; } return 0; } #endif /* CONFIG_SOUND_OSS_CORE */
8 2 7 3 4 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6/ip6t_NPT.h> #include <linux/netfilter/x_tables.h> static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) { struct ip6t_npt_tginfo *npt = par->targinfo; struct in6_addr pfx; __wsum src_sum, dst_sum; if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) return -EINVAL; /* Ensure that LSB of prefix is zero */ ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len); if (!ipv6_addr_equal(&pfx, &npt->src_pfx.in6)) return -EINVAL; ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len); if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6)) return -EINVAL; src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0); dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0); npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum)); return 0; } static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, struct in6_addr *addr) { unsigned int pfx_len; unsigned int i, idx; __be32 mask; __sum16 sum; pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len); for (i = 0; i < pfx_len; i += 32) { if 
(pfx_len - i >= 32) mask = 0; else mask = htonl((1 << (i - pfx_len + 32)) - 1); idx = i / 32; addr->s6_addr32[idx] &= mask; addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx]; } if (pfx_len <= 48) idx = 3; else { for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) { if ((__force __sum16)addr->s6_addr16[idx] != CSUM_MANGLED_0) break; } if (idx == ARRAY_SIZE(addr->s6_addr16)) return false; } sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]), csum_unfold(npt->adjustment))); if (sum == CSUM_MANGLED_0) sum = 0; *(__force __sum16 *)&addr->s6_addr16[idx] = sum; return true; } static struct ipv6hdr *icmpv6_bounced_ipv6hdr(struct sk_buff *skb, struct ipv6hdr *_bounced_hdr) { if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) return NULL; if (!icmpv6_is_err(icmp6_hdr(skb)->icmp6_type)) return NULL; return skb_header_pointer(skb, skb_transport_offset(skb) + sizeof(struct icmp6hdr), sizeof(struct ipv6hdr), _bounced_hdr); } static unsigned int ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_npt_tginfo *npt = par->targinfo; struct ipv6hdr _bounced_hdr; struct ipv6hdr *bounced_hdr; struct in6_addr bounced_pfx; if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) { icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, saddr)); return NF_DROP; } /* rewrite dst addr of bounced packet which was sent to dst range */ bounced_hdr = icmpv6_bounced_ipv6hdr(skb, &_bounced_hdr); if (bounced_hdr) { ipv6_addr_prefix(&bounced_pfx, &bounced_hdr->daddr, npt->src_pfx_len); if (ipv6_addr_cmp(&bounced_pfx, &npt->src_pfx.in6) == 0) ip6t_npt_map_pfx(npt, &bounced_hdr->daddr); } return XT_CONTINUE; } static unsigned int ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_npt_tginfo *npt = par->targinfo; struct ipv6hdr _bounced_hdr; struct ipv6hdr *bounced_hdr; struct in6_addr bounced_pfx; if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) { icmpv6_send(skb, 
ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, daddr)); return NF_DROP; } /* rewrite src addr of bounced packet which was sent from dst range */ bounced_hdr = icmpv6_bounced_ipv6hdr(skb, &_bounced_hdr); if (bounced_hdr) { ipv6_addr_prefix(&bounced_pfx, &bounced_hdr->saddr, npt->src_pfx_len); if (ipv6_addr_cmp(&bounced_pfx, &npt->src_pfx.in6) == 0) ip6t_npt_map_pfx(npt, &bounced_hdr->saddr); } return XT_CONTINUE; } static struct xt_target ip6t_npt_target_reg[] __read_mostly = { { .name = "SNPT", .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_POST_ROUTING), .me = THIS_MODULE, }, { .name = "DNPT", .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), .me = THIS_MODULE, }, }; static int __init ip6t_npt_init(void) { return xt_register_targets(ip6t_npt_target_reg, ARRAY_SIZE(ip6t_npt_target_reg)); } static void __exit ip6t_npt_exit(void) { xt_unregister_targets(ip6t_npt_target_reg, ARRAY_SIZE(ip6t_npt_target_reg)); } module_init(ip6t_npt_init); module_exit(ip6t_npt_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS("ip6t_SNPT"); MODULE_ALIAS("ip6t_DNPT");
7 1 1 1 5 14 14 4 4 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 /* * Copyright (c) 2006-2008 Intel Corporation * Copyright (c) 2007 Dave Airlie <airlied@linux.ie> * Copyright (c) 2008 Red Hat Inc. * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. 
*/ #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_gem.h> #include <drm/drm_mode.h> #include "drm_crtc_internal.h" #include "drm_internal.h" /** * DOC: overview * * The KMS API doesn't standardize backing storage object creation and leaves it * to driver-specific ioctls. Furthermore actually creating a buffer object even * for GEM-based drivers is done through a driver-specific ioctl - GEM only has * a common userspace interface for sharing and destroying objects. While not an * issue for full-fledged graphics stacks that include device-specific userspace * components (in libdrm for instance), this limit makes DRM-based early boot * graphics unnecessarily complex. * * Dumb objects partly alleviate the problem by providing a standard API to * create dumb buffers suitable for scanout, which can then be used to create * KMS frame buffers. * * To support dumb objects drivers must implement the &drm_driver.dumb_create * and &drm_driver.dumb_map_offset operations (the latter defaults to * drm_gem_dumb_map_offset() if not set). Drivers that don't use GEM handles * additionally need to implement the &drm_driver.dumb_destroy operation. See * the callbacks for further details. * * Note that dumb objects may not be used for gpu acceleration, as has been * attempted on some ARM embedded platforms. Such drivers really must have * a hardware-specific ioctl to allocate suitable buffer objects. 
*/ int drm_mode_create_dumb(struct drm_device *dev, struct drm_mode_create_dumb *args, struct drm_file *file_priv) { u32 cpp, stride, size; if (!dev->driver->dumb_create) return -ENOSYS; if (!args->width || !args->height || !args->bpp) return -EINVAL; /* overflow checks for 32bit size calculations */ if (args->bpp > U32_MAX - 8) return -EINVAL; cpp = DIV_ROUND_UP(args->bpp, 8); if (cpp > U32_MAX / args->width) return -EINVAL; stride = cpp * args->width; if (args->height > U32_MAX / stride) return -EINVAL; /* test for wrap-around */ size = args->height * stride; if (PAGE_ALIGN(size) == 0) return -EINVAL; /* * handle, pitch and size are output parameters. Zero them out to * prevent drivers from accidentally using uninitialized data. Since * not all existing userspace is clearing these fields properly we * cannot reject IOCTL with garbage in them. */ args->handle = 0; args->pitch = 0; args->size = 0; return dev->driver->dumb_create(file_priv, dev, args); } int drm_mode_create_dumb_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { return drm_mode_create_dumb(dev, data, file_priv); } /** * drm_mode_mmap_dumb_ioctl - create an mmap offset for a dumb backing storage buffer * @dev: DRM device * @data: ioctl data * @file_priv: DRM file info * * Allocate an offset in the drm device node's address space to be able to * memory map a dumb buffer. * * Called by the user via ioctl. * * Returns: * Zero on success, negative errno on failure. 
*/ int drm_mode_mmap_dumb_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_map_dumb *args = data; if (!dev->driver->dumb_create) return -ENOSYS; if (dev->driver->dumb_map_offset) return dev->driver->dumb_map_offset(file_priv, dev, args->handle, &args->offset); else return drm_gem_dumb_map_offset(file_priv, dev, args->handle, &args->offset); } int drm_mode_destroy_dumb(struct drm_device *dev, u32 handle, struct drm_file *file_priv) { if (!dev->driver->dumb_create) return -ENOSYS; return drm_gem_handle_delete(file_priv, handle); } int drm_mode_destroy_dumb_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_destroy_dumb *args = data; return drm_mode_destroy_dumb(dev, args->handle, file_priv); }
9 9 7 30 1 1 9 1 7 1 2 3 2 7 5 7 7 5 9 11 2 7 4 9 9 9 5 2 1 1 1 4 3 1 2 1 3 1 2 1 4 3 13 4 9 14 11 3 6 1 3 3 2 26 43 43 42 43 15 27 26 13 13 13 8 2 2 6 19 19 1 18 1 1 16 2 15 10 5 5 4 1 7 2 2 2 1 7 9 5 1 5 9 11 11 1 19 19 19 19 19 19 2 3 3 15 5 5 5 5 15 15 5 2 8 5 3 3 3 3 3 3 3 3 3 3 2 1 1 1 3 3 3 1 1 1 1 5 5 1 4 4 4 8 8 8 6 2 8 14 15 15 15 14 3 15 5 5 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 
438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 
938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 
1350 /* * videobuf2-v4l2.c - V4L2 driver helper framework * * Copyright (C) 2010 Samsung Electronics * * Author: Pawel Osciak <pawel@osciak.com> * Marek Szyprowski <m.szyprowski@samsung.com> * * The vb2_thread implementation was based on code from videobuf-dvb.c: * (c) 2004 Gerd Knorr <kraxel@bytesex.org> [SUSE Labs] * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation. */ #include <linux/device.h> #include <linux/err.h> #include <linux/freezer.h> #include <linux/kernel.h> #include <linux/kthread.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/poll.h> #include <linux/sched.h> #include <linux/slab.h> #include <media/v4l2-common.h> #include <media/v4l2-dev.h> #include <media/v4l2-device.h> #include <media/v4l2-event.h> #include <media/v4l2-fh.h> #include <media/videobuf2-v4l2.h> static int debug; module_param(debug, int, 0644); #define dprintk(q, level, fmt, arg...) \ do { \ if (debug >= level) \ pr_info("vb2-v4l2: [%p] %s: " fmt, \ (q)->name, __func__, ## arg); \ } while (0) /* Flags that are set by us */ #define V4L2_BUFFER_MASK_FLAGS (V4L2_BUF_FLAG_MAPPED | V4L2_BUF_FLAG_QUEUED | \ V4L2_BUF_FLAG_DONE | V4L2_BUF_FLAG_ERROR | \ V4L2_BUF_FLAG_PREPARED | \ V4L2_BUF_FLAG_IN_REQUEST | \ V4L2_BUF_FLAG_REQUEST_FD | \ V4L2_BUF_FLAG_TIMESTAMP_MASK) /* Output buffer flags that should be passed on to the driver */ #define V4L2_BUFFER_OUT_FLAGS (V4L2_BUF_FLAG_PFRAME | \ V4L2_BUF_FLAG_BFRAME | \ V4L2_BUF_FLAG_KEYFRAME | \ V4L2_BUF_FLAG_TIMECODE | \ V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) /* * __verify_planes_array() - verify that the planes array passed in struct * v4l2_buffer from userspace can be safely used */ static int __verify_planes_array(struct vb2_buffer *vb, const struct v4l2_buffer *b) { if (!V4L2_TYPE_IS_MULTIPLANAR(b->type)) return 0; /* Is memory for copying plane information present? 
*/ if (b->m.planes == NULL) { dprintk(vb->vb2_queue, 1, "multi-planar buffer passed but planes array not provided\n"); return -EINVAL; } if (b->length < vb->num_planes || b->length > VB2_MAX_PLANES) { dprintk(vb->vb2_queue, 1, "incorrect planes array length, expected %d, got %d\n", vb->num_planes, b->length); return -EINVAL; } return 0; } static int __verify_planes_array_core(struct vb2_buffer *vb, const void *pb) { return __verify_planes_array(vb, pb); } /* * __verify_length() - Verify that the bytesused value for each plane fits in * the plane length and that the data offset doesn't exceed the bytesused value. */ static int __verify_length(struct vb2_buffer *vb, const struct v4l2_buffer *b) { unsigned int length; unsigned int bytesused; unsigned int plane; if (V4L2_TYPE_IS_CAPTURE(b->type)) return 0; if (V4L2_TYPE_IS_MULTIPLANAR(b->type)) { for (plane = 0; plane < vb->num_planes; ++plane) { length = (b->memory == VB2_MEMORY_USERPTR || b->memory == VB2_MEMORY_DMABUF) ? b->m.planes[plane].length : vb->planes[plane].length; bytesused = b->m.planes[plane].bytesused ? b->m.planes[plane].bytesused : length; if (b->m.planes[plane].bytesused > length) return -EINVAL; if (b->m.planes[plane].data_offset > 0 && b->m.planes[plane].data_offset >= bytesused) return -EINVAL; } } else { length = (b->memory == VB2_MEMORY_USERPTR) ? b->length : vb->planes[0].length; if (b->bytesused > length) return -EINVAL; } return 0; } /* * __init_vb2_v4l2_buffer() - initialize the vb2_v4l2_buffer struct */ static void __init_vb2_v4l2_buffer(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); vbuf->request_fd = -1; } static void __copy_timestamp(struct vb2_buffer *vb, const void *pb) { const struct v4l2_buffer *b = pb; struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vb2_queue *q = vb->vb2_queue; if (q->is_output) { /* * For output buffers copy the timestamp if needed, * and the timecode field and flag if needed. 
*/ if (q->copy_timestamp) vb->timestamp = v4l2_buffer_get_timestamp(b); vbuf->flags |= b->flags & V4L2_BUF_FLAG_TIMECODE; if (b->flags & V4L2_BUF_FLAG_TIMECODE) vbuf->timecode = b->timecode; } }; static void vb2_warn_zero_bytesused(struct vb2_buffer *vb) { static bool check_once; if (check_once) return; check_once = true; pr_warn("use of bytesused == 0 is deprecated and will be removed in the future,\n"); if (vb->vb2_queue->allow_zero_bytesused) pr_warn("use VIDIOC_DECODER_CMD(V4L2_DEC_CMD_STOP) instead.\n"); else pr_warn("use the actual size instead.\n"); } static int vb2_fill_vb2_v4l2_buffer(struct vb2_buffer *vb, struct v4l2_buffer *b) { struct vb2_queue *q = vb->vb2_queue; struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vb2_plane *planes = vbuf->planes; unsigned int plane; int ret; ret = __verify_length(vb, b); if (ret < 0) { dprintk(q, 1, "plane parameters verification failed: %d\n", ret); return ret; } if (b->field == V4L2_FIELD_ALTERNATE && q->is_output) { /* * If the format's field is ALTERNATE, then the buffer's field * should be either TOP or BOTTOM, not ALTERNATE since that * makes no sense. The driver has to know whether the * buffer represents a top or a bottom field in order to * program any DMA correctly. Using ALTERNATE is wrong, since * that just says that it is either a top or a bottom field, * but not which of the two it is. 
*/ dprintk(q, 1, "the field is incorrectly set to ALTERNATE for an output buffer\n"); return -EINVAL; } vbuf->sequence = 0; vbuf->request_fd = -1; vbuf->is_held = false; if (V4L2_TYPE_IS_MULTIPLANAR(b->type)) { switch (b->memory) { case VB2_MEMORY_USERPTR: for (plane = 0; plane < vb->num_planes; ++plane) { planes[plane].m.userptr = b->m.planes[plane].m.userptr; planes[plane].length = b->m.planes[plane].length; } break; case VB2_MEMORY_DMABUF: for (plane = 0; plane < vb->num_planes; ++plane) { planes[plane].m.fd = b->m.planes[plane].m.fd; planes[plane].length = b->m.planes[plane].length; } break; default: for (plane = 0; plane < vb->num_planes; ++plane) { planes[plane].m.offset = vb->planes[plane].m.offset; planes[plane].length = vb->planes[plane].length; } break; } /* Fill in driver-provided information for OUTPUT types */ if (V4L2_TYPE_IS_OUTPUT(b->type)) { /* * Will have to go up to b->length when API starts * accepting variable number of planes. * * If bytesused == 0 for the output buffer, then fall * back to the full buffer size. In that case * userspace clearly never bothered to set it and * it's a safe assumption that they really meant to * use the full plane sizes. * * Some drivers, e.g. old codec drivers, use bytesused == 0 * as a way to indicate that streaming is finished. * In that case, the driver should use the * allow_zero_bytesused flag to keep old userspace * applications working. */ for (plane = 0; plane < vb->num_planes; ++plane) { struct vb2_plane *pdst = &planes[plane]; struct v4l2_plane *psrc = &b->m.planes[plane]; if (psrc->bytesused == 0) vb2_warn_zero_bytesused(vb); if (vb->vb2_queue->allow_zero_bytesused) pdst->bytesused = psrc->bytesused; else pdst->bytesused = psrc->bytesused ? psrc->bytesused : pdst->length; pdst->data_offset = psrc->data_offset; } } } else { /* * Single-planar buffers do not use planes array, * so fill in relevant v4l2_buffer struct fields instead. 
* In vb2 we use our internal V4l2_planes struct for * single-planar buffers as well, for simplicity. * * If bytesused == 0 for the output buffer, then fall back * to the full buffer size as that's a sensible default. * * Some drivers, e.g. old codec drivers, use bytesused == 0 as * a way to indicate that streaming is finished. In that case, * the driver should use the allow_zero_bytesused flag to keep * old userspace applications working. */ switch (b->memory) { case VB2_MEMORY_USERPTR: planes[0].m.userptr = b->m.userptr; planes[0].length = b->length; break; case VB2_MEMORY_DMABUF: planes[0].m.fd = b->m.fd; planes[0].length = b->length; break; default: planes[0].m.offset = vb->planes[0].m.offset; planes[0].length = vb->planes[0].length; break; } planes[0].data_offset = 0; if (V4L2_TYPE_IS_OUTPUT(b->type)) { if (b->bytesused == 0) vb2_warn_zero_bytesused(vb); if (vb->vb2_queue->allow_zero_bytesused) planes[0].bytesused = b->bytesused; else planes[0].bytesused = b->bytesused ? b->bytesused : planes[0].length; } else planes[0].bytesused = 0; } /* Zero flags that we handle */ vbuf->flags = b->flags & ~V4L2_BUFFER_MASK_FLAGS; if (!vb->vb2_queue->copy_timestamp || V4L2_TYPE_IS_CAPTURE(b->type)) { /* * Non-COPY timestamps and non-OUTPUT queues will get * their timestamp and timestamp source flags from the * queue. */ vbuf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK; } if (V4L2_TYPE_IS_OUTPUT(b->type)) { /* * For output buffers mask out the timecode flag: * this will be handled later in vb2_qbuf(). * The 'field' is valid metadata for this output buffer * and so that needs to be copied here. 
*/ vbuf->flags &= ~V4L2_BUF_FLAG_TIMECODE; vbuf->field = b->field; if (!(q->subsystem_flags & VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF)) vbuf->flags &= ~V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF; } else { /* Zero any output buffer flags as this is a capture buffer */ vbuf->flags &= ~V4L2_BUFFER_OUT_FLAGS; /* Zero last flag, this is a signal from driver to userspace */ vbuf->flags &= ~V4L2_BUF_FLAG_LAST; } return 0; } static void set_buffer_cache_hints(struct vb2_queue *q, struct vb2_buffer *vb, struct v4l2_buffer *b) { if (!vb2_queue_allows_cache_hints(q)) { /* * Clear buffer cache flags if queue does not support user * space hints. That's to indicate to userspace that these * flags won't work. */ b->flags &= ~V4L2_BUF_FLAG_NO_CACHE_INVALIDATE; b->flags &= ~V4L2_BUF_FLAG_NO_CACHE_CLEAN; return; } if (b->flags & V4L2_BUF_FLAG_NO_CACHE_INVALIDATE) vb->skip_cache_sync_on_finish = 1; if (b->flags & V4L2_BUF_FLAG_NO_CACHE_CLEAN) vb->skip_cache_sync_on_prepare = 1; } static int vb2_queue_or_prepare_buf(struct vb2_queue *q, struct media_device *mdev, struct vb2_buffer *vb, struct v4l2_buffer *b, bool is_prepare, struct media_request **p_req) { const char *opname = is_prepare ? 
"prepare_buf" : "qbuf"; struct media_request *req; struct vb2_v4l2_buffer *vbuf; int ret; if (b->type != q->type) { dprintk(q, 1, "%s: invalid buffer type\n", opname); return -EINVAL; } if (b->memory != q->memory) { dprintk(q, 1, "%s: invalid memory type\n", opname); return -EINVAL; } vbuf = to_vb2_v4l2_buffer(vb); ret = __verify_planes_array(vb, b); if (ret) return ret; if (!is_prepare && (b->flags & V4L2_BUF_FLAG_REQUEST_FD) && vb->state != VB2_BUF_STATE_DEQUEUED) { dprintk(q, 1, "%s: buffer is not in dequeued state\n", opname); return -EINVAL; } if (!vb->prepared) { set_buffer_cache_hints(q, vb, b); /* Copy relevant information provided by the userspace */ memset(vbuf->planes, 0, sizeof(vbuf->planes[0]) * vb->num_planes); ret = vb2_fill_vb2_v4l2_buffer(vb, b); if (ret) return ret; } if (is_prepare) return 0; if (!(b->flags & V4L2_BUF_FLAG_REQUEST_FD)) { if (q->requires_requests) { dprintk(q, 1, "%s: queue requires requests\n", opname); return -EBADR; } if (q->uses_requests) { dprintk(q, 1, "%s: queue uses requests\n", opname); return -EBUSY; } return 0; } else if (!q->supports_requests) { dprintk(q, 1, "%s: queue does not support requests\n", opname); return -EBADR; } else if (q->uses_qbuf) { dprintk(q, 1, "%s: queue does not use requests\n", opname); return -EBUSY; } /* * For proper locking when queueing a request you need to be able * to lock access to the vb2 queue, so check that there is a lock * that we can use. In addition p_req must be non-NULL. */ if (WARN_ON(!q->lock || !p_req)) return -EINVAL; /* * Make sure this op is implemented by the driver. It's easy to forget * this callback, but is it important when canceling a buffer in a * queued request. */ if (WARN_ON(!q->ops->buf_request_complete)) return -EINVAL; /* * Make sure this op is implemented by the driver for the output queue. * It's easy to forget this callback, but is it important to correctly * validate the 'field' value at QBUF time. 
*/ if (WARN_ON((q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT || q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) && !q->ops->buf_out_validate)) return -EINVAL; req = media_request_get_by_fd(mdev, b->request_fd); if (IS_ERR(req)) { dprintk(q, 1, "%s: invalid request_fd\n", opname); return PTR_ERR(req); } /* * Early sanity check. This is checked again when the buffer * is bound to the request in vb2_core_qbuf(). */ if (req->state != MEDIA_REQUEST_STATE_IDLE && req->state != MEDIA_REQUEST_STATE_UPDATING) { dprintk(q, 1, "%s: request is not idle\n", opname); media_request_put(req); return -EBUSY; } *p_req = req; vbuf->request_fd = b->request_fd; return 0; } /* * __fill_v4l2_buffer() - fill in a struct v4l2_buffer with information to be * returned to userspace */ static void __fill_v4l2_buffer(struct vb2_buffer *vb, void *pb) { struct v4l2_buffer *b = pb; struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vb2_queue *q = vb->vb2_queue; unsigned int plane; /* Copy back data such as timestamp, flags, etc. */ b->index = vb->index; b->type = vb->type; b->memory = vb->memory; b->bytesused = 0; b->flags = vbuf->flags; b->field = vbuf->field; v4l2_buffer_set_timestamp(b, vb->timestamp); b->timecode = vbuf->timecode; b->sequence = vbuf->sequence; b->reserved2 = 0; b->request_fd = 0; if (q->is_multiplanar) { /* * Fill in plane-related data if userspace provided an array * for it. The caller has already verified memory and size. 
*/ b->length = vb->num_planes; for (plane = 0; plane < vb->num_planes; ++plane) { struct v4l2_plane *pdst = &b->m.planes[plane]; struct vb2_plane *psrc = &vb->planes[plane]; pdst->bytesused = psrc->bytesused; pdst->length = psrc->length; if (q->memory == VB2_MEMORY_MMAP) pdst->m.mem_offset = psrc->m.offset; else if (q->memory == VB2_MEMORY_USERPTR) pdst->m.userptr = psrc->m.userptr; else if (q->memory == VB2_MEMORY_DMABUF) pdst->m.fd = psrc->m.fd; pdst->data_offset = psrc->data_offset; memset(pdst->reserved, 0, sizeof(pdst->reserved)); } } else { /* * We use length and offset in v4l2_planes array even for * single-planar buffers, but userspace does not. */ b->length = vb->planes[0].length; b->bytesused = vb->planes[0].bytesused; if (q->memory == VB2_MEMORY_MMAP) b->m.offset = vb->planes[0].m.offset; else if (q->memory == VB2_MEMORY_USERPTR) b->m.userptr = vb->planes[0].m.userptr; else if (q->memory == VB2_MEMORY_DMABUF) b->m.fd = vb->planes[0].m.fd; } /* * Clear any buffer state related flags. */ b->flags &= ~V4L2_BUFFER_MASK_FLAGS; b->flags |= q->timestamp_flags & V4L2_BUF_FLAG_TIMESTAMP_MASK; if (!q->copy_timestamp) { /* * For non-COPY timestamps, drop timestamp source bits * and obtain the timestamp source from the queue. 
*/ b->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK; b->flags |= q->timestamp_flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK; } switch (vb->state) { case VB2_BUF_STATE_QUEUED: case VB2_BUF_STATE_ACTIVE: b->flags |= V4L2_BUF_FLAG_QUEUED; break; case VB2_BUF_STATE_IN_REQUEST: b->flags |= V4L2_BUF_FLAG_IN_REQUEST; break; case VB2_BUF_STATE_ERROR: b->flags |= V4L2_BUF_FLAG_ERROR; fallthrough; case VB2_BUF_STATE_DONE: b->flags |= V4L2_BUF_FLAG_DONE; break; case VB2_BUF_STATE_PREPARING: case VB2_BUF_STATE_DEQUEUED: /* nothing */ break; } if ((vb->state == VB2_BUF_STATE_DEQUEUED || vb->state == VB2_BUF_STATE_IN_REQUEST) && vb->synced && vb->prepared) b->flags |= V4L2_BUF_FLAG_PREPARED; if (vb2_buffer_in_use(q, vb)) b->flags |= V4L2_BUF_FLAG_MAPPED; if (vbuf->request_fd >= 0) { b->flags |= V4L2_BUF_FLAG_REQUEST_FD; b->request_fd = vbuf->request_fd; } } /* * __fill_vb2_buffer() - fill a vb2_buffer with information provided in a * v4l2_buffer by the userspace. It also verifies that struct * v4l2_buffer has a valid number of planes. 
*/ static int __fill_vb2_buffer(struct vb2_buffer *vb, struct vb2_plane *planes) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); unsigned int plane; if (!vb->vb2_queue->copy_timestamp) vb->timestamp = 0; for (plane = 0; plane < vb->num_planes; ++plane) { if (vb->vb2_queue->memory != VB2_MEMORY_MMAP) { planes[plane].m = vbuf->planes[plane].m; planes[plane].length = vbuf->planes[plane].length; } planes[plane].bytesused = vbuf->planes[plane].bytesused; planes[plane].data_offset = vbuf->planes[plane].data_offset; } return 0; } static const struct vb2_buf_ops v4l2_buf_ops = { .verify_planes_array = __verify_planes_array_core, .init_buffer = __init_vb2_v4l2_buffer, .fill_user_buffer = __fill_v4l2_buffer, .fill_vb2_buffer = __fill_vb2_buffer, .copy_timestamp = __copy_timestamp, }; struct vb2_buffer *vb2_find_buffer(struct vb2_queue *q, u64 timestamp) { unsigned int i; struct vb2_buffer *vb2; /* * This loop doesn't scale if there is a really large number of buffers. * Maybe something more efficient will be needed in this case. */ for (i = 0; i < q->max_num_buffers; i++) { vb2 = vb2_get_buffer(q, i); if (!vb2) continue; if (vb2->copied_timestamp && vb2->timestamp == timestamp) return vb2; } return NULL; } EXPORT_SYMBOL_GPL(vb2_find_buffer); /* * vb2_querybuf() - query video buffer information * @q: vb2 queue * @b: buffer struct passed from userspace to vidioc_querybuf handler * in driver * * Should be called from vidioc_querybuf ioctl handler in driver. * This function will verify the passed v4l2_buffer structure and fill the * relevant information for the userspace. * * The return values from this function are intended to be directly returned * from vidioc_querybuf handler in driver. 
*/ int vb2_querybuf(struct vb2_queue *q, struct v4l2_buffer *b) { struct vb2_buffer *vb; int ret; if (b->type != q->type) { dprintk(q, 1, "wrong buffer type\n"); return -EINVAL; } vb = vb2_get_buffer(q, b->index); if (!vb) { dprintk(q, 1, "can't find the requested buffer %u\n", b->index); return -EINVAL; } ret = __verify_planes_array(vb, b); if (!ret) vb2_core_querybuf(q, vb, b); return ret; } EXPORT_SYMBOL(vb2_querybuf); static void vb2_set_flags_and_caps(struct vb2_queue *q, u32 memory, u32 *flags, u32 *caps, u32 *max_num_bufs) { if (!q->allow_cache_hints || memory != V4L2_MEMORY_MMAP) { /* * This needs to clear V4L2_MEMORY_FLAG_NON_COHERENT only, * but in order to avoid bugs we zero out all bits. */ *flags = 0; } else { /* Clear all unknown flags. */ *flags &= V4L2_MEMORY_FLAG_NON_COHERENT; } *caps = V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS; if (q->io_modes & VB2_MMAP) *caps |= V4L2_BUF_CAP_SUPPORTS_MMAP; if (q->io_modes & VB2_USERPTR) *caps |= V4L2_BUF_CAP_SUPPORTS_USERPTR; if (q->io_modes & VB2_DMABUF) *caps |= V4L2_BUF_CAP_SUPPORTS_DMABUF; if (q->subsystem_flags & VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF) *caps |= V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF; if (q->allow_cache_hints && q->io_modes & VB2_MMAP) *caps |= V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS; if (q->supports_requests) *caps |= V4L2_BUF_CAP_SUPPORTS_REQUESTS; if (max_num_bufs) { *max_num_bufs = q->max_num_buffers; *caps |= V4L2_BUF_CAP_SUPPORTS_MAX_NUM_BUFFERS; } } int vb2_reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req) { int ret = vb2_verify_memory_type(q, req->memory, req->type); u32 flags = req->flags; vb2_set_flags_and_caps(q, req->memory, &flags, &req->capabilities, NULL); req->flags = flags; return ret ? 
ret : vb2_core_reqbufs(q, req->memory, req->flags, &req->count); } EXPORT_SYMBOL_GPL(vb2_reqbufs); int vb2_prepare_buf(struct vb2_queue *q, struct media_device *mdev, struct v4l2_buffer *b) { struct vb2_buffer *vb; int ret; if (vb2_fileio_is_active(q)) { dprintk(q, 1, "file io in progress\n"); return -EBUSY; } if (b->flags & V4L2_BUF_FLAG_REQUEST_FD) return -EINVAL; vb = vb2_get_buffer(q, b->index); if (!vb) { dprintk(q, 1, "can't find the requested buffer %u\n", b->index); return -EINVAL; } ret = vb2_queue_or_prepare_buf(q, mdev, vb, b, true, NULL); return ret ? ret : vb2_core_prepare_buf(q, vb, b); } EXPORT_SYMBOL_GPL(vb2_prepare_buf); int vb2_create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create) { unsigned requested_planes = 1; unsigned requested_sizes[VIDEO_MAX_PLANES]; struct v4l2_format *f = &create->format; int ret = vb2_verify_memory_type(q, create->memory, f->type); unsigned i; create->index = vb2_get_num_buffers(q); vb2_set_flags_and_caps(q, create->memory, &create->flags, &create->capabilities, &create->max_num_buffers); if (create->count == 0) return ret != -EBUSY ? 
ret : 0; switch (f->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: requested_planes = f->fmt.pix_mp.num_planes; if (requested_planes == 0 || requested_planes > VIDEO_MAX_PLANES) return -EINVAL; for (i = 0; i < requested_planes; i++) requested_sizes[i] = f->fmt.pix_mp.plane_fmt[i].sizeimage; break; case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: requested_sizes[0] = f->fmt.pix.sizeimage; break; case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: requested_sizes[0] = f->fmt.vbi.samples_per_line * (f->fmt.vbi.count[0] + f->fmt.vbi.count[1]); break; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: requested_sizes[0] = f->fmt.sliced.io_size; break; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: requested_sizes[0] = f->fmt.sdr.buffersize; break; case V4L2_BUF_TYPE_META_CAPTURE: case V4L2_BUF_TYPE_META_OUTPUT: requested_sizes[0] = f->fmt.meta.buffersize; break; default: return -EINVAL; } for (i = 0; i < requested_planes; i++) if (requested_sizes[i] == 0) return -EINVAL; return ret ? 
ret : vb2_core_create_bufs(q, create->memory,
				   create->flags,
				   &create->count,
				   requested_planes,
				   requested_sizes);
}
EXPORT_SYMBOL_GPL(vb2_create_bufs);

/*
 * Queue the buffer described by @b on @q.
 *
 * Returns -EBUSY while file I/O is active on the queue and -EINVAL when
 * b->index does not name a buffer of the queue. An error from
 * vb2_queue_or_prepare_buf() is returned unchanged; otherwise the result
 * of vb2_core_qbuf() is returned.
 */
int vb2_qbuf(struct vb2_queue *q, struct media_device *mdev,
	     struct v4l2_buffer *b)
{
	struct media_request *req = NULL;
	struct vb2_buffer *vb;
	int ret;

	if (vb2_fileio_is_active(q)) {
		dprintk(q, 1, "file io in progress\n");
		return -EBUSY;
	}

	vb = vb2_get_buffer(q, b->index);
	if (!vb) {
		dprintk(q, 1, "can't find the requested buffer %u\n", b->index);
		return -EINVAL;
	}

	/* May hand back a request in @req; it is put again below. */
	ret = vb2_queue_or_prepare_buf(q, mdev, vb, b, false, &req);
	if (ret)
		return ret;
	ret = vb2_core_qbuf(q, vb, b, req);
	if (req)
		media_request_put(req);
	return ret;
}
EXPORT_SYMBOL_GPL(vb2_qbuf);

/*
 * Dequeue a buffer from @q into @b.
 *
 * Returns -EBUSY while file I/O is active on the queue and -EINVAL when
 * b->type differs from the queue type; otherwise the result of
 * vb2_core_dqbuf() is returned.
 */
int vb2_dqbuf(struct vb2_queue *q, struct v4l2_buffer *b, bool nonblocking)
{
	int ret;

	if (vb2_fileio_is_active(q)) {
		dprintk(q, 1, "file io in progress\n");
		return -EBUSY;
	}

	if (b->type != q->type) {
		dprintk(q, 1, "invalid buffer type\n");
		return -EINVAL;
	}

	ret = vb2_core_dqbuf(q, NULL, b, nonblocking);

	/* On a non-output queue, note that the LAST-flagged buffer was seen. */
	if (!q->is_output &&
	    b->flags & V4L2_BUF_FLAG_DONE &&
	    b->flags & V4L2_BUF_FLAG_LAST)
		q->last_buffer_dequeued = true;

	/*
	 *  After calling the VIDIOC_DQBUF V4L2_BUF_FLAG_DONE must be
	 *  cleared.
	 */
	b->flags &= ~V4L2_BUF_FLAG_DONE;

	return ret;
}
EXPORT_SYMBOL_GPL(vb2_dqbuf);

/* Start streaming via vb2_core_streamon(); -EBUSY while file I/O is active. */
int vb2_streamon(struct vb2_queue *q, enum v4l2_buf_type type)
{
	if (vb2_fileio_is_active(q)) {
		dprintk(q, 1, "file io in progress\n");
		return -EBUSY;
	}
	return vb2_core_streamon(q, type);
}
EXPORT_SYMBOL_GPL(vb2_streamon);

/* Stop streaming via vb2_core_streamoff(); -EBUSY while file I/O is active. */
int vb2_streamoff(struct vb2_queue *q, enum v4l2_buf_type type)
{
	if (vb2_fileio_is_active(q)) {
		dprintk(q, 1, "file io in progress\n");
		return -EBUSY;
	}
	return vb2_core_streamoff(q, type);
}
EXPORT_SYMBOL_GPL(vb2_streamoff);

/*
 * Export the buffer selected by eb->index through vb2_core_expbuf(), which
 * is handed &eb->fd to fill in. Returns -EINVAL when the index does not name
 * a buffer of the queue.
 */
int vb2_expbuf(struct vb2_queue *q, struct v4l2_exportbuffer *eb)
{
	struct vb2_buffer *vb;

	vb = vb2_get_buffer(q, eb->index);
	if (!vb) {
		dprintk(q, 1, "can't find the requested buffer %u\n", eb->index);
		return -EINVAL;
	}

	return vb2_core_expbuf(q, &eb->fd, eb->type, vb,
				eb->plane, eb->flags);
}
EXPORT_SYMBOL_GPL(vb2_expbuf);

/*
 * Fill in the V4L2-specific fields of @q, set its name (empty when @name is
 * NULL) and finish with vb2_core_queue_init(). Returns -EINVAL when one of
 * the sanity checks below fires.
 */
int vb2_queue_init_name(struct vb2_queue *q, const char *name)
{
	/*
	 * Sanity check
	 */
	if (WARN_ON(!q) ||
	    WARN_ON(q->timestamp_flags &
		    ~(V4L2_BUF_FLAG_TIMESTAMP_MASK |
		      V4L2_BUF_FLAG_TSTAMP_SRC_MASK)))
		return -EINVAL;

	/* Warn that the driver should choose an appropriate timestamp type */
	WARN_ON((q->timestamp_flags & V4L2_BUF_FLAG_TIMESTAMP_MASK) ==
		V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN);

	/* Warn that vb2_memory should match with v4l2_memory */
	if (WARN_ON(VB2_MEMORY_MMAP != (int)V4L2_MEMORY_MMAP)
		|| WARN_ON(VB2_MEMORY_USERPTR != (int)V4L2_MEMORY_USERPTR)
		|| WARN_ON(VB2_MEMORY_DMABUF != (int)V4L2_MEMORY_DMABUF))
		return -EINVAL;

	/* Only supply a default buffer struct size when none was set. */
	if (q->buf_struct_size == 0)
		q->buf_struct_size = sizeof(struct vb2_v4l2_buffer);

	q->buf_ops = &v4l2_buf_ops;
	q->is_multiplanar = V4L2_TYPE_IS_MULTIPLANAR(q->type);
	q->is_output = V4L2_TYPE_IS_OUTPUT(q->type);
	q->copy_timestamp = (q->timestamp_flags & V4L2_BUF_FLAG_TIMESTAMP_MASK)
			== V4L2_BUF_FLAG_TIMESTAMP_COPY;
	/*
	 * For compatibility with vb1: if QBUF hasn't been called yet, then
	 * return EPOLLERR as well. This only affects capture queues, output
	 * queues will always initialize waiting_for_buffers to false.
	 */
	q->quirk_poll_must_check_waiting_for_buffers = true;

	if (name)
		strscpy(q->name, name, sizeof(q->name));
	else
		q->name[0] = '\0';

	return vb2_core_queue_init(q);
}
EXPORT_SYMBOL_GPL(vb2_queue_init_name);

/* Same as vb2_queue_init_name() with no name. */
int vb2_queue_init(struct vb2_queue *q)
{
	return vb2_queue_init_name(q, NULL);
}
EXPORT_SYMBOL_GPL(vb2_queue_init);

/* Thin wrapper around vb2_core_queue_release(). */
void vb2_queue_release(struct vb2_queue *q)
{
	vb2_core_queue_release(q);
}
EXPORT_SYMBOL_GPL(vb2_queue_release);

/*
 * Switch @q to buffer type @type. A no-op returning 0 when the type already
 * matches; returns -EBUSY when vb2_is_busy() reports the queue as busy.
 */
int vb2_queue_change_type(struct vb2_queue *q, unsigned int type)
{
	if (type == q->type)
		return 0;

	if (vb2_is_busy(q))
		return -EBUSY;

	q->type = type;

	return 0;
}
EXPORT_SYMBOL_GPL(vb2_queue_change_type);

/*
 * Poll @q through vb2_core_poll(). When the video device uses v4l2_fh, also
 * wait on the file handle's wait queue and report EPOLLPRI for a pending
 * event.
 */
__poll_t vb2_poll(struct vb2_queue *q, struct file *file, poll_table *wait)
{
	struct video_device *vfd = video_devdata(file);
	__poll_t res;

	res = vb2_core_poll(q, file, wait);

	if (test_bit(V4L2_FL_USES_V4L2_FH, &vfd->flags)) {
		struct v4l2_fh *fh = file->private_data;

		poll_wait(file, &fh->wait, wait);
		if (v4l2_event_pending(fh))
			res |= EPOLLPRI;
	}

	return res;
}
EXPORT_SYMBOL_GPL(vb2_poll);

/*
 * The following functions are not part of the vb2 core API, but are helper
 * functions that plug into struct v4l2_ioctl_ops, struct v4l2_file_operations
 * and struct vb2_ops.
 * They contain boilerplate code that most if not all drivers have to do
 * and so they simplify the driver code.
 */

/* vb2 ioctl helpers */

/*
 * The ioctl helpers below operate on vdev->queue of the video device behind
 * @file. Except where noted they return -EBUSY when vb2_queue_is_busy()
 * reports the queue as busy for @file.
 */

/*
 * REQBUFS helper: flags and capabilities are filled in before the result of
 * the memory type check is acted upon, so they are reported on error too.
 */
int vb2_ioctl_reqbufs(struct file *file, void *priv,
			  struct v4l2_requestbuffers *p)
{
	struct video_device *vdev = video_devdata(file);
	int res = vb2_verify_memory_type(vdev->queue, p->memory, p->type);
	u32 flags = p->flags;

	vb2_set_flags_and_caps(vdev->queue, p->memory, &flags,
			       &p->capabilities, NULL);
	p->flags = flags;
	if (res)
		return res;
	if (vb2_queue_is_busy(vdev->queue, file))
		return -EBUSY;
	res = vb2_core_reqbufs(vdev->queue, p->memory, p->flags, &p->count);
	/* If count == 0, then the owner has released all buffers and he
	   is no longer owner of the queue. Otherwise we have a new owner. */
	if (res == 0)
		vdev->queue->owner = p->count ? file->private_data : NULL;
	return res;
}
EXPORT_SYMBOL_GPL(vb2_ioctl_reqbufs);

/*
 * CREATE_BUFS helper: p->index, flags and capabilities are filled in
 * unconditionally; on success @file's private data becomes the queue owner.
 */
int vb2_ioctl_create_bufs(struct file *file, void *priv,
			  struct v4l2_create_buffers *p)
{
	struct video_device *vdev = video_devdata(file);
	int res = vb2_verify_memory_type(vdev->queue, p->memory,
			p->format.type);

	p->index = vb2_get_num_buffers(vdev->queue);
	vb2_set_flags_and_caps(vdev->queue, p->memory, &p->flags,
			       &p->capabilities, &p->max_num_buffers);
	/*
	 * If count == 0, then just check if memory and type are valid.
	 * Any -EBUSY result from vb2_verify_memory_type can be mapped to 0.
	 */
	if (p->count == 0)
		return res != -EBUSY ? res : 0;
	if (res)
		return res;
	if (vb2_queue_is_busy(vdev->queue, file))
		return -EBUSY;

	res = vb2_create_bufs(vdev->queue, p);
	if (res == 0)
		vdev->queue->owner = file->private_data;
	return res;
}
EXPORT_SYMBOL_GPL(vb2_ioctl_create_bufs);

/* PREPARE_BUF helper: forwards to vb2_prepare_buf(). */
int vb2_ioctl_prepare_buf(struct file *file, void *priv,
			  struct v4l2_buffer *p)
{
	struct video_device *vdev = video_devdata(file);

	if (vb2_queue_is_busy(vdev->queue, file))
		return -EBUSY;
	return vb2_prepare_buf(vdev->queue, vdev->v4l2_dev->mdev, p);
}
EXPORT_SYMBOL_GPL(vb2_ioctl_prepare_buf);

/* QUERYBUF helper: forwards to vb2_querybuf() without a busy check. */
int vb2_ioctl_querybuf(struct file *file, void *priv, struct v4l2_buffer *p)
{
	struct video_device *vdev = video_devdata(file);

	/* No need to call vb2_queue_is_busy(), anyone can query buffers. */
	return vb2_querybuf(vdev->queue, p);
}
EXPORT_SYMBOL_GPL(vb2_ioctl_querybuf);

/* QBUF helper: forwards to vb2_qbuf(). */
int vb2_ioctl_qbuf(struct file *file, void *priv, struct v4l2_buffer *p)
{
	struct video_device *vdev = video_devdata(file);

	if (vb2_queue_is_busy(vdev->queue, file))
		return -EBUSY;
	return vb2_qbuf(vdev->queue, vdev->v4l2_dev->mdev, p);
}
EXPORT_SYMBOL_GPL(vb2_ioctl_qbuf);

/* DQBUF helper: non-blocking when @file was opened with O_NONBLOCK. */
int vb2_ioctl_dqbuf(struct file *file, void *priv, struct v4l2_buffer *p)
{
	struct video_device *vdev = video_devdata(file);

	if (vb2_queue_is_busy(vdev->queue, file))
		return -EBUSY;
	return vb2_dqbuf(vdev->queue, p, file->f_flags & O_NONBLOCK);
}
EXPORT_SYMBOL_GPL(vb2_ioctl_dqbuf);

/* STREAMON helper: forwards to vb2_streamon(). */
int vb2_ioctl_streamon(struct file *file, void *priv, enum v4l2_buf_type i)
{
	struct video_device *vdev = video_devdata(file);

	if (vb2_queue_is_busy(vdev->queue, file))
		return -EBUSY;
	return vb2_streamon(vdev->queue, i);
}
EXPORT_SYMBOL_GPL(vb2_ioctl_streamon);

/* STREAMOFF helper: forwards to vb2_streamoff(). */
int vb2_ioctl_streamoff(struct file *file, void *priv, enum v4l2_buf_type i)
{
	struct video_device *vdev = video_devdata(file);

	if (vb2_queue_is_busy(vdev->queue, file))
		return -EBUSY;
	return vb2_streamoff(vdev->queue, i);
}
EXPORT_SYMBOL_GPL(vb2_ioctl_streamoff);

/* EXPBUF helper: forwards to vb2_expbuf(). */
int vb2_ioctl_expbuf(struct file *file, void *priv, struct v4l2_exportbuffer *p)
{
	struct video_device *vdev = video_devdata(file);

	if (vb2_queue_is_busy(vdev->queue, file))
		return -EBUSY;
	return vb2_expbuf(vdev->queue, p);
}
EXPORT_SYMBOL_GPL(vb2_ioctl_expbuf);

/* v4l2_file_operations helpers */

/* mmap helper: forwards to vb2_mmap() on the device's queue. */
int vb2_fop_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct video_device *vdev = video_devdata(file);

	return vb2_mmap(vdev->queue, vma);
}
EXPORT_SYMBOL_GPL(vb2_fop_mmap);

/*
 * Release helper taking an explicit @lock (may be NULL, in which case no
 * locking is done here). The queue is released only when it has no owner or
 * when @file is the owner; v4l2_fh_release() is always called and its result
 * returned.
 */
int _vb2_fop_release(struct file *file, struct mutex *lock)
{
	struct video_device *vdev = video_devdata(file);

	if (lock)
		mutex_lock(lock);
	if (!vdev->queue->owner || file->private_data == vdev->queue->owner) {
		vb2_queue_release(vdev->queue);
		vdev->queue->owner = NULL;
	}
	if (lock)
		mutex_unlock(lock);
	return v4l2_fh_release(file);
}
EXPORT_SYMBOL_GPL(_vb2_fop_release);

/*
 * Release helper: calls _vb2_fop_release() with the queue lock if the queue
 * has one, otherwise with the video device lock.
 */
int vb2_fop_release(struct file *file)
{
	struct video_device *vdev = video_devdata(file);
	struct mutex *lock = vdev->queue->lock ? vdev->queue->lock : vdev->lock;

	return _vb2_fop_release(file, lock);
}
EXPORT_SYMBOL_GPL(vb2_fop_release);

/*
 * write() helper.
 *
 * Returns -EINVAL when the queue does not advertise VB2_WRITE, -ERESTARTSYS
 * when taking the lock is interrupted, -EBUSY when the queue is busy for
 * @file, and otherwise the result of vb2_write().
 */
ssize_t vb2_fop_write(struct file *file, const char __user *buf,
		size_t count, loff_t *ppos)
{
	struct video_device *vdev = video_devdata(file);
	struct mutex *lock = vdev->queue->lock ? vdev->queue->lock : vdev->lock;
	int err = -EBUSY;	/* result of the busy path below */

	if (!(vdev->queue->io_modes & VB2_WRITE))
		return -EINVAL;
	if (lock && mutex_lock_interruptible(lock))
		return -ERESTARTSYS;
	if (vb2_queue_is_busy(vdev->queue, file))
		goto exit;
	err = vb2_write(vdev->queue, buf, count, ppos,
		       file->f_flags & O_NONBLOCK);
	/* With file I/O set up on the queue, this file handle owns it. */
	if (vdev->queue->fileio)
		vdev->queue->owner = file->private_data;
exit:
	if (lock)
		mutex_unlock(lock);
	return err;
}
EXPORT_SYMBOL_GPL(vb2_fop_write);

/*
 * read() helper.
 *
 * Returns -EINVAL when the queue does not advertise VB2_READ, -ERESTARTSYS
 * when taking the lock is interrupted, -EBUSY when the queue is busy for
 * @file, and otherwise the result of vb2_read().
 */
ssize_t vb2_fop_read(struct file *file, char __user *buf,
		size_t count, loff_t *ppos)
{
	struct video_device *vdev = video_devdata(file);
	struct mutex *lock = vdev->queue->lock ? vdev->queue->lock : vdev->lock;
	int err = -EBUSY;	/* result of the busy path below */

	if (!(vdev->queue->io_modes & VB2_READ))
		return -EINVAL;
	if (lock && mutex_lock_interruptible(lock))
		return -ERESTARTSYS;
	if (vb2_queue_is_busy(vdev->queue, file))
		goto exit;
	/* Claim ownership before reading; dropped again if no file I/O exists. */
	vdev->queue->owner = file->private_data;
	err = vb2_read(vdev->queue, buf, count, ppos,
		       file->f_flags & O_NONBLOCK);
	if (!vdev->queue->fileio)
		vdev->queue->owner = NULL;
exit:
	if (lock)
		mutex_unlock(lock);
	return err;
}
EXPORT_SYMBOL_GPL(vb2_fop_read);

/*
 * poll() helper: runs vb2_poll() under the queue lock (or the video device
 * lock when the queue has none). Returns EPOLLERR when taking the lock is
 * interrupted.
 */
__poll_t vb2_fop_poll(struct file *file, poll_table *wait)
{
	struct video_device *vdev = video_devdata(file);
	struct vb2_queue *q = vdev->queue;
	struct mutex *lock = q->lock ? q->lock : vdev->lock;
	__poll_t res;
	void *fileio;

	/*
	 * If this helper doesn't know how to lock, then you shouldn't be using
	 * it but you should write your own.
	 */
	WARN_ON(!lock);

	if (lock && mutex_lock_interruptible(lock))
		return EPOLLERR;

	/* Snapshot q->fileio so a change made by vb2_poll() can be detected. */
	fileio = q->fileio;

	res = vb2_poll(vdev->queue, file, wait);

	/* If fileio was started, then we have a new queue owner. */
	if (!fileio && q->fileio)
		q->owner = file->private_data;
	if (lock)
		mutex_unlock(lock);
	return res;
}
EXPORT_SYMBOL_GPL(vb2_fop_poll);

#ifndef CONFIG_MMU
/* Only built without CONFIG_MMU: forwards to vb2_get_unmapped_area(). */
unsigned long vb2_fop_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct video_device *vdev = video_devdata(file);

	return vb2_get_unmapped_area(vdev->queue, addr, len, pgoff, flags);
}
EXPORT_SYMBOL_GPL(vb2_fop_get_unmapped_area);
#endif

/*
 * Unregister @vdev and then release its queue under the queue lock (or the
 * video device lock when the queue has none). Does nothing when @vdev is
 * NULL or not registered.
 */
void vb2_video_unregister_device(struct video_device *vdev)
{
	/* Check if vdev was ever registered at all */
	if (!vdev || !video_is_registered(vdev))
		return;

	/*
	 * Calling this function only makes sense if vdev->queue is set.
	 * If it is NULL, then just call video_unregister_device() instead.
	 */
	WARN_ON(!vdev->queue);

	/*
	 * Take a reference to the device since video_unregister_device()
	 * calls device_unregister(), but we don't want that to release
	 * the device since we want to clean up the queue first.
	 */
	get_device(&vdev->dev);
	video_unregister_device(vdev);
	if (vdev->queue) {
		struct mutex *lock = vdev->queue->lock ?
			vdev->queue->lock : vdev->lock;

		if (lock)
			mutex_lock(lock);
		vb2_queue_release(vdev->queue);
		vdev->queue->owner = NULL;
		if (lock)
			mutex_unlock(lock);
	}
	/*
	 * Now we put the device, and in most cases this will release
	 * everything.
	 */
	put_device(&vdev->dev);
}
EXPORT_SYMBOL_GPL(vb2_video_unregister_device);

/* vb2_ops helpers. Only use if vq->lock is non-NULL.
*/

/* wait_prepare helper: drops vq->lock. */
void vb2_ops_wait_prepare(struct vb2_queue *vq)
{
	mutex_unlock(vq->lock);
}
EXPORT_SYMBOL_GPL(vb2_ops_wait_prepare);

/* wait_finish helper: takes vq->lock again. */
void vb2_ops_wait_finish(struct vb2_queue *vq)
{
	mutex_lock(vq->lock);
}
EXPORT_SYMBOL_GPL(vb2_ops_wait_finish);

/*
 * Note that this function is called during validation time and
 * thus the req_queue_mutex is held to ensure no request objects
 * can be added or deleted while validating. So there is no need
 * to protect the objects list.
 *
 * Returns -ENOENT when the request holds no buffers, the first non-zero
 * ->prepare() result on failure, and 0 otherwise.
 */
int vb2_request_validate(struct media_request *req)
{
	struct media_request_object *obj;
	int ret = 0;

	if (!vb2_request_buffer_cnt(req))
		return -ENOENT;

	/* Prepare every object that has a ->prepare op; stop at the first error. */
	list_for_each_entry(obj, &req->objects, list) {
		if (!obj->ops->prepare)
			continue;

		ret = obj->ops->prepare(obj);
		if (ret)
			break;
	}

	if (ret) {
		/* Walk back from the failing object and undo earlier prepares. */
		list_for_each_entry_continue_reverse(obj, &req->objects, list)
			if (obj->ops->unprepare)
				obj->ops->unprepare(obj);
		return ret;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(vb2_request_validate);

/* Call the ->queue op of every object in @req that has one. */
void vb2_request_queue(struct media_request *req)
{
	struct media_request_object *obj, *obj_safe;

	/*
	 * Queue all objects. Note that buffer objects are at the end of the
	 * objects list, after all other object types. Once buffer objects
	 * are queued, the driver might delete them immediately (if the driver
	 * processes the buffer at once), so we have to use
	 * list_for_each_entry_safe() to handle the case where the object we
	 * queue is deleted.
	 */
	list_for_each_entry_safe(obj, obj_safe, &req->objects, list)
		if (obj->ops->queue)
			obj->ops->queue(obj);
}
EXPORT_SYMBOL_GPL(vb2_request_queue);

MODULE_DESCRIPTION("Driver helper framework for Video for Linux 2");
MODULE_AUTHOR("Pawel Osciak <pawel@osciak.com>, Marek Szyprowski");
MODULE_LICENSE("GPL");
130 131 2 131 131 7 7 6 7 1 8 8 8 8 83 81 2 1332 1330 1330 1 1332 606 131 482 481 481 482 481 170 481 486 481 3 1194 1193 78 78 483 345 140 98 43 140 103 310 73 1702 1353 367 909 908 401 466 857 2 855 851 3 3 58 501 101 381 18 479 482 482 24 3 25 25 1 25 25 25 25 32 5 31 1 27 142 1 96 28 84 12 17 99 110 7 23 143 143 15 112 541 170 170 47 2 45 433 432 12 10 1 15 379 43 399 400 12 12 15 15 355 414 251 250 795 2 1 793 792 794 423 4 4 1 148 263 263 105 105 361 6 2 138 211 311 3 309 9 2 7 6 2 23 19 19 19 19 18 1791 669 1030 1029 158 159 104 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 
390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 
890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 
1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 
1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 
2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/super.c * * Copyright (C) 1991, 1992 Linus Torvalds * * super.c contains code to handle: - mount structures * - super-block tables * - filesystem drivers list * - mount system call * - umount system call * - ustat system call * * GK 2/5/95 - Changed to support mounting the root fs via NFS * * Added kerneld support: Jacques Gelinas and Bjorn Ekwall * Added change_root: Werner Almesberger & Hans Lermen, Feb '96 * Added options to /proc/mounts: * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996. * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998 * Heavily rewritten for 'one fs - one tree' dcache architecture. 
AV, Mar 2000
 */

#include <linux/export.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/writeback.h>		/* for the emergency remount stuff */
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/rculist_bl.h>
#include <linux/fscrypt.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
#include <linux/user_namespace.h>
#include <linux/fs_context.h>
#include <uapi/linux/mount.h>
#include "internal.h"

static int thaw_super_locked(struct super_block *sb, enum freeze_holder who);

/* File-scope list head of superblocks and the spinlock named sb_lock. */
static LIST_HEAD(super_blocks);
static DEFINE_SPINLOCK(sb_lock);

/* Names handed to __percpu_init_rwsem() for each freeze level. */
static char *sb_writers_name[SB_FREEZE_LEVELS] = {
	"sb_writers",
	"sb_pagefaults",
	"sb_internal",
};

/* Take @sb->s_umount for writing when @excl is true, for reading otherwise. */
static inline void __super_lock(struct super_block *sb, bool excl)
{
	if (excl)
		down_write(&sb->s_umount);
	else
		down_read(&sb->s_umount);
}

/* Release @sb->s_umount; @excl must say how it was taken. */
static inline void super_unlock(struct super_block *sb, bool excl)
{
	if (excl)
		up_write(&sb->s_umount);
	else
		up_read(&sb->s_umount);
}

/* Exclusive and shared shorthands for the two helpers above. */
static inline void __super_lock_excl(struct super_block *sb)
{
	__super_lock(sb, true);
}

static inline void super_unlock_excl(struct super_block *sb)
{
	super_unlock(sb, true);
}

static inline void super_unlock_shared(struct super_block *sb)
{
	super_unlock(sb, false);
}

/* Return true when any bit of @flags is set in @sb->s_flags (acquire load). */
static bool super_flags(const struct super_block *sb, unsigned int flags)
{
	/*
	 * Pairs with smp_store_release() in super_wake() and ensures
	 * that we see @flags after we're woken.
	 */
	return smp_load_acquire(&sb->s_flags) & flags;
}

/**
 * super_lock - wait for superblock to become ready and lock it
 * @sb: superblock to wait for
 * @excl: whether exclusive access is required
 *
 * If the superblock has neither passed through vfs_get_tree() or
 * generic_shutdown_super() yet wait for it to happen. Either superblock
 * creation will succeed and SB_BORN is set by vfs_get_tree() or we're
 * woken and we'll see SB_DYING.
 *
 * The caller must have acquired a temporary reference on @sb->s_count.
*
 * Return: The function returns true if SB_BORN was set and with
 *         s_umount held. The function returns false if SB_DYING was
 *         set and without s_umount held.
 */
static __must_check bool super_lock(struct super_block *sb, bool excl)
{
	lockdep_assert_not_held(&sb->s_umount);

	/* wait until the superblock is ready or dying */
	wait_var_event(&sb->s_flags, super_flags(sb, SB_BORN | SB_DYING));

	/* Don't pointlessly acquire s_umount. */
	if (super_flags(sb, SB_DYING))
		return false;

	__super_lock(sb, excl);

	/*
	 * Has gone through generic_shutdown_super() in the meantime.
	 * @sb->s_root is NULL and @sb->s_active is 0. No one needs to
	 * grab a reference to this. Tell them so.
	 */
	if (sb->s_flags & SB_DYING) {
		super_unlock(sb, excl);
		return false;
	}

	/* Reaching this point without SB_BORN set is unexpected. */
	WARN_ON_ONCE(!(sb->s_flags & SB_BORN));
	return true;
}

/* wait and try to acquire read-side of @sb->s_umount */
static inline bool super_lock_shared(struct super_block *sb)
{
	return super_lock(sb, false);
}

/* wait and try to acquire write-side of @sb->s_umount */
static inline bool super_lock_excl(struct super_block *sb)
{
	return super_lock(sb, true);
}

/* wake waiters */
#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING | SB_DEAD)
/*
 * Set @flag in @sb->s_flags and wake everybody waiting on that word. The
 * two WARN_ON_ONCE() checks expect @flag to be one of SUPER_WAKE_FLAGS and
 * at most one of them.
 */
static void super_wake(struct super_block *sb, unsigned int flag)
{
	WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS));
	WARN_ON_ONCE(hweight32(flag & SUPER_WAKE_FLAGS) > 1);

	/*
	 * Pairs with smp_load_acquire() in super_lock() to make sure
	 * all initializations in the superblock are seen by the user
	 * seeing SB_BORN sent.
	 */
	smp_store_release(&sb->s_flags, sb->s_flags | flag);
	/*
	 * Pairs with the barrier in prepare_to_wait_event() to make sure
	 * ___wait_var_event() either sees SB_BORN set or
	 * waitqueue_active() check in wake_up_var() sees the waiter.
	 */
	smp_mb();
	wake_up_var(&sb->s_flags);
}

/*
 * One thing we have to be careful of with a per-sb shrinker is that we don't
 * drop the last active reference to the superblock from within the shrinker.
* If that happens we could trigger unregistering the shrinker from within the
 * shrinker path and that leads to deadlock on the shrinker_mutex. Hence we
 * take a passive reference to the superblock to avoid this from occurring.
 */
static unsigned long super_cache_scan(struct shrinker *shrink,
				      struct shrink_control *sc)
{
	struct super_block *sb;
	long	fs_objects = 0;
	long	total_objects;
	long	freed = 0;
	long	dentries;
	long	inodes;

	sb = shrink->private_data;

	/*
	 * Deadlock avoidance.  We may hold various FS locks, and we don't want
	 * to recurse into the FS that called us in clear_inode() and friends..
	 */
	if (!(sc->gfp_mask & __GFP_FS))
		return SHRINK_STOP;

	if (!super_trylock_shared(sb))
		return SHRINK_STOP;

	if (sb->s_op->nr_cached_objects)
		fs_objects = sb->s_op->nr_cached_objects(sb, sc);

	inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
	dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
	/* The +1 keeps the divisor used by mult_frac() below non-zero. */
	total_objects = dentries + inodes + fs_objects + 1;
	if (!total_objects)
		total_objects = 1;

	/* proportion the scan between the caches */
	dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
	inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
	fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects);

	/*
	 * prune the dcache first as the icache is pinned by it, then
	 * prune the icache, followed by the filesystem specific caches
	 *
	 * Ensure that we always scan at least one object - memcg kmem
	 * accounting uses this to fully empty the caches.
	 */
	sc->nr_to_scan = dentries + 1;
	freed = prune_dcache_sb(sb, sc);
	sc->nr_to_scan = inodes + 1;
	freed += prune_icache_sb(sb, sc);

	if (fs_objects) {
		sc->nr_to_scan = fs_objects + 1;
		freed += sb->s_op->free_cached_objects(sb, sc);
	}

	super_unlock_shared(sb);
	return freed;
}

/*
 * Shrinker count callback (installed as ->count_objects in alloc_super()).
 * Returns 0 while the superblock is not SB_BORN and SHRINK_EMPTY when
 * nothing is cached; otherwise the object total scaled by
 * vfs_pressure_ratio().
 */
static unsigned long super_cache_count(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct super_block *sb;
	long	total_objects = 0;

	sb = shrink->private_data;

	/*
	 * We don't call super_trylock_shared() here as it is a scalability
	 * bottleneck, so we're exposed to partial setup state. The shrinker
	 * rwsem does not protect filesystem operations backing
	 * list_lru_shrink_count() or s_op->nr_cached_objects(). Counts can
	 * change between super_cache_count and super_cache_scan, so we really
	 * don't need locks here.
	 *
	 * However, if we are currently mounting the superblock, the underlying
	 * filesystem might be in a state of partial construction and hence it
	 * is dangerous to access it.  super_trylock_shared() uses a SB_BORN check
	 * to avoid this situation, so do the same here. The memory barrier is
	 * matched with the one in mount_fs() as we don't hold locks here.
	 */
	if (!(sb->s_flags & SB_BORN))
		return 0;
	smp_rmb();

	if (sb->s_op && sb->s_op->nr_cached_objects)
		total_objects = sb->s_op->nr_cached_objects(sb, sc);

	total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
	total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);

	if (!total_objects)
		return SHRINK_EMPTY;

	total_objects = vfs_pressure_ratio(total_objects);
	return total_objects;
}

/*
 * Final teardown of a superblock: free the percpu rwsem of every freeze
 * level and then the structure itself. Scheduled as a work item by
 * destroy_super_rcu() and called directly by destroy_unused_super().
 */
static void destroy_super_work(struct work_struct *work)
{
	struct super_block *s = container_of(work, struct super_block,
							destroy_work);
	int i;

	for (i = 0; i < SB_FREEZE_LEVELS; i++)
		percpu_free_rwsem(&s->s_writers.rw_sem[i]);
	kfree(s);
}

/* RCU callback: hand the final teardown over to destroy_super_work(). */
static void destroy_super_rcu(struct rcu_head *head)
{
	struct super_block *s = container_of(head, struct super_block, rcu);
	INIT_WORK(&s->destroy_work, destroy_super_work);
	schedule_work(&s->destroy_work);
}

/* Free a superblock that has never been seen by anyone */
static void destroy_unused_super(struct super_block *s)
{
	if (!s)
		return;
	/* Drops the s_umount write lock that alloc_super() took. */
	super_unlock_excl(s);
	list_lru_destroy(&s->s_dentry_lru);
	list_lru_destroy(&s->s_inode_lru);
	security_sb_free(s);
	put_user_ns(s->s_user_ns);
	kfree(s->s_subtype);
	shrinker_free(s->s_shrink);
	/* no delays needed */
	destroy_super_work(&s->destroy_work);
}

/**
 *	alloc_super	-	create new superblock
 *	@type:	filesystem type superblock should belong to
 *	@flags: the mount flags
 *	@user_ns: User namespace for the super_block
 *
 *	Allocates and initializes a new &struct super_block.  alloc_super()
 *	returns a pointer new superblock or %NULL if allocation had failed.
 */
static struct super_block *alloc_super(struct file_system_type *type, int flags,
				       struct user_namespace *user_ns)
{
	struct super_block *s = kzalloc(sizeof(struct super_block), GFP_KERNEL);
	static const struct super_operations default_op;
	int i;

	if (!s)
		return NULL;

	INIT_LIST_HEAD(&s->s_mounts);
	s->s_user_ns = get_user_ns(user_ns);
	init_rwsem(&s->s_umount);
	lockdep_set_class(&s->s_umount, &type->s_umount_key);
	/*
	 * sget() can have s_umount recursion.
	 *
	 * When it cannot find a suitable sb, it allocates a new
	 * one (this one), and tries again to find a suitable old
	 * one.
	 *
	 * In case that succeeds, it will acquire the s_umount
	 * lock of the old one. Since these are clearly distinct
	 * locks, and this object isn't exposed yet, there's no
	 * risk of deadlocks.
	 *
	 * Annotate this by putting this lock in a different
	 * subclass.
	 */
	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);

	/* Every "goto fail" below is undone by destroy_unused_super(). */
	if (security_sb_alloc(s))
		goto fail;

	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
		if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
					sb_writers_name[i],
					&type->s_writers_key[i]))
			goto fail;
	}
	s->s_bdi = &noop_backing_dev_info;
	s->s_flags = flags;
	if (s->s_user_ns != &init_user_ns)
		s->s_iflags |= SB_I_NODEV;
	INIT_HLIST_NODE(&s->s_instances);
	INIT_HLIST_BL_HEAD(&s->s_roots);
	mutex_init(&s->s_sync_lock);
	INIT_LIST_HEAD(&s->s_inodes);
	spin_lock_init(&s->s_inode_list_lock);
	INIT_LIST_HEAD(&s->s_inodes_wb);
	spin_lock_init(&s->s_inode_wblist_lock);

	/* Starts out with one s_count reference and one active reference. */
	s->s_count = 1;
	atomic_set(&s->s_active, 1);
	mutex_init(&s->s_vfs_rename_mutex);
	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
	init_rwsem(&s->s_dquot.dqio_sem);
	s->s_maxbytes = MAX_NON_LFS;
	s->s_op = &default_op;
	s->s_time_gran = 1000000000;
	s->s_time_min = TIME64_MIN;
	s->s_time_max = TIME64_MAX;

	s->s_shrink = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
				     "sb-%s", type->name);
	if (!s->s_shrink)
		goto fail;

	s->s_shrink->scan_objects = super_cache_scan;
	s->s_shrink->count_objects = super_cache_count;
	s->s_shrink->batch = 1024;
	s->s_shrink->private_data = s;

	if (list_lru_init_memcg(&s->s_dentry_lru, s->s_shrink))
		goto fail;
	if (list_lru_init_memcg(&s->s_inode_lru, s->s_shrink))
		goto fail;
	return s;

fail:
	destroy_unused_super(s);
	return NULL;
}

/* Superblock refcounting  */

/*
 * Drop a superblock's refcount.  The caller must hold sb_lock.
*/
static void __put_super(struct super_block *s)
{
	/*
	 * Last reference gone: unlink from the s_list, sanity-check that the
	 * LRUs and the mount list are already torn down, release the
	 * remaining resources and queue the free via call_rcu().
	 */
	if (!--s->s_count) {
		list_del_init(&s->s_list);
		WARN_ON(s->s_dentry_lru.node);
		WARN_ON(s->s_inode_lru.node);
		WARN_ON(!list_empty(&s->s_mounts));
		security_sb_free(s);
		put_user_ns(s->s_user_ns);
		kfree(s->s_subtype);
		call_rcu(&s->rcu, destroy_super_rcu);
	}
}

/**
 *	put_super	-	drop a temporary reference to superblock
 *	@sb: superblock in question
 *
 *	Drops a temporary reference, frees superblock if there's no
 *	references left.  Takes and releases sb_lock around __put_super().
 */
void put_super(struct super_block *sb)
{
	spin_lock(&sb_lock);
	__put_super(sb);
	spin_unlock(&sb_lock);
}

/*
 * Unhash @sb from its filesystem type's instance list and publish SB_DEAD
 * through super_wake().  Returns early when SB_DEAD is already set.  Must be
 * called without s_umount held (asserted via lockdep).
 */
static void kill_super_notify(struct super_block *sb)
{
	lockdep_assert_not_held(&sb->s_umount);

	/* already notified earlier */
	if (sb->s_flags & SB_DEAD)
		return;

	/*
	 * Remove it from @fs_supers so it isn't found by new
	 * sget{_fc}() walkers anymore. Any concurrent mounter still
	 * managing to grab a temporary reference is guaranteed to
	 * already see SB_DYING and will wait until we notify them about
	 * SB_DEAD.
	 */
	spin_lock(&sb_lock);
	hlist_del_init(&sb->s_instances);
	spin_unlock(&sb_lock);

	/*
	 * Let concurrent mounts know that this thing is really dead.
	 * We don't need @sb->s_umount here as every concurrent caller
	 * will see SB_DYING and either discard the superblock or wait
	 * for SB_DEAD.
	 */
	super_wake(sb, SB_DEAD);
}

/**
 *	deactivate_locked_super	-	drop an active reference to superblock
 *	@s: superblock to deactivate
 *
 *	Drops an active reference to superblock, converting it into a temporary
 *	one if there is no other active references left.  In that case we
 *	tell fs driver to shut it down and drop the temporary reference we
 *	had just acquired.
 *
 *	Caller holds exclusive lock on superblock; that lock is released.
*/
void deactivate_locked_super(struct super_block *s)
{
	struct file_system_type *fs = s->s_type;
	if (atomic_dec_and_test(&s->s_active)) {
		/*
		 * That was the last active reference.
		 * NOTE(review): there is no explicit unlock on this path;
		 * generic_shutdown_super() drops the exclusive lock, so
		 * ->kill_sb() is presumably expected to release it.
		 */
		shrinker_free(s->s_shrink);
		fs->kill_sb(s);

		kill_super_notify(s);

		/*
		 * Since list_lru_destroy() may sleep, we cannot call it from
		 * put_super(), where we hold the sb_lock. Therefore we destroy
		 * the lru lists right now.
		 */
		list_lru_destroy(&s->s_dentry_lru);
		list_lru_destroy(&s->s_inode_lru);

		put_filesystem(fs);
		put_super(s);
	} else {
		/* Other active references remain: just drop the lock. */
		super_unlock_excl(s);
	}
}

EXPORT_SYMBOL(deactivate_locked_super);

/**
 *	deactivate_super	-	drop an active reference to superblock
 *	@s: superblock to deactivate
 *
 *	Variant of deactivate_locked_super(), except that superblock is *not*
 *	locked by caller.  If we are going to drop the final active reference,
 *	lock will be acquired prior to that.
 */
void deactivate_super(struct super_block *s)
{
	/*
	 * atomic_add_unless() only refuses to decrement when s_active is
	 * exactly 1, i.e. when this would be the final active reference; in
	 * that case take the lock and go through the locked path.
	 */
	if (!atomic_add_unless(&s->s_active, -1, 1)) {
		__super_lock_excl(s);
		deactivate_locked_super(s);
	}
}

EXPORT_SYMBOL(deactivate_super);

/**
 * grab_super - acquire an active reference to a superblock
 * @sb: superblock to acquire
 *
 * Acquire a temporary reference on a superblock and try to trade it for
 * an active reference. This is used in sget{_fc}() to wait for a
 * superblock to either become SB_BORN or for it to pass through
 * ->kill_sb() and be marked as SB_DEAD.
 *
 * Called with sb_lock held; sb_lock is released before returning.
 *
 * Return: This returns true if an active reference could be acquired,
 *         false if not.  On true the superblock is returned with the
 *         exclusive lock still held.
 */
static bool grab_super(struct super_block *sb)
{
	bool locked;

	/* Temporary reference keeps @sb allocated once sb_lock is dropped. */
	sb->s_count++;
	spin_unlock(&sb_lock);
	locked = super_lock_excl(sb);
	if (locked) {
		if (atomic_inc_not_zero(&sb->s_active)) {
			/* Got an active reference; the temporary one can go. */
			put_super(sb);
			return true;
		}
		super_unlock_excl(sb);
	}
	/* No active reference to be had: wait until SB_DEAD is published. */
	wait_var_event(&sb->s_flags, super_flags(sb, SB_DEAD));
	put_super(sb);
	return false;
}

/*
 *	super_trylock_shared - try to grab ->s_umount shared
 *	@sb: reference we are trying to grab
 *
 *	Try to prevent fs shutdown.
This is used in places where we * cannot take an active reference but we need to ensure that the * filesystem is not shut down while we are working on it. It returns * false if we cannot acquire s_umount or if we lose the race and * filesystem already got into shutdown, and returns true with the s_umount * lock held in read mode in case of success. On successful return, * the caller must drop the s_umount lock when done. * * Note that unlike get_super() et.al. this one does *not* bump ->s_count. * The reason why it's safe is that we are OK with doing trylock instead * of down_read(). There's a couple of places that are OK with that, but * it's very much not a general-purpose interface. */ bool super_trylock_shared(struct super_block *sb) { if (down_read_trylock(&sb->s_umount)) { if (!(sb->s_flags & SB_DYING) && sb->s_root && (sb->s_flags & SB_BORN)) return true; super_unlock_shared(sb); } return false; } /** * retire_super - prevents superblock from being reused * @sb: superblock to retire * * The function marks superblock to be ignored in superblock test, which * prevents it from being reused for any new mounts. If the superblock has * a private bdi, it also unregisters it, but doesn't reduce the refcount * of the superblock to prevent potential races. The refcount is reduced * by generic_shutdown_super(). The function can not be called * concurrently with generic_shutdown_super(). It is safe to call the * function multiple times, subsequent calls have no effect. * * The marker will affect the re-use only for block-device-based * superblocks. Other superblocks will still get marked if this function * is used, but that will not affect their reusability. 
*/
void retire_super(struct super_block *sb)
{
	/* Only expected for superblocks that have a block device attached. */
	WARN_ON(!sb->s_bdev);
	__super_lock_excl(sb);
	if (sb->s_iflags & SB_I_PERSB_BDI) {
		/*
		 * Unregister the per-sb bdi now and clear the flag so that
		 * generic_shutdown_super() does not unregister it again.
		 */
		bdi_unregister(sb->s_bdi);
		sb->s_iflags &= ~SB_I_PERSB_BDI;
	}
	sb->s_iflags |= SB_I_RETIRED;
	super_unlock_excl(sb);
}
EXPORT_SYMBOL(retire_super);

/**
 *	generic_shutdown_super	-	common helper for ->kill_sb()
 *	@sb: superblock to kill
 *
 *	generic_shutdown_super() does all fs-independent work on superblock
 *	shutdown.  Typical ->kill_sb() should pick all fs-specific objects
 *	that need destruction out of superblock, call generic_shutdown_super()
 *	and release aforementioned objects.  Note: dentries and inodes _are_
 *	taken care of and do not need specific handling.
 *
 *	Upon calling this function, the filesystem may no longer alter or
 *	rearrange the set of dentries belonging to this super_block, nor may it
 *	change the attachments of dentries to inodes.
 *
 *	The exclusive superblock lock is released by this function.
 */
void generic_shutdown_super(struct super_block *sb)
{
	const struct super_operations *sop = sb->s_op;

	/* Only a superblock that got a root dentry needs the full teardown. */
	if (sb->s_root) {
		shrink_dcache_for_umount(sb);
		sync_filesystem(sb);
		sb->s_flags &= ~SB_ACTIVE;

		cgroup_writeback_umount();

		/* Evict all inodes with zero refcount. */
		evict_inodes(sb);

		/*
		 * Clean up and evict any inodes that still have references due
		 * to fsnotify or the security policy.
		 */
		fsnotify_sb_delete(sb);
		security_sb_delete(sb);

		if (sb->s_dio_done_wq) {
			destroy_workqueue(sb->s_dio_done_wq);
			sb->s_dio_done_wq = NULL;
		}

		if (sop->put_super)
			sop->put_super(sb);

		/*
		 * Now that all potentially-encrypted inodes have been evicted,
		 * the fscrypt keyring can be destroyed.
		 */
		fscrypt_destroy_keyring(sb);

		if (CHECK_DATA_CORRUPTION(!list_empty(&sb->s_inodes),
				"VFS: Busy inodes after unmount of %s (%s)",
				sb->s_id, sb->s_type->name)) {
			/*
			 * Adding a proper bailout path here would be hard, but
			 * we can at least make it more likely that a later
			 * iput_final() or such crashes cleanly.
			 */
			struct inode *inode;

			spin_lock(&sb->s_inode_list_lock);
			list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
				inode->i_op = VFS_PTR_POISON;
				inode->i_sb = VFS_PTR_POISON;
				inode->i_mapping = VFS_PTR_POISON;
			}
			spin_unlock(&sb->s_inode_list_lock);
		}
	}
	/*
	 * Broadcast to everyone that grabbed a temporary reference to this
	 * superblock before we removed it from @fs_supers that the superblock
	 * is dying. Every walker of @fs_supers outside of sget{_fc}() will now
	 * discard this superblock and treat it as dead.
	 *
	 * We leave the superblock on @fs_supers so it can be found by
	 * sget{_fc}() until we passed sb->kill_sb().
	 */
	super_wake(sb, SB_DYING);
	super_unlock_excl(sb);
	/*
	 * Drop the reference on a non-default bdi (unregistering a per-sb one
	 * first) and point the superblock back at the no-op bdi.
	 */
	if (sb->s_bdi != &noop_backing_dev_info) {
		if (sb->s_iflags & SB_I_PERSB_BDI)
			bdi_unregister(sb->s_bdi);
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
}

EXPORT_SYMBOL(generic_shutdown_super);

/*
 * Permission check for mounting: filesystem types flagged FS_USERNS_MOUNT
 * need CAP_SYS_ADMIN in the context's user namespace (ns_capable()); all
 * other types are checked with capable().
 */
bool mount_capable(struct fs_context *fc)
{
	if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT))
		return capable(CAP_SYS_ADMIN);
	else
		return ns_capable(fc->user_ns, CAP_SYS_ADMIN);
}

/**
 * sget_fc - Find or create a superblock
 * @fc:	Filesystem context.
 * @test: Comparison callback
 * @set: Setup callback
 *
 * Create a new superblock or find an existing one.
 *
 * The @test callback is used to find a matching existing superblock.
 * Whether or not the requested parameters in @fc are taken into account
 * is specific to the @test callback that is used. They may even be
 * completely ignored.
 *
 * If an extant superblock is matched, it will be returned unless:
 *
 * (1) the namespace the filesystem context @fc and the extant
 *     superblock's namespace differ
 *
 * (2) the filesystem context @fc has requested that reusing an extant
 *     superblock is not allowed
 *
 * In both cases EBUSY will be returned.
*
 * If no match is made, a new superblock will be allocated and basic
 * initialisation will be performed (s_type, s_fs_info and s_id will be
 * set and the @set callback will be invoked), the superblock will be
 * published and it will be returned in a partially constructed state
 * with SB_BORN and SB_ACTIVE as yet unset.
 *
 * Return: On success, an extant or newly created superblock is
 *         returned. On failure an error pointer is returned.
 */
struct super_block *sget_fc(struct fs_context *fc,
			    int (*test)(struct super_block *, struct fs_context *),
			    int (*set)(struct super_block *, struct fs_context *))
{
	struct super_block *s = NULL;
	struct super_block *old;
	struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns;
	int err;

retry:
	spin_lock(&sb_lock);
	if (test) {
		hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) {
			if (test(old, fc))
				goto share_extant_sb;
		}
	}
	if (!s) {
		/*
		 * Nothing matched and nothing is preallocated yet: allocate
		 * outside sb_lock, then rescan since the list may have
		 * changed while the lock was dropped.
		 */
		spin_unlock(&sb_lock);
		s = alloc_super(fc->fs_type, fc->sb_flags, user_ns);
		if (!s)
			return ERR_PTR(-ENOMEM);
		goto retry;
	}

	s->s_fs_info = fc->s_fs_info;
	err = set(s, fc);
	if (err) {
		/* Leave s_fs_info with @fc before discarding the new sb. */
		s->s_fs_info = NULL;
		spin_unlock(&sb_lock);
		destroy_unused_super(s);
		return ERR_PTR(err);
	}
	/* s_fs_info is now referenced by the superblock, not by @fc. */
	fc->s_fs_info = NULL;
	s->s_type = fc->fs_type;
	s->s_iflags |= fc->s_iflags;
	strscpy(s->s_id, s->s_type->name, sizeof(s->s_id));
	/*
	 * Make the superblock visible on @super_blocks and @fs_supers.
	 * It's in a nascent state and users should wait on SB_BORN or
	 * SB_DYING to be set.
	 */
	list_add_tail(&s->s_list, &super_blocks);
	hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
	spin_unlock(&sb_lock);
	get_filesystem(s->s_type);
	shrinker_register(s->s_shrink);
	return s;

share_extant_sb:
	if (user_ns != old->s_user_ns || fc->exclusive) {
		spin_unlock(&sb_lock);
		/* @s may be NULL here; destroy_unused_super() handles that. */
		destroy_unused_super(s);
		if (fc->exclusive)
			warnfc(fc, "reusing existing filesystem not allowed");
		else
			warnfc(fc, "reusing existing filesystem in another namespace not allowed");
		return ERR_PTR(-EBUSY);
	}
	/* grab_super() drops sb_lock; on failure rescan from the start. */
	if (!grab_super(old))
		goto retry;
	destroy_unused_super(s);
	return old;
}
EXPORT_SYMBOL(sget_fc);

/**
 *	sget	-	find or create a superblock
 *	@type:	  filesystem type superblock should belong to
 *	@test:	  comparison callback
 *	@set:	  setup callback
 *	@flags:	  mount flags
 *	@data:	  argument to each of them
 *
 *	Return: an extant or newly allocated superblock on success, or an
 *	error pointer (-ENOMEM, -EBUSY when a matching superblock belongs to
 *	a different user namespace, or the error returned by @set).
 */
struct super_block *sget(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			int flags,
			void *data)
{
	struct user_namespace *user_ns = current_user_ns();
	struct super_block *s = NULL;
	struct super_block *old;
	int err;

	/* We don't yet pass the user namespace of the parent
	 * mount through to here so always use &init_user_ns
	 * until that changes.
	 */
	if (flags & SB_SUBMOUNT)
		user_ns = &init_user_ns;

retry:
	spin_lock(&sb_lock);
	if (test) {
		hlist_for_each_entry(old, &type->fs_supers, s_instances) {
			if (!test(old, data))
				continue;
			if (user_ns != old->s_user_ns) {
				spin_unlock(&sb_lock);
				destroy_unused_super(s);
				return ERR_PTR(-EBUSY);
			}
			/* grab_super() drops sb_lock; rescan on failure. */
			if (!grab_super(old))
				goto retry;
			destroy_unused_super(s);
			return old;
		}
	}
	if (!s) {
		/* Allocate outside sb_lock, then rescan the list. */
		spin_unlock(&sb_lock);
		s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns);
		if (!s)
			return ERR_PTR(-ENOMEM);
		goto retry;
	}

	err = set(s, data);
	if (err) {
		spin_unlock(&sb_lock);
		destroy_unused_super(s);
		return ERR_PTR(err);
	}
	s->s_type = type;
	strscpy(s->s_id, type->name, sizeof(s->s_id));
	list_add_tail(&s->s_list, &super_blocks);
	hlist_add_head(&s->s_instances, &type->fs_supers);
	spin_unlock(&sb_lock);
	get_filesystem(type);
	shrinker_register(s->s_shrink);
	return s;
}
EXPORT_SYMBOL(sget);

/* Release the shared superblock lock and drop a temporary reference. */
void drop_super(struct super_block *sb)
{
	super_unlock_shared(sb);
	put_super(sb);
}

EXPORT_SYMBOL(drop_super);

/* Release the exclusive superblock lock and drop a temporary reference. */
void drop_super_exclusive(struct super_block *sb)
{
	super_unlock_excl(sb);
	put_super(sb);
}
EXPORT_SYMBOL(drop_super_exclusive);

/*
 * Call @f on every superblock on @super_blocks for which
 * super_flags(sb, SB_DYING) is false.  @f runs without sb_lock held and
 * without the superblock locked; a temporary s_count reference pins the
 * current entry, and the previous entry's reference is only dropped once
 * sb_lock has been retaken.
 */
static void __iterate_supers(void (*f)(struct super_block *))
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (super_flags(sb, SB_DYING))
			continue;
		sb->s_count++;
		spin_unlock(&sb_lock);

		f(sb);

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}
/**
 *	iterate_supers - call function for all active superblocks
 *	@f: function to call
 *	@arg: argument to pass to it
 *
 *	Scans the superblock list and calls given function, passing it
 *	locked superblock and given argument.
*/
void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		bool locked;

		/* Pin the entry so sb_lock can be dropped around the callback. */
		sb->s_count++;
		spin_unlock(&sb_lock);

		locked = super_lock_shared(sb);
		if (locked) {
			/* Only superblocks that have a root dentry are visited. */
			if (sb->s_root)
				f(sb, arg);
			super_unlock_shared(sb);
		}

		spin_lock(&sb_lock);
		/* Drop the previous entry's pin only after retaking sb_lock. */
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}

/**
 *	iterate_supers_type - call function for superblocks of given type
 *	@type: fs type
 *	@f: function to call
 *	@arg: argument to pass to it
 *
 *	Scans the superblock list and calls given function, passing it
 *	locked superblock and given argument.
 */
void iterate_supers_type(struct file_system_type *type,
	void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
		bool locked;

		/* Pin the entry so sb_lock can be dropped around the callback. */
		sb->s_count++;
		spin_unlock(&sb_lock);

		locked = super_lock_shared(sb);
		if (locked) {
			if (sb->s_root)
				f(sb, arg);
			super_unlock_shared(sb);
		}

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}

EXPORT_SYMBOL(iterate_supers_type);

/*
 * Look up the superblock whose s_dev equals @dev.  On success it is returned
 * locked through super_lock(sb, @excl) and with a temporary s_count
 * reference held.  Returns NULL when no superblock matches, or when the
 * first match could not be locked or no longer has a root dentry; the scan
 * stops at the first match either way.
 */
struct super_block *user_get_super(dev_t dev, bool excl)
{
	struct super_block *sb;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (sb->s_dev ==  dev) {
			bool locked;

			sb->s_count++;
			spin_unlock(&sb_lock);
			/* still alive? */
			locked = super_lock(sb, excl);
			if (locked) {
				if (sb->s_root)
					return sb;
				super_unlock(sb, excl);
			}
			/* nope, got unmounted */
			spin_lock(&sb_lock);
			__put_super(sb);
			break;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

/**
 * reconfigure_super - asks filesystem to change superblock parameters
 * @fc: The superblock and configuration
 *
 * Alters the configuration parameters of a live superblock.
*/ int reconfigure_super(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; int retval; bool remount_ro = false; bool remount_rw = false; bool force = fc->sb_flags & SB_FORCE; if (fc->sb_flags_mask & ~MS_RMT_MASK) return -EINVAL; if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; retval = security_sb_remount(sb, fc->security); if (retval) return retval; if (fc->sb_flags_mask & SB_RDONLY) { #ifdef CONFIG_BLOCK if (!(fc->sb_flags & SB_RDONLY) && sb->s_bdev && bdev_read_only(sb->s_bdev)) return -EACCES; #endif remount_rw = !(fc->sb_flags & SB_RDONLY) && sb_rdonly(sb); remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb); } if (remount_ro) { if (!hlist_empty(&sb->s_pins)) { super_unlock_excl(sb); group_pin_kill(&sb->s_pins); __super_lock_excl(sb); if (!sb->s_root) return 0; if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; remount_ro = !sb_rdonly(sb); } } shrink_dcache_sb(sb); /* If we are reconfiguring to RDONLY and current sb is read/write, * make sure there are no files open for writing. */ if (remount_ro) { if (force) { sb_start_ro_state_change(sb); } else { retval = sb_prepare_remount_readonly(sb); if (retval) return retval; } } else if (remount_rw) { /* * Protect filesystem's reconfigure code from writes from * userspace until reconfigure finishes. */ sb_start_ro_state_change(sb); } if (fc->ops->reconfigure) { retval = fc->ops->reconfigure(fc); if (retval) { if (!force) goto cancel_readonly; /* If forced remount, go ahead despite any errors */ WARN(1, "forced remount of a %s fs returned %i\n", sb->s_type->name, retval); } } WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) | (fc->sb_flags & fc->sb_flags_mask))); sb_end_ro_state_change(sb); /* * Some filesystems modify their metadata via some other path than the * bdev buffer cache (eg. use a private mapping, or directories in * pagecache, etc). Also file data modifications go via their own * mappings. 
So If we try to mount readonly then copy the filesystem * from bdev, we could get stale data, so invalidate it to give a best * effort at coherency. */ if (remount_ro && sb->s_bdev) invalidate_bdev(sb->s_bdev); return 0; cancel_readonly: sb_end_ro_state_change(sb); return retval; } static void do_emergency_remount_callback(struct super_block *sb) { bool locked = super_lock_excl(sb); if (locked && sb->s_root && sb->s_bdev && !sb_rdonly(sb)) { struct fs_context *fc; fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY | SB_FORCE, SB_RDONLY); if (!IS_ERR(fc)) { if (parse_monolithic_mount_data(fc, NULL) == 0) (void)reconfigure_super(fc); put_fs_context(fc); } } if (locked) super_unlock_excl(sb); } static void do_emergency_remount(struct work_struct *work) { __iterate_supers(do_emergency_remount_callback); kfree(work); printk("Emergency Remount complete\n"); } void emergency_remount(void) { struct work_struct *work; work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) { INIT_WORK(work, do_emergency_remount); schedule_work(work); } } static void do_thaw_all_callback(struct super_block *sb) { bool locked = super_lock_excl(sb); if (locked && sb->s_root) { if (IS_ENABLED(CONFIG_BLOCK)) while (sb->s_bdev && !bdev_thaw(sb->s_bdev)) pr_warn("Emergency Thaw on %pg\n", sb->s_bdev); thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE); return; } if (locked) super_unlock_excl(sb); } static void do_thaw_all(struct work_struct *work) { __iterate_supers(do_thaw_all_callback); kfree(work); printk(KERN_WARNING "Emergency Thaw complete\n"); } /** * emergency_thaw_all -- forcibly thaw every frozen filesystem * * Used for emergency unfreeze of all filesystems via SysRq */ void emergency_thaw_all(void) { struct work_struct *work; work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) { INIT_WORK(work, do_thaw_all); schedule_work(work); } } static DEFINE_IDA(unnamed_dev_ida); /** * get_anon_bdev - Allocate a block device for filesystems which don't have one. * @p: Pointer to a dev_t. 
*
 * Filesystems which don't use real block devices can call this function
 * to allocate a virtual block device.
 *
 * Context: Any context.  Frequently called while holding sb_lock.
 * Return: 0 on success, -EMFILE if there are no anonymous bdevs left
 * or -ENOMEM if memory allocation failed.
 */
int get_anon_bdev(dev_t *p)
{
	int dev;

	/*
	 * Many userspace utilities consider an FSID of 0 invalid.
	 * Always return at least 1 from get_anon_bdev.
	 */
	dev = ida_alloc_range(&unnamed_dev_ida, 1, (1 << MINORBITS) - 1,
			GFP_ATOMIC);
	/* Running out of IDs is reported to callers as -EMFILE. */
	if (dev == -ENOSPC)
		dev = -EMFILE;
	if (dev < 0)
		return dev;

	*p = MKDEV(0, dev);
	return 0;
}
EXPORT_SYMBOL(get_anon_bdev);

/* Return the minor number of an anonymous device to the allocator. */
void free_anon_bdev(dev_t dev)
{
	ida_free(&unnamed_dev_ida, MINOR(dev));
}
EXPORT_SYMBOL(free_anon_bdev);

/* sget() @set callback: give @s an anonymous device number; @data is unused. */
int set_anon_super(struct super_block *s, void *data)
{
	return get_anon_bdev(&s->s_dev);
}
EXPORT_SYMBOL(set_anon_super);

/*
 * Shut down a superblock that uses an anonymous device number.  s_dev is
 * sampled up front and only released after kill_super_notify() has run.
 */
void kill_anon_super(struct super_block *sb)
{
	dev_t dev = sb->s_dev;
	generic_shutdown_super(sb);
	kill_super_notify(sb);
	free_anon_bdev(dev);
}
EXPORT_SYMBOL(kill_anon_super);

/*
 * Like kill_anon_super(), but first calls d_genocide() on the root dentry
 * if there is one.
 */
void kill_litter_super(struct super_block *sb)
{
	if (sb->s_root)
		d_genocide(sb->s_root);
	kill_anon_super(sb);
}
EXPORT_SYMBOL(kill_litter_super);

/* sget_fc() @set callback wrapping set_anon_super(); @fc is unused. */
int set_anon_super_fc(struct super_block *sb, struct fs_context *fc)
{
	return set_anon_super(sb, NULL);
}
EXPORT_SYMBOL(set_anon_super_fc);

/* sget_fc() @test callback: match on identical s_fs_info pointers. */
static int test_keyed_super(struct super_block *sb, struct fs_context *fc)
{
	return sb->s_fs_info == fc->s_fs_info;
}

/* sget_fc() @test callback: every existing superblock of the type matches. */
static int test_single_super(struct super_block *s, struct fs_context *fc)
{
	return 1;
}

/*
 * Common implementation of get_tree_nodev/single/keyed: obtain a superblock
 * through sget_fc() with an anonymous device, call @fill_super on it if it
 * has no root dentry yet, and store a reference to the root in fc->root.
 *
 * Returns 0 on success or a negative error; if @fill_super fails the
 * superblock is deactivated.
 */
static int vfs_get_super(struct fs_context *fc,
		int (*test)(struct super_block *, struct fs_context *),
		int (*fill_super)(struct super_block *sb,
				  struct fs_context *fc))
{
	struct super_block *sb;
	int err;

	sb = sget_fc(fc, test, set_anon_super_fc);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	/* A superblock without a root dentry still has to be filled in. */
	if (!sb->s_root) {
		err = fill_super(sb, fc);
		if (err)
			goto error;

		sb->s_flags |= SB_ACTIVE;
	}

	fc->root = dget(sb->s_root);
	return 0;

error:
	deactivate_locked_super(sb);
	return err;
}

/* Get a tree with no @test callback, so no existing superblock is matched. */
int get_tree_nodev(struct fs_context *fc,
		  int (*fill_super)(struct super_block *sb,
				    struct fs_context *fc))
{
	return vfs_get_super(fc, NULL, fill_super);
}
EXPORT_SYMBOL(get_tree_nodev);

/* Get a tree, matching any existing superblock of the filesystem type. */
int get_tree_single(struct fs_context *fc,
		  int (*fill_super)(struct super_block *sb,
				    struct fs_context *fc))
{
	return vfs_get_super(fc, test_single_super, fill_super);
}
EXPORT_SYMBOL(get_tree_single);

/*
 * Get a tree, matching existing superblocks by @key: the key is stored in
 * fc->s_fs_info and compared against each superblock's s_fs_info.
 */
int get_tree_keyed(struct fs_context *fc,
		  int (*fill_super)(struct super_block *sb,
				    struct fs_context *fc),
		void *key)
{
	fc->s_fs_info = key;
	return vfs_get_super(fc, test_keyed_super, fill_super);
}
EXPORT_SYMBOL(get_tree_keyed);

/* sget() @set callback: @data points to the dev_t to store in s_dev. */
static int set_bdev_super(struct super_block *s, void *data)
{
	s->s_dev = *(dev_t *)data;
	return 0;
}

/* sget_fc() @set callback: take the device number from fc->sget_key. */
static int super_s_dev_set(struct super_block *s, struct fs_context *fc)
{
	return set_bdev_super(s, fc->sget_key);
}

/*
 * sget_fc() @test callback: match a non-retired superblock whose s_dev
 * equals the device number that fc->sget_key points to.
 */
static int super_s_dev_test(struct super_block *s, struct fs_context *fc)
{
	return !(s->s_iflags & SB_I_RETIRED) &&
		s->s_dev == *(dev_t *)fc->sget_key;
}

/**
 * sget_dev - Find or create a superblock by device number
 * @fc: Filesystem context.
 * @dev: device number
 *
 * Find or create a superblock using the provided device number that
 * will be stored in fc->sget_key.
 *
 * If an extant superblock is matched, then that will be returned with
 * an elevated reference count that the caller must transfer or discard.
 *
 * If no match is made, a new superblock will be allocated and basic
 * initialisation will be performed (s_type, s_fs_info, s_id, s_dev will
 * be set). The superblock will be published and it will be returned in
 * a partially constructed state with SB_BORN and SB_ACTIVE as yet
 * unset.
 *
 * Return: an existing or newly created superblock on success, an error
 *         pointer on failure.
 */
struct super_block *sget_dev(struct fs_context *fc, dev_t dev)
{
	/* Note: sget_key is left pointing at this function's @dev argument. */
	fc->sget_key = &dev;
	return sget_fc(fc, super_s_dev_test, super_s_dev_set);
}
EXPORT_SYMBOL(sget_dev);

#ifdef CONFIG_BLOCK
/*
 * Lock the superblock that is holder of the bdev.
Returns the superblock
 * pointer if we successfully locked the superblock and it is alive. Otherwise
 * we return NULL and just unlock bdev->bd_holder_lock.
 *
 * "Alive" here means the superblock has a root dentry and SB_ACTIVE set.
 * @excl selects the lock mode passed on to super_lock().
 *
 * The function must be called with bdev->bd_holder_lock and releases it.
 */
static struct super_block *bdev_super_lock(struct block_device *bdev, bool excl)
	__releases(&bdev->bd_holder_lock)
{
	struct super_block *sb = bdev->bd_holder;
	bool locked;

	lockdep_assert_held(&bdev->bd_holder_lock);
	lockdep_assert_not_held(&sb->s_umount);
	lockdep_assert_not_held(&bdev->bd_disk->open_mutex);

	/* Make sure sb doesn't go away from under us */
	spin_lock(&sb_lock);
	sb->s_count++;
	spin_unlock(&sb_lock);

	mutex_unlock(&bdev->bd_holder_lock);

	locked = super_lock(sb, excl);

	/*
	 * If the superblock wasn't already SB_DYING then we hold
	 * s_umount and can safely drop our temporary reference.
         */
	put_super(sb);

	if (!locked)
		return NULL;

	if (!sb->s_root || !(sb->s_flags & SB_ACTIVE)) {
		super_unlock(sb, excl);
		return NULL;
	}

	return sb;
}

/*
 * Holder op: the block device is going away.  Unless this is a @surprise
 * removal the filesystem is synced first; then dentries and inodes are
 * dropped and the filesystem's optional ->shutdown() method is called.
 * Does nothing if the holder superblock cannot be locked or is not alive.
 */
static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
{
	struct super_block *sb;

	sb = bdev_super_lock(bdev, false);
	if (!sb)
		return;

	if (!surprise)
		sync_filesystem(sb);
	shrink_dcache_sb(sb);
	invalidate_inodes(sb);
	if (sb->s_op->shutdown)
		sb->s_op->shutdown(sb);

	super_unlock_shared(sb);
}

/*
 * Holder op: sync the filesystem that holds @bdev, if it can be locked and
 * is alive.
 */
static void fs_bdev_sync(struct block_device *bdev)
{
	struct super_block *sb;

	sb = bdev_super_lock(bdev, false);
	if (!sb)
		return;

	sync_filesystem(sb);
	super_unlock_shared(sb);
}

/*
 * Get an active reference on the superblock holding @bdev.  The superblock
 * is locked exclusively only while the reference is taken.  Returns NULL if
 * the superblock could not be locked, is not alive, or has no active
 * references left.
 */
static struct super_block *get_bdev_super(struct block_device *bdev)
{
	bool active = false;
	struct super_block *sb;

	sb = bdev_super_lock(bdev, true);
	if (sb) {
		active = atomic_inc_not_zero(&sb->s_active);
		super_unlock_excl(sb);
	}
	if (!active)
		return NULL;
	return sb;
}

/**
 * fs_bdev_freeze - freeze owning filesystem of block device
 * @bdev: block device
 *
 * Freeze the filesystem that owns this block device if it is still
 * active.
* * A filesystem that owns multiple block devices may be frozen from each * block device and won't be unfrozen until all block devices are * unfrozen. Each block device can only freeze the filesystem once as we * nest freezes for block devices in the block layer. * * Return: If the freeze was successful zero is returned. If the freeze * failed a negative error code is returned. */ static int fs_bdev_freeze(struct block_device *bdev) { struct super_block *sb; int error = 0; lockdep_assert_held(&bdev->bd_fsfreeze_mutex); sb = get_bdev_super(bdev); if (!sb) return -EINVAL; if (sb->s_op->freeze_super) error = sb->s_op->freeze_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); else error = freeze_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); if (!error) error = sync_blockdev(bdev); deactivate_super(sb); return error; } /** * fs_bdev_thaw - thaw owning filesystem of block device * @bdev: block device * * Thaw the filesystem that owns this block device. * * A filesystem that owns multiple block devices may be frozen from each * block device and won't be unfrozen until all block devices are * unfrozen. Each block device can only freeze the filesystem once as we * nest freezes for block devices in the block layer. * * Return: If the thaw was successful zero is returned. If the thaw * failed a negative error code is returned. If this function * returns zero it doesn't mean that the filesystem is unfrozen * as it may have been frozen multiple times (kernel may hold a * freeze or might be frozen from other block devices). 
*/ static int fs_bdev_thaw(struct block_device *bdev) { struct super_block *sb; int error; lockdep_assert_held(&bdev->bd_fsfreeze_mutex); sb = get_bdev_super(bdev); if (WARN_ON_ONCE(!sb)) return -EINVAL; if (sb->s_op->thaw_super) error = sb->s_op->thaw_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); else error = thaw_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); deactivate_super(sb); return error; } const struct blk_holder_ops fs_holder_ops = { .mark_dead = fs_bdev_mark_dead, .sync = fs_bdev_sync, .freeze = fs_bdev_freeze, .thaw = fs_bdev_thaw, }; EXPORT_SYMBOL_GPL(fs_holder_ops); int setup_bdev_super(struct super_block *sb, int sb_flags, struct fs_context *fc) { blk_mode_t mode = sb_open_mode(sb_flags); struct bdev_handle *bdev_handle; struct block_device *bdev; bdev_handle = bdev_open_by_dev(sb->s_dev, mode, sb, &fs_holder_ops); if (IS_ERR(bdev_handle)) { if (fc) errorf(fc, "%s: Can't open blockdev", fc->source); return PTR_ERR(bdev_handle); } bdev = bdev_handle->bdev; /* * This really should be in blkdev_get_by_dev, but right now can't due * to legacy issues that require us to allow opening a block device node * writable from userspace even for a read-only block device. */ if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { bdev_release(bdev_handle); return -EACCES; } /* * It is enough to check bdev was not frozen before we set * s_bdev as freezing will wait until SB_BORN is set. 
*/ if (atomic_read(&bdev->bd_fsfreeze_count) > 0) { if (fc) warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); bdev_release(bdev_handle); return -EBUSY; } spin_lock(&sb_lock); sb->s_bdev_handle = bdev_handle; sb->s_bdev = bdev; sb->s_bdi = bdi_get(bdev->bd_disk->bdi); if (bdev_stable_writes(bdev)) sb->s_iflags |= SB_I_STABLE_WRITES; spin_unlock(&sb_lock); snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); shrinker_debugfs_rename(sb->s_shrink, "sb-%s:%s", sb->s_type->name, sb->s_id); sb_set_blocksize(sb, block_size(bdev)); return 0; } EXPORT_SYMBOL_GPL(setup_bdev_super); /** * get_tree_bdev - Get a superblock based on a single block device * @fc: The filesystem context holding the parameters * @fill_super: Helper to initialise a new superblock */ int get_tree_bdev(struct fs_context *fc, int (*fill_super)(struct super_block *, struct fs_context *)) { struct super_block *s; int error = 0; dev_t dev; if (!fc->source) return invalf(fc, "No source specified"); error = lookup_bdev(fc->source, &dev); if (error) { errorf(fc, "%s: Can't lookup blockdev", fc->source); return error; } fc->sb_flags |= SB_NOSEC; s = sget_dev(fc, dev); if (IS_ERR(s)) return PTR_ERR(s); if (s->s_root) { /* Don't summarily change the RO/RW state. 
*/ if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { warnf(fc, "%pg: Can't mount, would change RO state", s->s_bdev); deactivate_locked_super(s); return -EBUSY; } } else { error = setup_bdev_super(s, fc->sb_flags, fc); if (!error) error = fill_super(s, fc); if (error) { deactivate_locked_super(s); return error; } s->s_flags |= SB_ACTIVE; } BUG_ON(fc->root); fc->root = dget(s->s_root); return 0; } EXPORT_SYMBOL(get_tree_bdev); static int test_bdev_super(struct super_block *s, void *data) { return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data; } struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)) { struct super_block *s; int error; dev_t dev; error = lookup_bdev(dev_name, &dev); if (error) return ERR_PTR(error); flags |= SB_NOSEC; s = sget(fs_type, test_bdev_super, set_bdev_super, flags, &dev); if (IS_ERR(s)) return ERR_CAST(s); if (s->s_root) { if ((flags ^ s->s_flags) & SB_RDONLY) { deactivate_locked_super(s); return ERR_PTR(-EBUSY); } } else { error = setup_bdev_super(s, flags, NULL); if (!error) error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); return ERR_PTR(error); } s->s_flags |= SB_ACTIVE; } return dget(s->s_root); } EXPORT_SYMBOL(mount_bdev); void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; generic_shutdown_super(sb); if (bdev) { sync_blockdev(bdev); bdev_release(sb->s_bdev_handle); } } EXPORT_SYMBOL(kill_block_super); #endif struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)) { int error; struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); error = fill_super(s, data, flags & SB_SILENT ? 
1 : 0); if (error) { deactivate_locked_super(s); return ERR_PTR(error); } s->s_flags |= SB_ACTIVE; return dget(s->s_root); } EXPORT_SYMBOL(mount_nodev); int reconfigure_single(struct super_block *s, int flags, void *data) { struct fs_context *fc; int ret; /* The caller really need to be passing fc down into mount_single(), * then a chunk of this can be removed. [Bollocks -- AV] * Better yet, reconfiguration shouldn't happen, but rather the second * mount should be rejected if the parameters are not compatible. */ fc = fs_context_for_reconfigure(s->s_root, flags, MS_RMT_MASK); if (IS_ERR(fc)) return PTR_ERR(fc); ret = parse_monolithic_mount_data(fc, data); if (ret < 0) goto out; ret = reconfigure_super(fc); out: put_fs_context(fc); return ret; } static int compare_single(struct super_block *s, void *p) { return 1; } struct dentry *mount_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)) { struct super_block *s; int error; s = sget(fs_type, compare_single, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); if (!s->s_root) { error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); if (!error) s->s_flags |= SB_ACTIVE; } else { error = reconfigure_single(s, flags, data); } if (unlikely(error)) { deactivate_locked_super(s); return ERR_PTR(error); } return dget(s->s_root); } EXPORT_SYMBOL(mount_single); /** * vfs_get_tree - Get the mountable root * @fc: The superblock configuration context. * * The filesystem is invoked to get or create a superblock which can then later * be used for mounting. The filesystem places a pointer to the root to be * used for mounting in @fc->root. */ int vfs_get_tree(struct fs_context *fc) { struct super_block *sb; int error; if (fc->root) return -EBUSY; /* Get the mountable root in fc->root, with a ref on the root and a ref * on the superblock. 
*/ error = fc->ops->get_tree(fc); if (error < 0) return error; if (!fc->root) { pr_err("Filesystem %s get_tree() didn't set fc->root\n", fc->fs_type->name); /* We don't know what the locking state of the superblock is - * if there is a superblock. */ BUG(); } sb = fc->root->d_sb; WARN_ON(!sb->s_bdi); /* * super_wake() contains a memory barrier which also care of * ordering for super_cache_count(). We place it before setting * SB_BORN as the data dependency between the two functions is * the superblock structure contents that we just set up, not * the SB_BORN flag. */ super_wake(sb, SB_BORN); error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL); if (unlikely(error)) { fc_drop_locked(fc); return error; } /* * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE * but s_maxbytes was an unsigned long long for many releases. Throw * this warning for a little while to try and catch filesystems that * violate this rule. */ WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " "negative value (%lld)\n", fc->fs_type->name, sb->s_maxbytes); return 0; } EXPORT_SYMBOL(vfs_get_tree); /* * Setup private BDI for given superblock. It gets automatically cleaned up * in generic_shutdown_super(). */ int super_setup_bdi_name(struct super_block *sb, char *fmt, ...) { struct backing_dev_info *bdi; int err; va_list args; bdi = bdi_alloc(NUMA_NO_NODE); if (!bdi) return -ENOMEM; va_start(args, fmt); err = bdi_register_va(bdi, fmt, args); va_end(args); if (err) { bdi_put(bdi); return err; } WARN_ON(sb->s_bdi != &noop_backing_dev_info); sb->s_bdi = bdi; sb->s_iflags |= SB_I_PERSB_BDI; return 0; } EXPORT_SYMBOL(super_setup_bdi_name); /* * Setup private BDI for given superblock. I gets automatically cleaned up * in generic_shutdown_super(). 
*/ int super_setup_bdi(struct super_block *sb) { static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); return super_setup_bdi_name(sb, "%.28s-%ld", sb->s_type->name, atomic_long_inc_return(&bdi_seq)); } EXPORT_SYMBOL(super_setup_bdi); /** * sb_wait_write - wait until all writers to given file system finish * @sb: the super for which we wait * @level: type of writers we wait for (normal vs page fault) * * This function waits until there are no writers of given type to given file * system. */ static void sb_wait_write(struct super_block *sb, int level) { percpu_down_write(sb->s_writers.rw_sem + level-1); } /* * We are going to return to userspace and forget about these locks, the * ownership goes to the caller of thaw_super() which does unlock(). */ static void lockdep_sb_freeze_release(struct super_block *sb) { int level; for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_); } /* * Tell lockdep we are holding these locks before we call ->unfreeze_fs(sb). 
*/ static void lockdep_sb_freeze_acquire(struct super_block *sb) { int level; for (level = 0; level < SB_FREEZE_LEVELS; ++level) percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_); } static void sb_freeze_unlock(struct super_block *sb, int level) { for (level--; level >= 0; level--) percpu_up_write(sb->s_writers.rw_sem + level); } static int wait_for_partially_frozen(struct super_block *sb) { int ret = 0; do { unsigned short old = sb->s_writers.frozen; up_write(&sb->s_umount); ret = wait_var_event_killable(&sb->s_writers.frozen, sb->s_writers.frozen != old); down_write(&sb->s_umount); } while (ret == 0 && sb->s_writers.frozen != SB_UNFROZEN && sb->s_writers.frozen != SB_FREEZE_COMPLETE); return ret; } #define FREEZE_HOLDERS (FREEZE_HOLDER_KERNEL | FREEZE_HOLDER_USERSPACE) #define FREEZE_FLAGS (FREEZE_HOLDERS | FREEZE_MAY_NEST) static inline int freeze_inc(struct super_block *sb, enum freeze_holder who) { WARN_ON_ONCE((who & ~FREEZE_FLAGS)); WARN_ON_ONCE(hweight32(who & FREEZE_HOLDERS) > 1); if (who & FREEZE_HOLDER_KERNEL) ++sb->s_writers.freeze_kcount; if (who & FREEZE_HOLDER_USERSPACE) ++sb->s_writers.freeze_ucount; return sb->s_writers.freeze_kcount + sb->s_writers.freeze_ucount; } static inline int freeze_dec(struct super_block *sb, enum freeze_holder who) { WARN_ON_ONCE((who & ~FREEZE_FLAGS)); WARN_ON_ONCE(hweight32(who & FREEZE_HOLDERS) > 1); if ((who & FREEZE_HOLDER_KERNEL) && sb->s_writers.freeze_kcount) --sb->s_writers.freeze_kcount; if ((who & FREEZE_HOLDER_USERSPACE) && sb->s_writers.freeze_ucount) --sb->s_writers.freeze_ucount; return sb->s_writers.freeze_kcount + sb->s_writers.freeze_ucount; } static inline bool may_freeze(struct super_block *sb, enum freeze_holder who) { WARN_ON_ONCE((who & ~FREEZE_FLAGS)); WARN_ON_ONCE(hweight32(who & FREEZE_HOLDERS) > 1); if (who & FREEZE_HOLDER_KERNEL) return (who & FREEZE_MAY_NEST) || sb->s_writers.freeze_kcount == 0; if (who & FREEZE_HOLDER_USERSPACE) return (who & FREEZE_MAY_NEST) || 
sb->s_writers.freeze_ucount == 0; return false; } /** * freeze_super - lock the filesystem and force it into a consistent state * @sb: the super to lock * @who: context that wants to freeze * * Syncs the super to make sure the filesystem is consistent and calls the fs's * freeze_fs. Subsequent calls to this without first thawing the fs may return * -EBUSY. * * @who should be: * * %FREEZE_HOLDER_USERSPACE if userspace wants to freeze the fs; * * %FREEZE_HOLDER_KERNEL if the kernel wants to freeze the fs. * * %FREEZE_MAY_NEST whether nesting freeze and thaw requests is allowed. * * The @who argument distinguishes between the kernel and userspace trying to * freeze the filesystem. Although there cannot be multiple kernel freezes or * multiple userspace freezes in effect at any given time, the kernel and * userspace can both hold a filesystem frozen. The filesystem remains frozen * until there are no kernel or userspace freezes in effect. * * A filesystem may hold multiple devices and thus a filesystems may be * frozen through the block layer via multiple block devices. In this * case the request is marked as being allowed to nest by passing * FREEZE_MAY_NEST. The filesystem remains frozen until all block * devices are unfrozen. If multiple freezes are attempted without * FREEZE_MAY_NEST -EBUSY will be returned. * * During this function, sb->s_writers.frozen goes through these values: * * SB_UNFROZEN: File system is normal, all writes progress as usual. * * SB_FREEZE_WRITE: The file system is in the process of being frozen. New * writes should be blocked, though page faults are still allowed. We wait for * all writes to complete and then proceed to the next stage. * * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked * but internal fs threads can still modify the filesystem (although they * should not dirty new pages or inodes), writeback can run etc. 
* After waiting
 * for all running page faults we sync the filesystem which will clean all
 * dirty pages and inodes (no new dirty pages or inodes can be created when
 * sync is running).
 *
 * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
 * modification are blocked (e.g. XFS preallocation truncation on inode
 * reclaim). This is usually implemented by blocking new transactions for
 * filesystems that have them and need this additional guard. After all
 * internal writers are finished we call ->freeze_fs() to finish filesystem
 * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
 * mostly auxiliary for filesystems to verify they do not modify frozen fs.
 *
 * sb->s_writers.frozen is protected by sb->s_umount.
 *
 * Return: If the freeze was successful zero is returned. If the freeze
 *         failed a negative error code is returned.
 */
int freeze_super(struct super_block *sb, enum freeze_holder who)
{
	int ret;

	if (!super_lock_excl(sb)) {
		WARN_ON_ONCE("Dying superblock while freezing!");
		return -EINVAL;
	}
	/* Active reference for the freeze; dropped again on every failure path. */
	atomic_inc(&sb->s_active);

retry:
	if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
		/*
		 * Already frozen: only the holder count changes.
		 * NOTE(review): ret is the truth value of the WARN condition
		 * here, so it is presumably 1 rather than a negative errno if
		 * the warning fires - confirm callers cope with that.
		 */
		if (may_freeze(sb, who))
			ret = !!WARN_ON_ONCE(freeze_inc(sb, who) == 1);
		else
			ret = -EBUSY;
		/* All freezers share a single active reference. */
		deactivate_locked_super(sb);
		return ret;
	}

	if (sb->s_writers.frozen != SB_UNFROZEN) {
		/* Someone else is part-way through a freeze or thaw; wait it out. */
		ret = wait_for_partially_frozen(sb);
		if (ret) {
			deactivate_locked_super(sb);
			return ret;
		}

		goto retry;
	}

	if (sb_rdonly(sb)) {
		/* Nothing to do really... */
		WARN_ON_ONCE(freeze_inc(sb, who) > 1);
		sb->s_writers.frozen = SB_FREEZE_COMPLETE;
		wake_up_var(&sb->s_writers.frozen);
		super_unlock_excl(sb);
		return 0;
	}

	sb->s_writers.frozen = SB_FREEZE_WRITE;
	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
	super_unlock_excl(sb);
	sb_wait_write(sb, SB_FREEZE_WRITE);
	__super_lock_excl(sb);

	/* Now we go and block page faults... */
	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
	sb_wait_write(sb, SB_FREEZE_PAGEFAULT);

	/* All writers are done so after syncing there won't be dirty data */
	ret = sync_filesystem(sb);
	if (ret) {
		/* Undo the two levels taken so far and wake any waiters. */
		sb->s_writers.frozen = SB_UNFROZEN;
		sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
		wake_up_var(&sb->s_writers.frozen);
		deactivate_locked_super(sb);
		return ret;
	}

	/* Now wait for internal filesystem counter */
	sb->s_writers.frozen = SB_FREEZE_FS;
	sb_wait_write(sb, SB_FREEZE_FS);

	if (sb->s_op->freeze_fs) {
		ret = sb->s_op->freeze_fs(sb);
		if (ret) {
			printk(KERN_ERR
				"VFS:Filesystem freeze failed\n");
			/* Undo all three levels and wake any waiters. */
			sb->s_writers.frozen = SB_UNFROZEN;
			sb_freeze_unlock(sb, SB_FREEZE_FS);
			wake_up_var(&sb->s_writers.frozen);
			deactivate_locked_super(sb);
			return ret;
		}
	}
	/*
	 * For debugging purposes so that fs can warn if it sees write activity
	 * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
	 */
	WARN_ON_ONCE(freeze_inc(sb, who) > 1);
	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
	wake_up_var(&sb->s_writers.frozen);
	lockdep_sb_freeze_release(sb);
	super_unlock_excl(sb);
	return 0;
}
EXPORT_SYMBOL(freeze_super);

/*
 * Undoes the effect of a freeze_super() call.  If the filesystem is
 * frozen both by userspace and the kernel, a thaw call from either source
 * removes that state without releasing the other state or unlocking the
 * filesystem.
 *
 * Called with the superblock locked exclusively; every path below either
 * unlocks it or hands it to deactivate_locked_super().
 */
static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
{
	int error = -EINVAL;

	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE)
		goto out_unlock;

	/*
	 * All freezers share a single active reference.
	 * So just unlock in case there are any left.
	 *
	 * NOTE(review): this path returns the initial -EINVAL even though
	 * the holder count was dropped - confirm that callers expect an
	 * error while other freezers remain.
	 */
	if (freeze_dec(sb, who))
		goto out_unlock;

	if (sb_rdonly(sb)) {
		/* The read-only freeze path took no semaphores; just flip the state. */
		sb->s_writers.frozen = SB_UNFROZEN;
		wake_up_var(&sb->s_writers.frozen);
		goto out_deactivate;
	}

	lockdep_sb_freeze_acquire(sb);

	if (sb->s_op->unfreeze_fs) {
		error = sb->s_op->unfreeze_fs(sb);
		if (error) {
			pr_err("VFS: Filesystem thaw failed\n");
			/* Still frozen: restore the count dropped above. */
			freeze_inc(sb, who);
			lockdep_sb_freeze_release(sb);
			goto out_unlock;
		}
	}

	sb->s_writers.frozen = SB_UNFROZEN;
	wake_up_var(&sb->s_writers.frozen);
	sb_freeze_unlock(sb, SB_FREEZE_FS);
out_deactivate:
	deactivate_locked_super(sb);
	return 0;

out_unlock:
	super_unlock_excl(sb);
	return error;
}

/**
 * thaw_super -- unlock filesystem
 * @sb: the super to thaw
 * @who: context that wants to thaw
 *
 * Unlocks the filesystem and marks it writeable again after freeze_super()
 * if there are no remaining freezes on the filesystem.
 *
 * @who should be:
 * * %FREEZE_HOLDER_USERSPACE if userspace wants to thaw the fs;
 * * %FREEZE_HOLDER_KERNEL if the kernel wants to thaw the fs.
 * * %FREEZE_MAY_NEST whether nesting freeze and thaw requests is allowed
 *
 * A filesystem may hold multiple devices and thus a filesystem may
 * have been frozen through the block layer via multiple block devices.
 * The filesystem remains frozen until all block devices are unfrozen.
 *
 * Return: -EINVAL if the superblock could not be locked, otherwise the
 *         result of thaw_super_locked().
 */
int thaw_super(struct super_block *sb, enum freeze_holder who)
{
	if (!super_lock_excl(sb)) {
		WARN_ON_ONCE("Dying superblock while thawing!");
		return -EINVAL;
	}
	return thaw_super_locked(sb, who);
}
EXPORT_SYMBOL(thaw_super);

/*
 * Create workqueue for deferred direct IO completions. We allocate the
 * workqueue when it's first needed. This avoids creating workqueue for
 * filesystems that don't need it and also allows us to create the workqueue
 * late enough so that we can include s_id in the name of the workqueue.
*/
int sb_init_dio_done_wq(struct super_block *sb)
{
	struct workqueue_struct *wq;

	wq = alloc_workqueue("dio/%s", WQ_MEM_RECLAIM, 0, sb->s_id);
	if (!wq)
		return -ENOMEM;

	/*
	 * Publish the workqueue atomically, because several DIOs can race to
	 * create it. If another one is already installed, ours is redundant
	 * and gets destroyed.
	 */
	if (cmpxchg(&sb->s_dio_done_wq, NULL, wq) != NULL)
		destroy_workqueue(wq);

	return 0;
}
EXPORT_SYMBOL_GPL(sb_init_dio_done_wq);
8 8 7 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 // SPDX-License-Identifier: GPL-2.0 /* * thermal.c - Generic Thermal Management Sysfs support. 
* * Copyright (C) 2008 Intel Corp * Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com> * Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/device.h> #include <linux/err.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/kdev_t.h> #include <linux/idr.h> #include <linux/thermal.h> #include <linux/reboot.h> #include <linux/string.h> #include <linux/of.h> #include <linux/suspend.h> #define CREATE_TRACE_POINTS #include "thermal_trace.h" #include "thermal_core.h" #include "thermal_hwmon.h" static DEFINE_IDA(thermal_tz_ida); static DEFINE_IDA(thermal_cdev_ida); static LIST_HEAD(thermal_tz_list); static LIST_HEAD(thermal_cdev_list); static LIST_HEAD(thermal_governor_list); static DEFINE_MUTEX(thermal_list_lock); static DEFINE_MUTEX(thermal_governor_lock); static struct thermal_governor *def_governor; /* * Governor section: set of functions to handle thermal governors * * Functions to help in the life cycle of thermal governors within * the thermal core and by the thermal governor code. */ static struct thermal_governor *__find_governor(const char *name) { struct thermal_governor *pos; if (!name || !name[0]) return def_governor; list_for_each_entry(pos, &thermal_governor_list, governor_list) if (!strncasecmp(name, pos->name, THERMAL_NAME_LENGTH)) return pos; return NULL; } /** * bind_previous_governor() - bind the previous governor of the thermal zone * @tz: a valid pointer to a struct thermal_zone_device * @failed_gov_name: the name of the governor that failed to register * * Register the previous governor of the thermal zone after a new * governor has failed to be bound. 
*/ static void bind_previous_governor(struct thermal_zone_device *tz, const char *failed_gov_name) { if (tz->governor && tz->governor->bind_to_tz) { if (tz->governor->bind_to_tz(tz)) { dev_err(&tz->device, "governor %s failed to bind and the previous one (%s) failed to bind again, thermal zone %s has no governor\n", failed_gov_name, tz->governor->name, tz->type); tz->governor = NULL; } } } /** * thermal_set_governor() - Switch to another governor * @tz: a valid pointer to a struct thermal_zone_device * @new_gov: pointer to the new governor * * Change the governor of thermal zone @tz. * * Return: 0 on success, an error if the new governor's bind_to_tz() failed. */ static int thermal_set_governor(struct thermal_zone_device *tz, struct thermal_governor *new_gov) { int ret = 0; if (tz->governor && tz->governor->unbind_from_tz) tz->governor->unbind_from_tz(tz); if (new_gov && new_gov->bind_to_tz) { ret = new_gov->bind_to_tz(tz); if (ret) { bind_previous_governor(tz, new_gov->name); return ret; } } tz->governor = new_gov; return ret; } int thermal_register_governor(struct thermal_governor *governor) { int err; const char *name; struct thermal_zone_device *pos; if (!governor) return -EINVAL; mutex_lock(&thermal_governor_lock); err = -EBUSY; if (!__find_governor(governor->name)) { bool match_default; err = 0; list_add(&governor->governor_list, &thermal_governor_list); match_default = !strncmp(governor->name, DEFAULT_THERMAL_GOVERNOR, THERMAL_NAME_LENGTH); if (!def_governor && match_default) def_governor = governor; } mutex_lock(&thermal_list_lock); list_for_each_entry(pos, &thermal_tz_list, node) { /* * only thermal zones with specified tz->tzp->governor_name * may run with tz->govenor unset */ if (pos->governor) continue; name = pos->tzp->governor_name; if (!strncasecmp(name, governor->name, THERMAL_NAME_LENGTH)) { int ret; ret = thermal_set_governor(pos, governor); if (ret) dev_err(&pos->device, "Failed to set governor %s for thermal zone %s: %d\n", governor->name, 
pos->type, ret); } } mutex_unlock(&thermal_list_lock); mutex_unlock(&thermal_governor_lock); return err; } void thermal_unregister_governor(struct thermal_governor *governor) { struct thermal_zone_device *pos; if (!governor) return; mutex_lock(&thermal_governor_lock); if (!__find_governor(governor->name)) goto exit; mutex_lock(&thermal_list_lock); list_for_each_entry(pos, &thermal_tz_list, node) { if (!strncasecmp(pos->governor->name, governor->name, THERMAL_NAME_LENGTH)) thermal_set_governor(pos, NULL); } mutex_unlock(&thermal_list_lock); list_del(&governor->governor_list); exit: mutex_unlock(&thermal_governor_lock); } int thermal_zone_device_set_policy(struct thermal_zone_device *tz, char *policy) { struct thermal_governor *gov; int ret = -EINVAL; mutex_lock(&thermal_governor_lock); mutex_lock(&tz->lock); gov = __find_governor(strim(policy)); if (!gov) goto exit; ret = thermal_set_governor(tz, gov); exit: mutex_unlock(&tz->lock); mutex_unlock(&thermal_governor_lock); thermal_notify_tz_gov_change(tz, policy); return ret; } int thermal_build_list_of_policies(char *buf) { struct thermal_governor *pos; ssize_t count = 0; mutex_lock(&thermal_governor_lock); list_for_each_entry(pos, &thermal_governor_list, governor_list) { count += sysfs_emit_at(buf, count, "%s ", pos->name); } count += sysfs_emit_at(buf, count, "\n"); mutex_unlock(&thermal_governor_lock); return count; } static void __init thermal_unregister_governors(void) { struct thermal_governor **governor; for_each_governor_table(governor) thermal_unregister_governor(*governor); } static int __init thermal_register_governors(void) { int ret = 0; struct thermal_governor **governor; for_each_governor_table(governor) { ret = thermal_register_governor(*governor); if (ret) { pr_err("Failed to register governor: '%s'", (*governor)->name); break; } pr_info("Registered thermal governor '%s'", (*governor)->name); } if (ret) { struct thermal_governor **gov; for_each_governor_table(gov) { if (gov == governor) break; 
thermal_unregister_governor(*gov); } } return ret; } /* * Zone update section: main control loop applied to each zone while monitoring * * in polling mode. The monitoring is done using a workqueue. * Same update may be done on a zone by calling thermal_zone_device_update(). * * An update means: * - Non-critical trips will invoke the governor responsible for that zone; * - Hot trips will produce a notification to userspace; * - Critical trip point will cause a system shutdown. */ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz, unsigned long delay) { if (delay) mod_delayed_work(system_freezable_power_efficient_wq, &tz->poll_queue, delay); else cancel_delayed_work(&tz->poll_queue); } static void monitor_thermal_zone(struct thermal_zone_device *tz) { if (tz->mode != THERMAL_DEVICE_ENABLED) thermal_zone_device_set_polling(tz, 0); else if (tz->passive) thermal_zone_device_set_polling(tz, tz->passive_delay_jiffies); else if (tz->polling_delay_jiffies) thermal_zone_device_set_polling(tz, tz->polling_delay_jiffies); } static void handle_non_critical_trips(struct thermal_zone_device *tz, const struct thermal_trip *trip) { tz->governor ? tz->governor->throttle(tz, trip) : def_governor->throttle(tz, trip); } void thermal_governor_update_tz(struct thermal_zone_device *tz, enum thermal_notify_event reason) { if (!tz->governor || !tz->governor->update_tz) return; tz->governor->update_tz(tz, reason); } static void thermal_zone_device_halt(struct thermal_zone_device *tz, bool shutdown) { /* * poweroff_delay_ms must be a carefully profiled positive value. * Its a must for forced_emergency_poweroff_work to be scheduled. 
*/ int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS; const char *msg = "Temperature too high"; dev_emerg(&tz->device, "%s: critical temperature reached\n", tz->type); if (shutdown) hw_protection_shutdown(msg, poweroff_delay_ms); else hw_protection_reboot(msg, poweroff_delay_ms); } void thermal_zone_device_critical(struct thermal_zone_device *tz) { thermal_zone_device_halt(tz, true); } EXPORT_SYMBOL(thermal_zone_device_critical); void thermal_zone_device_critical_reboot(struct thermal_zone_device *tz) { thermal_zone_device_halt(tz, false); } static void handle_critical_trips(struct thermal_zone_device *tz, const struct thermal_trip *trip) { /* If we have not crossed the trip_temp, we do not care. */ if (trip->temperature <= 0 || tz->temperature < trip->temperature) return; trace_thermal_zone_trip(tz, thermal_zone_trip_id(tz, trip), trip->type); if (trip->type == THERMAL_TRIP_CRITICAL) tz->ops->critical(tz); else if (tz->ops->hot) tz->ops->hot(tz); } static void handle_thermal_trip(struct thermal_zone_device *tz, struct thermal_trip *trip) { if (trip->temperature == THERMAL_TEMP_INVALID) return; if (tz->last_temperature == THERMAL_TEMP_INVALID) { /* Initialization. */ trip->threshold = trip->temperature; if (tz->temperature >= trip->threshold) trip->threshold -= trip->hysteresis; } else if (tz->last_temperature < trip->threshold) { /* * The trip threshold is equal to the trip temperature, unless * the latter has changed in the meantime. In either case, * the trip is crossed if the current zone temperature is at * least equal to its temperature, but otherwise ensure that * the threshold and the trip temperature will be equal. 
*/ if (tz->temperature >= trip->temperature) { thermal_notify_tz_trip_up(tz, trip); thermal_debug_tz_trip_up(tz, trip); trip->threshold = trip->temperature - trip->hysteresis; } else { trip->threshold = trip->temperature; } } else { /* * The previous zone temperature was above or equal to the trip * threshold, which would be equal to the "low temperature" of * the trip (its temperature minus its hysteresis), unless the * trip temperature or hysteresis had changed. In either case, * the trip is crossed if the current zone temperature is below * the low temperature of the trip, but otherwise ensure that * the trip threshold will be equal to the low temperature of * the trip. */ if (tz->temperature < trip->temperature - trip->hysteresis) { thermal_notify_tz_trip_down(tz, trip); thermal_debug_tz_trip_down(tz, trip); trip->threshold = trip->temperature; } else { trip->threshold = trip->temperature - trip->hysteresis; } } if (trip->type == THERMAL_TRIP_CRITICAL || trip->type == THERMAL_TRIP_HOT) handle_critical_trips(tz, trip); else handle_non_critical_trips(tz, trip); } static void update_temperature(struct thermal_zone_device *tz) { int temp, ret; ret = __thermal_zone_get_temp(tz, &temp); if (ret) { if (ret != -EAGAIN) dev_warn(&tz->device, "failed to read out thermal zone (%d)\n", ret); return; } tz->last_temperature = tz->temperature; tz->temperature = temp; trace_thermal_temperature(tz); thermal_genl_sampling_temp(tz->id, temp); thermal_debug_update_temp(tz); } static void thermal_zone_device_check(struct work_struct *work) { struct thermal_zone_device *tz = container_of(work, struct thermal_zone_device, poll_queue.work); thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); } static void thermal_zone_device_init(struct thermal_zone_device *tz) { struct thermal_instance *pos; INIT_DELAYED_WORK(&tz->poll_queue, thermal_zone_device_check); tz->temperature = THERMAL_TEMP_INVALID; tz->prev_low_trip = -INT_MAX; tz->prev_high_trip = INT_MAX; list_for_each_entry(pos, 
&tz->thermal_instances, tz_node) pos->initialized = false; } void __thermal_zone_device_update(struct thermal_zone_device *tz, enum thermal_notify_event event) { struct thermal_trip *trip; if (tz->suspended) return; if (!thermal_zone_device_is_enabled(tz)) return; update_temperature(tz); __thermal_zone_set_trips(tz); tz->notify_event = event; for_each_trip(tz, trip) handle_thermal_trip(tz, trip); monitor_thermal_zone(tz); } static int thermal_zone_device_set_mode(struct thermal_zone_device *tz, enum thermal_device_mode mode) { int ret = 0; mutex_lock(&tz->lock); /* do nothing if mode isn't changing */ if (mode == tz->mode) { mutex_unlock(&tz->lock); return ret; } if (tz->ops->change_mode) ret = tz->ops->change_mode(tz, mode); if (!ret) tz->mode = mode; __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); mutex_unlock(&tz->lock); if (mode == THERMAL_DEVICE_ENABLED) thermal_notify_tz_enable(tz); else thermal_notify_tz_disable(tz); return ret; } int thermal_zone_device_enable(struct thermal_zone_device *tz) { return thermal_zone_device_set_mode(tz, THERMAL_DEVICE_ENABLED); } EXPORT_SYMBOL_GPL(thermal_zone_device_enable); int thermal_zone_device_disable(struct thermal_zone_device *tz) { return thermal_zone_device_set_mode(tz, THERMAL_DEVICE_DISABLED); } EXPORT_SYMBOL_GPL(thermal_zone_device_disable); int thermal_zone_device_is_enabled(struct thermal_zone_device *tz) { lockdep_assert_held(&tz->lock); return tz->mode == THERMAL_DEVICE_ENABLED; } static bool thermal_zone_is_present(struct thermal_zone_device *tz) { return !list_empty(&tz->node); } void thermal_zone_device_update(struct thermal_zone_device *tz, enum thermal_notify_event event) { mutex_lock(&tz->lock); if (thermal_zone_is_present(tz)) __thermal_zone_device_update(tz, event); mutex_unlock(&tz->lock); } EXPORT_SYMBOL_GPL(thermal_zone_device_update); int for_each_thermal_governor(int (*cb)(struct thermal_governor *, void *), void *data) { struct thermal_governor *gov; int ret = 0; 
mutex_lock(&thermal_governor_lock); list_for_each_entry(gov, &thermal_governor_list, governor_list) { ret = cb(gov, data); if (ret) break; } mutex_unlock(&thermal_governor_lock); return ret; } int for_each_thermal_cooling_device(int (*cb)(struct thermal_cooling_device *, void *), void *data) { struct thermal_cooling_device *cdev; int ret = 0; mutex_lock(&thermal_list_lock); list_for_each_entry(cdev, &thermal_cdev_list, node) { ret = cb(cdev, data); if (ret) break; } mutex_unlock(&thermal_list_lock); return ret; } int for_each_thermal_zone(int (*cb)(struct thermal_zone_device *, void *), void *data) { struct thermal_zone_device *tz; int ret = 0; mutex_lock(&thermal_list_lock); list_for_each_entry(tz, &thermal_tz_list, node) { ret = cb(tz, data); if (ret) break; } mutex_unlock(&thermal_list_lock); return ret; } struct thermal_zone_device *thermal_zone_get_by_id(int id) { struct thermal_zone_device *tz, *match = NULL; mutex_lock(&thermal_list_lock); list_for_each_entry(tz, &thermal_tz_list, node) { if (tz->id == id) { match = tz; break; } } mutex_unlock(&thermal_list_lock); return match; } /* * Device management section: cooling devices, zones devices, and binding * * Set of functions provided by the thermal core for: * - cooling devices lifecycle: registration, unregistration, * binding, and unbinding. * - thermal zone devices lifecycle: registration, unregistration, * binding, and unbinding. */ /** * thermal_bind_cdev_to_trip - bind a cooling device to a thermal zone * @tz: pointer to struct thermal_zone_device * @trip: trip point the cooling devices is associated with in this zone. * @cdev: pointer to struct thermal_cooling_device * @upper: the Maximum cooling state for this trip point. * THERMAL_NO_LIMIT means no upper limit, * and the cooling device can be in max_state. * @lower: the Minimum cooling state can be used for this trip point. * THERMAL_NO_LIMIT means no lower limit, * and the cooling device can be in cooling state 0. 
* @weight:	The weight of the cooling device to be bound to the
 *		thermal zone. Use THERMAL_WEIGHT_DEFAULT for the
 *		default value
 *
 * This interface function binds a thermal cooling device to a given trip
 * point of a thermal zone device.
 * This function is usually called in the thermal zone device .bind callback.
 *
 * Return: 0 on success, the proper error value otherwise.
 */
int thermal_bind_cdev_to_trip(struct thermal_zone_device *tz,
				     const struct thermal_trip *trip,
				     struct thermal_cooling_device *cdev,
				     unsigned long upper, unsigned long lower,
				     unsigned int weight)
{
	struct thermal_instance *dev;
	struct thermal_instance *pos;
	struct thermal_zone_device *pos1;
	struct thermal_cooling_device *pos2;
	bool upper_no_limit;
	int result;

	/*
	 * Both objects must currently be on the global lists, i.e. be
	 * registered.  If a walk finds no match, its cursor does not equal
	 * the object looked for and the check below rejects the request.
	 */
	list_for_each_entry(pos1, &thermal_tz_list, node) {
		if (pos1 == tz)
			break;
	}
	list_for_each_entry(pos2, &thermal_cdev_list, node) {
		if (pos2 == cdev)
			break;
	}

	if (tz != pos1 || cdev != pos2)
		return -EINVAL;

	/* lower default 0, upper default max_state */
	lower = lower == THERMAL_NO_LIMIT ?
0 : lower; if (upper == THERMAL_NO_LIMIT) { upper = cdev->max_state; upper_no_limit = true; } else { upper_no_limit = false; } if (lower > upper || upper > cdev->max_state) return -EINVAL; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; dev->tz = tz; dev->cdev = cdev; dev->trip = trip; dev->upper = upper; dev->upper_no_limit = upper_no_limit; dev->lower = lower; dev->target = THERMAL_NO_TARGET; dev->weight = weight; result = ida_alloc(&tz->ida, GFP_KERNEL); if (result < 0) goto free_mem; dev->id = result; sprintf(dev->name, "cdev%d", dev->id); result = sysfs_create_link(&tz->device.kobj, &cdev->device.kobj, dev->name); if (result) goto release_ida; snprintf(dev->attr_name, sizeof(dev->attr_name), "cdev%d_trip_point", dev->id); sysfs_attr_init(&dev->attr.attr); dev->attr.attr.name = dev->attr_name; dev->attr.attr.mode = 0444; dev->attr.show = trip_point_show; result = device_create_file(&tz->device, &dev->attr); if (result) goto remove_symbol_link; snprintf(dev->weight_attr_name, sizeof(dev->weight_attr_name), "cdev%d_weight", dev->id); sysfs_attr_init(&dev->weight_attr.attr); dev->weight_attr.attr.name = dev->weight_attr_name; dev->weight_attr.attr.mode = S_IWUSR | S_IRUGO; dev->weight_attr.show = weight_show; dev->weight_attr.store = weight_store; result = device_create_file(&tz->device, &dev->weight_attr); if (result) goto remove_trip_file; mutex_lock(&tz->lock); mutex_lock(&cdev->lock); list_for_each_entry(pos, &tz->thermal_instances, tz_node) if (pos->tz == tz && pos->trip == trip && pos->cdev == cdev) { result = -EEXIST; break; } if (!result) { list_add_tail(&dev->tz_node, &tz->thermal_instances); list_add_tail(&dev->cdev_node, &cdev->thermal_instances); atomic_set(&tz->need_update, 1); thermal_governor_update_tz(tz, THERMAL_TZ_BIND_CDEV); } mutex_unlock(&cdev->lock); mutex_unlock(&tz->lock); if (!result) return 0; device_remove_file(&tz->device, &dev->weight_attr); remove_trip_file: device_remove_file(&tz->device, &dev->attr); 
remove_symbol_link: sysfs_remove_link(&tz->device.kobj, dev->name); release_ida: ida_free(&tz->ida, dev->id); free_mem: kfree(dev); return result; } EXPORT_SYMBOL_GPL(thermal_bind_cdev_to_trip); int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, int trip_index, struct thermal_cooling_device *cdev, unsigned long upper, unsigned long lower, unsigned int weight) { if (trip_index < 0 || trip_index >= tz->num_trips) return -EINVAL; return thermal_bind_cdev_to_trip(tz, &tz->trips[trip_index], cdev, upper, lower, weight); } EXPORT_SYMBOL_GPL(thermal_zone_bind_cooling_device); /** * thermal_unbind_cdev_from_trip - unbind a cooling device from a thermal zone. * @tz: pointer to a struct thermal_zone_device. * @trip: trip point the cooling devices is associated with in this zone. * @cdev: pointer to a struct thermal_cooling_device. * * This interface function unbind a thermal cooling device from the certain * trip point of a thermal zone device. * This function is usually called in the thermal zone device .unbind callback. * * Return: 0 on success, the proper error value otherwise. 
*/
int thermal_unbind_cdev_from_trip(struct thermal_zone_device *tz,
				  const struct thermal_trip *trip,
				  struct thermal_cooling_device *cdev)
{
	struct thermal_instance *pos, *next;

	/* Lock order: zone first, then cooling device. */
	mutex_lock(&tz->lock);
	mutex_lock(&cdev->lock);
	list_for_each_entry_safe(pos, next, &tz->thermal_instances, tz_node) {
		if (pos->tz == tz && pos->trip == trip && pos->cdev == cdev) {
			/* Unlink from both the zone's and the cdev's list. */
			list_del(&pos->tz_node);
			list_del(&pos->cdev_node);

			thermal_governor_update_tz(tz, THERMAL_TZ_UNBIND_CDEV);

			mutex_unlock(&cdev->lock);
			mutex_unlock(&tz->lock);
			goto unbind;
		}
	}
	mutex_unlock(&cdev->lock);
	mutex_unlock(&tz->lock);

	/* No instance matches this (zone, trip, cdev) triple. */
	return -ENODEV;

unbind:
	/* The sysfs entries are removed after both locks have been dropped. */
	device_remove_file(&tz->device, &pos->weight_attr);
	device_remove_file(&tz->device, &pos->attr);
	sysfs_remove_link(&tz->device.kobj, pos->name);
	ida_free(&tz->ida, pos->id);
	kfree(pos);
	return 0;
}
EXPORT_SYMBOL_GPL(thermal_unbind_cdev_from_trip);

/*
 * Index-based front end to thermal_unbind_cdev_from_trip(): returns -EINVAL
 * if @trip_index is outside the zone's trip table, otherwise the result of
 * unbinding @cdev from the trip at that index.
 */
int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz,
				       int trip_index,
				       struct thermal_cooling_device *cdev)
{
	if (trip_index < 0 || trip_index >= tz->num_trips)
		return -EINVAL;

	return thermal_unbind_cdev_from_trip(tz, &tz->trips[trip_index], cdev);
}
EXPORT_SYMBOL_GPL(thermal_zone_unbind_cooling_device);

/*
 * Release callback shared by thermal zones and cooling devices; the kind of
 * object is told apart by the device name prefix.
 *
 * For a zone only the sysfs groups and the mutex are destroyed and
 * tz->removal is completed; the zone structure itself is not freed here.
 * A cooling device is torn down and freed completely.
 */
static void thermal_release(struct device *dev)
{
	struct thermal_zone_device *tz;
	struct thermal_cooling_device *cdev;

	if (!strncmp(dev_name(dev), "thermal_zone",
		     sizeof("thermal_zone") - 1)) {
		tz = to_thermal_zone(dev);
		thermal_zone_destroy_device_groups(tz);
		mutex_destroy(&tz->lock);
		complete(&tz->removal);
	} else if (!strncmp(dev_name(dev), "cooling_device",
			    sizeof("cooling_device") - 1)) {
		cdev = to_cooling_device(dev);
		thermal_cooling_device_destroy_sysfs(cdev);
		kfree_const(cdev->type);
		ida_free(&thermal_cdev_ida, cdev->id);
		kfree(cdev);
	}
}

/* The thermal device class; the registration functions return -ENODEV while it is NULL. */
static struct class *thermal_class;

/* Log a failed attempt to bind @cdev to @tz together with the error code. */
static inline
void print_bind_err_msg(struct thermal_zone_device *tz,
			struct thermal_cooling_device *cdev, int ret)
{
	dev_err(&tz->device, "binding zone %s with cdev %s failed:%d\n",
		tz->type, cdev->type, ret);
}

static void
bind_cdev(struct thermal_cooling_device *cdev)
{
	int ret;
	struct thermal_zone_device *pos = NULL;

	/*
	 * Offer the cooling device to every zone that has a .bind callback.
	 * A failing bind is only logged; the remaining zones are still tried.
	 */
	list_for_each_entry(pos, &thermal_tz_list, node) {
		if (pos->ops->bind) {
			ret = pos->ops->bind(pos, cdev);
			if (ret)
				print_bind_err_msg(pos, cdev, ret);
		}
	}
}

/**
 * __thermal_cooling_device_register() - register a new thermal cooling device
 * @np:		a pointer to a device tree node.
 * @type:	the thermal cooling device type.
 * @devdata:	device private data.
 * @ops:		standard thermal cooling devices callbacks.
 *
 * This interface function adds a new thermal cooling device (fan/processor/...)
 * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
 * to all the thermal zone devices registered at the same time.
 * It also gives the opportunity to link the cooling device to a device tree
 * node, so that it can be bound to a thermal zone created out of device tree.
 *
 * Return: a pointer to the created struct thermal_cooling_device or an
 * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
 */
static struct thermal_cooling_device *
__thermal_cooling_device_register(struct device_node *np,
				  const char *type, void *devdata,
				  const struct thermal_cooling_device_ops *ops)
{
	struct thermal_cooling_device *cdev;
	struct thermal_zone_device *pos = NULL;
	int id, ret;

	/* All three state callbacks are mandatory. */
	if (!ops || !ops->get_max_state || !ops->get_cur_state ||
	    !ops->set_cur_state)
		return ERR_PTR(-EINVAL);

	/* The thermal class has not been set up. */
	if (!thermal_class)
		return ERR_PTR(-ENODEV);

	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
	if (!cdev)
		return ERR_PTR(-ENOMEM);

	ret = ida_alloc(&thermal_cdev_ida, GFP_KERNEL);
	if (ret < 0)
		goto out_kfree_cdev;
	cdev->id = ret;
	/* Kept in a local so the error path can release the id. */
	id = ret;

	/* A NULL type is stored as an empty string. */
	cdev->type = kstrdup_const(type ?
type : "", GFP_KERNEL);
	if (!cdev->type) {
		ret = -ENOMEM;
		goto out_ida_remove;
	}

	mutex_init(&cdev->lock);
	INIT_LIST_HEAD(&cdev->thermal_instances);
	cdev->np = np;
	cdev->ops = ops;
	cdev->updated = false;
	cdev->device.class = thermal_class;
	cdev->devdata = devdata;

	/* Cache the maximum state; binding and sysfs code compare against it. */
	ret = cdev->ops->get_max_state(cdev, &cdev->max_state);
	if (ret)
		goto out_cdev_type;

	thermal_cooling_device_setup_sysfs(cdev);

	ret = dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
	if (ret)
		goto out_cooling_dev;

	ret = device_register(&cdev->device);
	if (ret) {
		/* thermal_release() handles rest of the cleanup */
		put_device(&cdev->device);
		return ERR_PTR(ret);
	}

	/* Add 'this' new cdev to the global cdev list */
	mutex_lock(&thermal_list_lock);

	list_add(&cdev->node, &thermal_cdev_list);

	/* Update binding information for 'this' new cdev */
	bind_cdev(cdev);

	/*
	 * Run an update on every zone whose need_update flag is set, clearing
	 * the flag in the same atomic step.
	 */
	list_for_each_entry(pos, &thermal_tz_list, node)
		if (atomic_cmpxchg(&pos->need_update, 1, 0))
			thermal_zone_device_update(pos,
						   THERMAL_EVENT_UNSPECIFIED);

	mutex_unlock(&thermal_list_lock);

	thermal_debug_cdev_add(cdev);

	return cdev;

	/* Error unwinding for failures that happen before device_register(). */
out_cooling_dev:
	thermal_cooling_device_destroy_sysfs(cdev);
out_cdev_type:
	kfree_const(cdev->type);
out_ida_remove:
	ida_free(&thermal_cdev_ida, id);
out_kfree_cdev:
	kfree(cdev);
	return ERR_PTR(ret);
}

/**
 * thermal_cooling_device_register() - register a new thermal cooling device
 * @type:	the thermal cooling device type.
 * @devdata:	device private data.
 * @ops:		standard thermal cooling devices callbacks.
 *
 * This interface function adds a new thermal cooling device (fan/processor/...)
 * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
 * to all the thermal zone devices registered at the same time.
 *
 * Return: a pointer to the created struct thermal_cooling_device or an
 * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
*/ struct thermal_cooling_device * thermal_cooling_device_register(const char *type, void *devdata, const struct thermal_cooling_device_ops *ops) { return __thermal_cooling_device_register(NULL, type, devdata, ops); } EXPORT_SYMBOL_GPL(thermal_cooling_device_register); /** * thermal_of_cooling_device_register() - register an OF thermal cooling device * @np: a pointer to a device tree node. * @type: the thermal cooling device type. * @devdata: device private data. * @ops: standard thermal cooling devices callbacks. * * This function will register a cooling device with device tree node reference. * This interface function adds a new thermal cooling device (fan/processor/...) * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself * to all the thermal zone devices registered at the same time. * * Return: a pointer to the created struct thermal_cooling_device or an * ERR_PTR. Caller must check return value with IS_ERR*() helpers. */ struct thermal_cooling_device * thermal_of_cooling_device_register(struct device_node *np, const char *type, void *devdata, const struct thermal_cooling_device_ops *ops) { return __thermal_cooling_device_register(np, type, devdata, ops); } EXPORT_SYMBOL_GPL(thermal_of_cooling_device_register); static void thermal_cooling_device_release(struct device *dev, void *res) { thermal_cooling_device_unregister( *(struct thermal_cooling_device **)res); } /** * devm_thermal_of_cooling_device_register() - register an OF thermal cooling * device * @dev: a valid struct device pointer of a sensor device. * @np: a pointer to a device tree node. * @type: the thermal cooling device type. * @devdata: device private data. * @ops: standard thermal cooling devices callbacks. * * This function will register a cooling device with device tree node reference. * This interface function adds a new thermal cooling device (fan/processor/...) * to /sys/class/thermal/ folder as cooling_device[0-*]. 
* It tries to bind itself
 * to all the thermal zone devices registered at the same time.
 *
 * Return: a pointer to the created struct thermal_cooling_device or an
 * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
 */
struct thermal_cooling_device *
devm_thermal_of_cooling_device_register(struct device *dev,
				struct device_node *np,
				char *type, void *devdata,
				const struct thermal_cooling_device_ops *ops)
{
	struct thermal_cooling_device **ptr, *tcd;

	/* The devres slot stores the cdev pointer for the release callback. */
	ptr = devres_alloc(thermal_cooling_device_release, sizeof(*ptr),
			   GFP_KERNEL);
	if (!ptr)
		return ERR_PTR(-ENOMEM);

	tcd = __thermal_cooling_device_register(np, type, devdata, ops);
	if (IS_ERR(tcd)) {
		/* Nothing was registered, so the slot is dropped unused. */
		devres_free(ptr);
		return tcd;
	}

	*ptr = tcd;
	devres_add(dev, ptr);

	return tcd;
}
EXPORT_SYMBOL_GPL(devm_thermal_of_cooling_device_register);

/*
 * True if @cdev is on the global cooling device list.  The callers in this
 * file invoke it with thermal_list_lock held.
 */
static bool thermal_cooling_device_present(struct thermal_cooling_device *cdev)
{
	struct thermal_cooling_device *pos = NULL;

	list_for_each_entry(pos, &thermal_cdev_list, node) {
		if (pos == cdev)
			return true;
	}

	return false;
}

/**
 * thermal_cooling_device_update - Update a cooling device object
 * @cdev: Target cooling device.
 *
 * Update @cdev to reflect a change of the underlying hardware or platform.
 *
 * Must be called when the maximum cooling state of @cdev becomes invalid and so
 * its .get_max_state() callback needs to be run to produce the new maximum
 * cooling state value.
 */
void thermal_cooling_device_update(struct thermal_cooling_device *cdev)
{
	struct thermal_instance *ti;
	unsigned long state;

	/* Tolerate both NULL and the ERR_PTR a failed registration returns. */
	if (IS_ERR_OR_NULL(cdev))
		return;

	/*
	 * Hold thermal_list_lock throughout the update to prevent the device
	 * from going away while being updated.
	 */
	mutex_lock(&thermal_list_lock);

	if (!thermal_cooling_device_present(cdev))
		goto unlock_list;

	/*
	 * Update under the cdev lock to prevent the state from being set beyond
	 * the new limit concurrently.
*/
	mutex_lock(&cdev->lock);

	/* Without a valid new maximum there is nothing to update. */
	if (cdev->ops->get_max_state(cdev, &cdev->max_state))
		goto unlock;

	thermal_cooling_device_stats_reinit(cdev);

	list_for_each_entry(ti, &cdev->thermal_instances, cdev_node) {
		/* The upper bound already equals the new maximum. */
		if (ti->upper == cdev->max_state)
			continue;

		/*
		 * The maximum is above the instance's upper bound: only
		 * instances bound without an upper limit follow it.
		 */
		if (ti->upper < cdev->max_state) {
			if (ti->upper_no_limit)
				ti->upper = cdev->max_state;

			continue;
		}

		/*
		 * The maximum dropped below the upper bound: clamp the upper
		 * bound, then the lower bound and a pending target, if any.
		 */
		ti->upper = cdev->max_state;
		if (ti->lower > ti->upper)
			ti->lower = ti->upper;

		if (ti->target == THERMAL_NO_TARGET)
			continue;

		if (ti->target > ti->upper)
			ti->target = ti->upper;
	}

	/* Skip the statistics update if the current state is unusable. */
	if (cdev->ops->get_cur_state(cdev, &state) || state > cdev->max_state)
		goto unlock;

	thermal_cooling_device_stats_update(cdev, state);

unlock:
	mutex_unlock(&cdev->lock);

unlock_list:
	mutex_unlock(&thermal_list_lock);
}
EXPORT_SYMBOL_GPL(thermal_cooling_device_update);

/**
 * thermal_cooling_device_unregister - removes a thermal cooling device
 * @cdev:	the thermal cooling device to remove.
 *
 * thermal_cooling_device_unregister() must be called when a registered
 * thermal cooling device is no longer needed.
*/ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) { struct thermal_zone_device *tz; if (!cdev) return; thermal_debug_cdev_remove(cdev); mutex_lock(&thermal_list_lock); if (!thermal_cooling_device_present(cdev)) { mutex_unlock(&thermal_list_lock); return; } list_del(&cdev->node); /* Unbind all thermal zones associated with 'this' cdev */ list_for_each_entry(tz, &thermal_tz_list, node) { if (tz->ops->unbind) tz->ops->unbind(tz, cdev); } mutex_unlock(&thermal_list_lock); device_unregister(&cdev->device); } EXPORT_SYMBOL_GPL(thermal_cooling_device_unregister); static void bind_tz(struct thermal_zone_device *tz) { int ret; struct thermal_cooling_device *pos = NULL; if (!tz->ops->bind) return; mutex_lock(&thermal_list_lock); list_for_each_entry(pos, &thermal_cdev_list, node) { ret = tz->ops->bind(tz, pos); if (ret) print_bind_err_msg(tz, pos, ret); } mutex_unlock(&thermal_list_lock); } static void thermal_set_delay_jiffies(unsigned long *delay_jiffies, int delay_ms) { *delay_jiffies = msecs_to_jiffies(delay_ms); if (delay_ms > 1000) *delay_jiffies = round_jiffies(*delay_jiffies); } int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp) { int i, ret = -EINVAL; if (tz->ops->get_crit_temp) return tz->ops->get_crit_temp(tz, temp); if (!tz->trips) return -EINVAL; mutex_lock(&tz->lock); for (i = 0; i < tz->num_trips; i++) { if (tz->trips[i].type == THERMAL_TRIP_CRITICAL) { *temp = tz->trips[i].temperature; ret = 0; break; } } mutex_unlock(&tz->lock); return ret; } EXPORT_SYMBOL_GPL(thermal_zone_get_crit_temp); /** * thermal_zone_device_register_with_trips() - register a new thermal zone device * @type: the thermal zone device type * @trips: a pointer to an array of thermal trips * @num_trips: the number of trip points the thermal zone support * @mask: a bit string indicating the writeablility of trip points * @devdata: private device data * @ops: standard thermal zone device callbacks * @tzp: thermal zone platform parameters * 
* @passive_delay: number of milliseconds to wait between polls when
 *		   performing passive cooling
 * @polling_delay: number of milliseconds to wait between polls when checking
 *		   whether trip points have been crossed (0 for interrupt
 *		   driven systems)
 *
 * This interface function adds a new thermal zone device (sensor) to
 * /sys/class/thermal folder as thermal_zone[0-*]. It tries to bind all the
 * thermal cooling devices registered at the same time.
 * thermal_zone_device_unregister() must be called when the device is no
 * longer needed. The passive cooling depends on the .get_trend() return value.
 *
 * Return: a pointer to the created struct thermal_zone_device or,
 * in case of error, an ERR_PTR. Caller must check return value with
 * IS_ERR*() helpers.
 */
struct thermal_zone_device *
thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *trips, int num_trips, int mask,
					void *devdata, struct thermal_zone_device_ops *ops,
					const struct thermal_zone_params *tzp, int passive_delay,
					int polling_delay)
{
	struct thermal_zone_device *tz;
	int id;
	int result;
	struct thermal_governor *governor;

	/* The type string is mandatory and must fit into tz->type. */
	if (!type || strlen(type) == 0) {
		pr_err("No thermal zone type defined\n");
		return ERR_PTR(-EINVAL);
	}

	if (strlen(type) >= THERMAL_NAME_LENGTH) {
		pr_err("Thermal zone name (%s) too long, should be under %d chars\n",
		       type, THERMAL_NAME_LENGTH);
		return ERR_PTR(-EINVAL);
	}

	/*
	 * Max trip count can't exceed 31 as the "mask >> num_trips" condition.
	 * For example, shifting by 32 will result in compiler warning:
	 * warning: right shift count >= width of type [-Wshift-count-overflow]
	 *
	 * Also "mask >> num_trips" will always be true with 32 bit shift.
	 * E.g. mask = 0x80000000 for trip id 31 to be RW. Then
	 * mask >> 32 = 0x80000000
	 * This will result in failure for the below condition.
	 *
	 * Check will be true when the bit 31 of the mask is set.
	 * 32 bit shift will cause overflow of 4 byte integer.
*/
	/*
	 * The range checks come first, so the shift is evaluated only for
	 * 0 <= num_trips <= 31.  A mask bit at or above num_trips is rejected.
	 */
	if (num_trips > (BITS_PER_TYPE(int) - 1) || num_trips < 0 || mask >> num_trips) {
		pr_err("Incorrect number of thermal trips\n");
		return ERR_PTR(-EINVAL);
	}

	/* Reading the temperature is the one callback a zone cannot do without. */
	if (!ops || !ops->get_temp) {
		pr_err("Thermal zone device ops not defined\n");
		return ERR_PTR(-EINVAL);
	}

	if (num_trips > 0 && !trips)
		return ERR_PTR(-EINVAL);

	/* The thermal class has not been set up. */
	if (!thermal_class)
		return ERR_PTR(-ENODEV);

	tz = kzalloc(sizeof(*tz), GFP_KERNEL);
	if (!tz)
		return ERR_PTR(-ENOMEM);

	/* The zone keeps its own copy of the platform parameters. */
	if (tzp) {
		tz->tzp = kmemdup(tzp, sizeof(*tzp), GFP_KERNEL);
		if (!tz->tzp) {
			result = -ENOMEM;
			goto free_tz;
		}
	}

	INIT_LIST_HEAD(&tz->thermal_instances);
	/* An initialised, unlinked node makes the zone read as "not present". */
	INIT_LIST_HEAD(&tz->node);
	ida_init(&tz->ida);
	mutex_init(&tz->lock);
	init_completion(&tz->removal);
	id = ida_alloc(&thermal_tz_ida, GFP_KERNEL);
	if (id < 0) {
		result = id;
		goto free_tzp;
	}

	tz->id = id;
	strscpy(tz->type, type, sizeof(tz->type));

	/*
	 * Install the default critical handler if the driver supplied none.
	 * Note that this writes to the caller's ops structure.
	 */
	if (!ops->critical)
		ops->critical = thermal_zone_device_critical;

	tz->ops = ops;
	tz->device.class = thermal_class;
	tz->devdata = devdata;
	tz->trips = trips;
	tz->num_trips = num_trips;

	thermal_set_delay_jiffies(&tz->passive_delay_jiffies, passive_delay);
	thermal_set_delay_jiffies(&tz->polling_delay_jiffies, polling_delay);

	/* sys I/F */
	/* Add nodes that are always present via .groups */
	result = thermal_zone_create_device_groups(tz, mask);
	if (result)
		goto remove_id;

	/* A new thermal zone needs to be updated anyway.
*/ atomic_set(&tz->need_update, 1); result = dev_set_name(&tz->device, "thermal_zone%d", tz->id); if (result) { thermal_zone_destroy_device_groups(tz); goto remove_id; } result = device_register(&tz->device); if (result) goto release_device; /* Update 'this' zone's governor information */ mutex_lock(&thermal_governor_lock); if (tz->tzp) governor = __find_governor(tz->tzp->governor_name); else governor = def_governor; result = thermal_set_governor(tz, governor); if (result) { mutex_unlock(&thermal_governor_lock); goto unregister; } mutex_unlock(&thermal_governor_lock); if (!tz->tzp || !tz->tzp->no_hwmon) { result = thermal_add_hwmon_sysfs(tz); if (result) goto unregister; } mutex_lock(&thermal_list_lock); mutex_lock(&tz->lock); list_add_tail(&tz->node, &thermal_tz_list); mutex_unlock(&tz->lock); mutex_unlock(&thermal_list_lock); /* Bind cooling devices for this zone */ bind_tz(tz); thermal_zone_device_init(tz); /* Update the new thermal zone and mark it as already updated. */ if (atomic_cmpxchg(&tz->need_update, 1, 0)) thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); thermal_notify_tz_create(tz); thermal_debug_tz_add(tz); return tz; unregister: device_del(&tz->device); release_device: put_device(&tz->device); remove_id: ida_free(&thermal_tz_ida, id); free_tzp: kfree(tz->tzp); free_tz: kfree(tz); return ERR_PTR(result); } EXPORT_SYMBOL_GPL(thermal_zone_device_register_with_trips); struct thermal_zone_device *thermal_tripless_zone_device_register( const char *type, void *devdata, struct thermal_zone_device_ops *ops, const struct thermal_zone_params *tzp) { return thermal_zone_device_register_with_trips(type, NULL, 0, 0, devdata, ops, tzp, 0, 0); } EXPORT_SYMBOL_GPL(thermal_tripless_zone_device_register); void *thermal_zone_device_priv(struct thermal_zone_device *tzd) { return tzd->devdata; } EXPORT_SYMBOL_GPL(thermal_zone_device_priv); const char *thermal_zone_device_type(struct thermal_zone_device *tzd) { return tzd->type; } 
EXPORT_SYMBOL_GPL(thermal_zone_device_type); int thermal_zone_device_id(struct thermal_zone_device *tzd) { return tzd->id; } EXPORT_SYMBOL_GPL(thermal_zone_device_id); struct device *thermal_zone_device(struct thermal_zone_device *tzd) { return &tzd->device; } EXPORT_SYMBOL_GPL(thermal_zone_device); /** * thermal_zone_device_unregister - removes the registered thermal zone device * @tz: the thermal zone device to remove */ void thermal_zone_device_unregister(struct thermal_zone_device *tz) { struct thermal_cooling_device *cdev; struct thermal_zone_device *pos = NULL; if (!tz) return; thermal_debug_tz_remove(tz); mutex_lock(&thermal_list_lock); list_for_each_entry(pos, &thermal_tz_list, node) if (pos == tz) break; if (pos != tz) { /* thermal zone device not found */ mutex_unlock(&thermal_list_lock); return; } mutex_lock(&tz->lock); list_del(&tz->node); mutex_unlock(&tz->lock); /* Unbind all cdevs associated with 'this' thermal zone */ list_for_each_entry(cdev, &thermal_cdev_list, node) if (tz->ops->unbind) tz->ops->unbind(tz, cdev); mutex_unlock(&thermal_list_lock); cancel_delayed_work_sync(&tz->poll_queue); thermal_set_governor(tz, NULL); thermal_remove_hwmon_sysfs(tz); ida_free(&thermal_tz_ida, tz->id); ida_destroy(&tz->ida); device_del(&tz->device); kfree(tz->tzp); put_device(&tz->device); thermal_notify_tz_delete(tz); wait_for_completion(&tz->removal); kfree(tz); } EXPORT_SYMBOL_GPL(thermal_zone_device_unregister); /** * thermal_zone_get_zone_by_name() - search for a zone and returns its ref * @name: thermal zone name to fetch the temperature * * When only one zone is found with the passed name, returns a reference to it. * * Return: On success returns a reference to an unique thermal zone with * matching name equals to @name, an ERR_PTR otherwise (-EINVAL for invalid * paramenters, -ENODEV for not found and -EEXIST for multiple matches). 
*/
struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name)
{
	struct thermal_zone_device *pos = NULL, *ref = ERR_PTR(-EINVAL);
	unsigned int found = 0;

	if (!name)
		goto exit;

	mutex_lock(&thermal_list_lock);
	/* Case-insensitive match; count the hits so duplicates are detected. */
	list_for_each_entry(pos, &thermal_tz_list, node)
		if (!strncasecmp(name, pos->type, THERMAL_NAME_LENGTH)) {
			found++;
			ref = pos;
		}
	mutex_unlock(&thermal_list_lock);

	/* nothing has been found, thus an error code for it */
	if (found == 0)
		ref = ERR_PTR(-ENODEV);
	else if (found > 1)
	/* Success only when a unique zone is found */
		ref = ERR_PTR(-EEXIST);

exit:
	return ref;
}
EXPORT_SYMBOL_GPL(thermal_zone_get_zone_by_name);

/*
 * Work function installed on tz->poll_queue by thermal_pm_notify() after
 * resume: clears the suspended flag, re-initialises the zone (which also
 * puts the regular polling work function back) and runs a zone update.
 */
static void thermal_zone_device_resume(struct work_struct *work)
{
	struct thermal_zone_device *tz;

	tz = container_of(work, struct thermal_zone_device, poll_queue.work);

	mutex_lock(&tz->lock);

	tz->suspended = false;

	thermal_zone_device_init(tz);
	__thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);

	mutex_unlock(&tz->lock);
}

/*
 * PM notifier: flags every zone as suspended before hibernation, restore or
 * suspend, and schedules the resume work for every zone afterwards.
 */
static int thermal_pm_notify(struct notifier_block *nb,
			     unsigned long mode, void *_unused)
{
	struct thermal_zone_device *tz;

	switch (mode) {
	case PM_HIBERNATION_PREPARE:
	case PM_RESTORE_PREPARE:
	case PM_SUSPEND_PREPARE:
		mutex_lock(&thermal_list_lock);

		list_for_each_entry(tz, &thermal_tz_list, node) {
			mutex_lock(&tz->lock);

			/* __thermal_zone_device_update() bails out while set. */
			tz->suspended = true;

			mutex_unlock(&tz->lock);
		}

		mutex_unlock(&thermal_list_lock);
		break;
	case PM_POST_HIBERNATION:
	case PM_POST_RESTORE:
	case PM_POST_SUSPEND:
		mutex_lock(&thermal_list_lock);

		list_for_each_entry(tz, &thermal_tz_list, node) {
			mutex_lock(&tz->lock);

			cancel_delayed_work(&tz->poll_queue);

			/*
			 * Replace the work function with the resume one, which
			 * will restore the original work function and schedule
			 * the polling work if needed.
			 */
			INIT_DELAYED_WORK(&tz->poll_queue,
					  thermal_zone_device_resume);
			/* Queue up the work without a delay.
*/
			mod_delayed_work(system_freezable_power_efficient_wq,
					 &tz->poll_queue, 0);

			mutex_unlock(&tz->lock);
		}

		mutex_unlock(&thermal_list_lock);
		break;
	default:
		break;
	}
	return 0;
}

/* Notifier block that hooks thermal_pm_notify() into PM transitions. */
static struct notifier_block thermal_pm_nb = {
	.notifier_call = thermal_pm_notify,
};

/*
 * Set up the thermal core: debug support, netlink, governors, the "thermal"
 * device class and the PM notifier.
 *
 * A failure to register the PM notifier is only warned about and does not
 * fail the initialisation.  Any earlier failure unwinds what was set up and
 * returns the error.
 */
static int __init thermal_init(void)
{
	int result;

	thermal_debug_init();

	result = thermal_netlink_init();
	if (result)
		goto error;

	result = thermal_register_governors();
	if (result)
		goto unregister_netlink;

	thermal_class = kzalloc(sizeof(*thermal_class), GFP_KERNEL);
	if (!thermal_class) {
		result = -ENOMEM;
		goto unregister_governors;
	}

	thermal_class->name = "thermal";
	thermal_class->dev_release = thermal_release;

	result = class_register(thermal_class);
	if (result) {
		kfree(thermal_class);
		/* A NULL class makes later registration attempts fail with -ENODEV. */
		thermal_class = NULL;
		goto unregister_governors;
	}

	result = register_pm_notifier(&thermal_pm_nb);
	if (result)
		pr_warn("Thermal: Can not register suspend notifier, return %d\n",
			result);

	return 0;

unregister_governors:
	thermal_unregister_governors();
unregister_netlink:
	thermal_netlink_exit();
error:
	mutex_destroy(&thermal_list_lock);
	mutex_destroy(&thermal_governor_lock);
	return result;
}
postcore_initcall(thermal_init);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_SMT_H #define _LINUX_SCHED_SMT_H #include <linux/static_key.h> #ifdef CONFIG_SCHED_SMT extern struct static_key_false sched_smt_present; static __always_inline bool sched_smt_active(void) { return static_branch_likely(&sched_smt_present); } #else static inline bool sched_smt_active(void) { return false; } #endif void arch_smt_update(void); #endif /* _LINUX_SCHED_SMT_H */
1 1 1 1 1 1 1 25 2 3 8 12 9 4 3 5 5 5 10 31 3 25 1 11 1 11 1 1 3 3 3 2 1 11 11 11 11 11 11 11 24 23 5 5 5 5 5 15 16 11 11 1 11 11 23 23 23 9 11 11 5 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 /* AFS superblock handling * * Copyright (c) 2002, 2007, 2018 Red Hat, Inc. All rights reserved. * * This software may be freely redistributed under the terms of the * GNU General Public License. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*
 * Authors: David Howells <dhowells@redhat.com>
 *          David Woodhouse <dwmw2@infradead.org>
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/fs_parser.h>
#include <linux/statfs.h>
#include <linux/sched.h>
#include <linux/nsproxy.h>
#include <linux/magic.h>
#include <net/net_namespace.h>
#include "internal.h"

/* Forward declarations for the callbacks referenced by the tables below. */
static void afs_i_init_once(void *foo);
static void afs_kill_super(struct super_block *sb);
static struct inode *afs_alloc_inode(struct super_block *sb);
static void afs_destroy_inode(struct inode *inode);
static void afs_free_inode(struct inode *inode);
static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
static int afs_show_devname(struct seq_file *m, struct dentry *root);
static int afs_show_options(struct seq_file *m, struct dentry *root);
static int afs_init_fs_context(struct fs_context *fc);
static const struct fs_parameter_spec afs_fs_parameters[];

/* Filesystem type descriptor; registered by afs_fs_init(). */
struct file_system_type afs_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "afs",
	.init_fs_context	= afs_init_fs_context,
	.parameters		= afs_fs_parameters,
	.kill_sb		= afs_kill_super,
	.fs_flags		= FS_RENAME_DOES_D_MOVE,
};
MODULE_ALIAS_FS("afs");

int afs_net_id;

/* Superblock operations of an AFS superblock. */
static const struct super_operations afs_super_ops = {
	.statfs		= afs_statfs,
	.alloc_inode	= afs_alloc_inode,
	.write_inode	= netfs_unpin_writeback,
	.drop_inode	= afs_drop_inode,
	.destroy_inode	= afs_destroy_inode,
	.free_inode	= afs_free_inode,
	.evict_inode	= afs_evict_inode,
	.show_devname	= afs_show_devname,
	.show_options	= afs_show_options,
};

/* Slab cache for inode objects; created in afs_fs_init(). */
static struct kmem_cache *afs_inode_cachep;
/* Checked at module exit: afs_fs_exit() BUG()s if it is not zero. */
static atomic_t afs_count_active_inodes;

/* Identifiers of the mount parameters listed in afs_fs_parameters[]. */
enum afs_param {
	Opt_autocell,
	Opt_dyn,
	Opt_flock,
	Opt_source,
};

/* Accepted values of the "flock=" mount parameter. */
static const struct constant_table afs_param_flock[] = {
	{"local",	afs_flock_mode_local },
	{"openafs",	afs_flock_mode_openafs },
	{"strict",	afs_flock_mode_strict },
	{"write",	afs_flock_mode_write },
	{}
};

static const
struct fs_parameter_spec afs_fs_parameters[] = { fsparam_flag ("autocell", Opt_autocell), fsparam_flag ("dyn", Opt_dyn), fsparam_enum ("flock", Opt_flock, afs_param_flock), fsparam_string("source", Opt_source), {} }; /* * initialise the filesystem */ int __init afs_fs_init(void) { int ret; _enter(""); /* create ourselves an inode cache */ atomic_set(&afs_count_active_inodes, 0); ret = -ENOMEM; afs_inode_cachep = kmem_cache_create("afs_inode_cache", sizeof(struct afs_vnode), 0, SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, afs_i_init_once); if (!afs_inode_cachep) { printk(KERN_NOTICE "kAFS: Failed to allocate inode cache\n"); return ret; } /* now export our filesystem to lesser mortals */ ret = register_filesystem(&afs_fs_type); if (ret < 0) { kmem_cache_destroy(afs_inode_cachep); _leave(" = %d", ret); return ret; } _leave(" = 0"); return 0; } /* * clean up the filesystem */ void afs_fs_exit(void) { _enter(""); afs_mntpt_kill_timer(); unregister_filesystem(&afs_fs_type); if (atomic_read(&afs_count_active_inodes) != 0) { printk("kAFS: %d active inode objects still present\n", atomic_read(&afs_count_active_inodes)); BUG(); } /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(afs_inode_cachep); _leave(""); } /* * Display the mount device name in /proc/mounts. */ static int afs_show_devname(struct seq_file *m, struct dentry *root) { struct afs_super_info *as = AFS_FS_S(root->d_sb); struct afs_volume *volume = as->volume; struct afs_cell *cell = as->cell; const char *suf = ""; char pref = '%'; if (as->dyn_root) { seq_puts(m, "none"); return 0; } switch (volume->type) { case AFSVL_RWVOL: break; case AFSVL_ROVOL: pref = '#'; if (volume->type_force) suf = ".readonly"; break; case AFSVL_BACKVOL: pref = '#'; suf = ".backup"; break; } seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->name, suf); return 0; } /* * Display the mount options in /proc/mounts. 
*/ static int afs_show_options(struct seq_file *m, struct dentry *root) { struct afs_super_info *as = AFS_FS_S(root->d_sb); const char *p = NULL; if (as->dyn_root) seq_puts(m, ",dyn"); if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags)) seq_puts(m, ",autocell"); switch (as->flock_mode) { case afs_flock_mode_unset: break; case afs_flock_mode_local: p = "local"; break; case afs_flock_mode_openafs: p = "openafs"; break; case afs_flock_mode_strict: p = "strict"; break; case afs_flock_mode_write: p = "write"; break; } if (p) seq_printf(m, ",flock=%s", p); return 0; } /* * Parse the source name to get cell name, volume name, volume type and R/W * selector. * * This can be one of the following: * "%[cell:]volume[.]" R/W volume * "#[cell:]volume[.]" R/O or R/W volume (R/O parent), * or R/W (R/W parent) volume * "%[cell:]volume.readonly" R/O volume * "#[cell:]volume.readonly" R/O volume * "%[cell:]volume.backup" Backup volume * "#[cell:]volume.backup" Backup volume */ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) { struct afs_fs_context *ctx = fc->fs_private; struct afs_cell *cell; const char *cellname, *suffix, *name = param->string; int cellnamesz; _enter(",%s", name); if (fc->source) return invalf(fc, "kAFS: Multiple sources not supported"); if (!name) { printk(KERN_ERR "kAFS: no volume name specified\n"); return -EINVAL; } if ((name[0] != '%' && name[0] != '#') || !name[1]) { /* To use dynroot, we don't want to have to provide a source */ if (strcmp(name, "none") == 0) { ctx->no_cell = true; return 0; } printk(KERN_ERR "kAFS: unparsable volume name\n"); return -EINVAL; } /* determine the type of volume we're looking for */ if (name[0] == '%') { ctx->type = AFSVL_RWVOL; ctx->force = true; } name++; /* split the cell name out if there is one */ ctx->volname = strchr(name, ':'); if (ctx->volname) { cellname = name; cellnamesz = ctx->volname - name; ctx->volname++; } else { ctx->volname = name; cellname = NULL; cellnamesz = 0; 
} /* the volume type is further affected by a possible suffix */ suffix = strrchr(ctx->volname, '.'); if (suffix) { if (strcmp(suffix, ".readonly") == 0) { ctx->type = AFSVL_ROVOL; ctx->force = true; } else if (strcmp(suffix, ".backup") == 0) { ctx->type = AFSVL_BACKVOL; ctx->force = true; } else if (suffix[1] == 0) { } else { suffix = NULL; } } ctx->volnamesz = suffix ? suffix - ctx->volname : strlen(ctx->volname); _debug("cell %*.*s [%p]", cellnamesz, cellnamesz, cellname ?: "", ctx->cell); /* lookup the cell record */ if (cellname) { cell = afs_lookup_cell(ctx->net, cellname, cellnamesz, NULL, false); if (IS_ERR(cell)) { pr_err("kAFS: unable to lookup cell '%*.*s'\n", cellnamesz, cellnamesz, cellname ?: ""); return PTR_ERR(cell); } afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_parse); afs_see_cell(cell, afs_cell_trace_see_source); ctx->cell = cell; } _debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s", ctx->cell->name, ctx->cell, ctx->volnamesz, ctx->volnamesz, ctx->volname, suffix ?: "-", ctx->type, ctx->force ? " FORCE" : ""); fc->source = param->string; param->string = NULL; return 0; } /* * Parse a single mount parameter. */ static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct fs_parse_result result; struct afs_fs_context *ctx = fc->fs_private; int opt; opt = fs_parse(fc, afs_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_source: return afs_parse_source(fc, param); case Opt_autocell: ctx->autocell = true; break; case Opt_dyn: ctx->dyn_root = true; break; case Opt_flock: ctx->flock_mode = result.uint_32; break; default: return -EINVAL; } _leave(" = 0"); return 0; } /* * Validate the options, get the cell key and look up the volume. 
*/ static int afs_validate_fc(struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; struct afs_volume *volume; struct afs_cell *cell; struct key *key; int ret; if (!ctx->dyn_root) { if (ctx->no_cell) { pr_warn("kAFS: Can only specify source 'none' with -o dyn\n"); return -EINVAL; } if (!ctx->cell) { pr_warn("kAFS: No cell specified\n"); return -EDESTADDRREQ; } reget_key: /* We try to do the mount securely. */ key = afs_request_key(ctx->cell); if (IS_ERR(key)) return PTR_ERR(key); ctx->key = key; if (ctx->volume) { afs_put_volume(ctx->volume, afs_volume_trace_put_validate_fc); ctx->volume = NULL; } if (test_bit(AFS_CELL_FL_CHECK_ALIAS, &ctx->cell->flags)) { ret = afs_cell_detect_alias(ctx->cell, key); if (ret < 0) return ret; if (ret == 1) { _debug("switch to alias"); key_put(ctx->key); ctx->key = NULL; cell = afs_use_cell(ctx->cell->alias_of, afs_cell_trace_use_fc_alias); afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc); ctx->cell = cell; goto reget_key; } } volume = afs_create_volume(ctx); if (IS_ERR(volume)) return PTR_ERR(volume); ctx->volume = volume; if (volume->type != AFSVL_RWVOL) { ctx->flock_mode = afs_flock_mode_local; fc->sb_flags |= SB_RDONLY; } } return 0; } /* * check a superblock to see if it's the one we're looking for */ static int afs_test_super(struct super_block *sb, struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; struct afs_super_info *as = AFS_FS_S(sb); return (as->net_ns == fc->net_ns && as->volume && as->volume->vid == ctx->volume->vid && as->cell == ctx->cell && !as->dyn_root); } static int afs_dynroot_test_super(struct super_block *sb, struct fs_context *fc) { struct afs_super_info *as = AFS_FS_S(sb); return (as->net_ns == fc->net_ns && as->dyn_root); } static int afs_set_super(struct super_block *sb, struct fs_context *fc) { return set_anon_super(sb, NULL); } /* * fill in the superblock */ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) { struct afs_super_info 
*as = AFS_FS_S(sb); struct inode *inode = NULL; int ret; _enter(""); /* fill in the superblock */ sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; if (!as->dyn_root) sb->s_xattr = afs_xattr_handlers; ret = super_setup_bdi(sb); if (ret) return ret; /* allocate the root inode and dentry */ if (as->dyn_root) { inode = afs_iget_pseudo_dir(sb, true); } else { sprintf(sb->s_id, "%llu", as->volume->vid); afs_activate_volume(as->volume); inode = afs_root_iget(sb, ctx->key); } if (IS_ERR(inode)) return PTR_ERR(inode); if (ctx->autocell || as->dyn_root) set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); ret = -ENOMEM; sb->s_root = d_make_root(inode); if (!sb->s_root) goto error; if (as->dyn_root) { sb->s_d_op = &afs_dynroot_dentry_operations; ret = afs_dynroot_populate(sb); if (ret < 0) goto error; } else { sb->s_d_op = &afs_fs_dentry_operations; rcu_assign_pointer(as->volume->sb, sb); } _leave(" = 0"); return 0; error: _leave(" = %d", ret); return ret; } static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; struct afs_super_info *as; as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); if (as) { as->net_ns = get_net(fc->net_ns); as->flock_mode = ctx->flock_mode; if (ctx->dyn_root) { as->dyn_root = true; } else { as->cell = afs_use_cell(ctx->cell, afs_cell_trace_use_sbi); as->volume = afs_get_volume(ctx->volume, afs_volume_trace_get_alloc_sbi); } } return as; } static void afs_destroy_sbi(struct afs_super_info *as) { if (as) { struct afs_net *net = afs_net(as->net_ns); afs_put_volume(as->volume, afs_volume_trace_put_destroy_sbi); afs_unuse_cell(net, as->cell, afs_cell_trace_unuse_sbi); put_net(as->net_ns); kfree(as); } } static void afs_kill_super(struct super_block *sb) { struct afs_super_info *as = AFS_FS_S(sb); if (as->dyn_root) afs_dynroot_depopulate(sb); /* Clear the callback interests (which will do 
ilookup5) before * deactivating the superblock. */ if (as->volume) rcu_assign_pointer(as->volume->sb, NULL); kill_anon_super(sb); if (as->volume) afs_deactivate_volume(as->volume); afs_destroy_sbi(as); } /* * Get an AFS superblock and root directory. */ static int afs_get_tree(struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; struct super_block *sb; struct afs_super_info *as; int ret; ret = afs_validate_fc(fc); if (ret) goto error; _enter(""); /* allocate a superblock info record */ ret = -ENOMEM; as = afs_alloc_sbi(fc); if (!as) goto error; fc->s_fs_info = as; /* allocate a deviceless superblock */ sb = sget_fc(fc, as->dyn_root ? afs_dynroot_test_super : afs_test_super, afs_set_super); if (IS_ERR(sb)) { ret = PTR_ERR(sb); goto error; } if (!sb->s_root) { /* initial superblock/root creation */ _debug("create"); ret = afs_fill_super(sb, ctx); if (ret < 0) goto error_sb; sb->s_flags |= SB_ACTIVE; } else { _debug("reuse"); ASSERTCMP(sb->s_flags, &, SB_ACTIVE); } fc->root = dget(sb->s_root); trace_afs_get_tree(as->cell, as->volume); _leave(" = 0 [%p]", sb); return 0; error_sb: deactivate_locked_super(sb); error: _leave(" = %d", ret); return ret; } static void afs_free_fc(struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; afs_destroy_sbi(fc->s_fs_info); afs_put_volume(ctx->volume, afs_volume_trace_put_free_fc); afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc); key_put(ctx->key); kfree(ctx); } static const struct fs_context_operations afs_context_ops = { .free = afs_free_fc, .parse_param = afs_parse_param, .get_tree = afs_get_tree, }; /* * Set up the filesystem mount context. */ static int afs_init_fs_context(struct fs_context *fc) { struct afs_fs_context *ctx; struct afs_cell *cell; ctx = kzalloc(sizeof(struct afs_fs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->type = AFSVL_ROVOL; ctx->net = afs_net(fc->net_ns); /* Default to the workstation cell. 
*/ cell = afs_find_cell(ctx->net, NULL, 0, afs_cell_trace_use_fc); if (IS_ERR(cell)) cell = NULL; ctx->cell = cell; fc->fs_private = ctx; fc->ops = &afs_context_ops; return 0; } /* * Initialise an inode cache slab element prior to any use. Note that * afs_alloc_inode() *must* reset anything that could incorrectly leak from one * inode to another. */ static void afs_i_init_once(void *_vnode) { struct afs_vnode *vnode = _vnode; memset(vnode, 0, sizeof(*vnode)); inode_init_once(&vnode->netfs.inode); mutex_init(&vnode->io_lock); init_rwsem(&vnode->validate_lock); spin_lock_init(&vnode->wb_lock); spin_lock_init(&vnode->lock); INIT_LIST_HEAD(&vnode->wb_keys); INIT_LIST_HEAD(&vnode->pending_locks); INIT_LIST_HEAD(&vnode->granted_locks); INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); INIT_LIST_HEAD(&vnode->cb_mmap_link); seqlock_init(&vnode->cb_lock); } /* * allocate an AFS inode struct from our slab cache */ static struct inode *afs_alloc_inode(struct super_block *sb) { struct afs_vnode *vnode; vnode = alloc_inode_sb(sb, afs_inode_cachep, GFP_KERNEL); if (!vnode) return NULL; atomic_inc(&afs_count_active_inodes); /* Reset anything that shouldn't leak from one inode to the next. 
*/ memset(&vnode->fid, 0, sizeof(vnode->fid)); memset(&vnode->status, 0, sizeof(vnode->status)); afs_vnode_set_cache(vnode, NULL); vnode->volume = NULL; vnode->lock_key = NULL; vnode->permit_cache = NULL; vnode->flags = 1 << AFS_VNODE_UNSET; vnode->lock_state = AFS_VNODE_LOCK_NONE; init_rwsem(&vnode->rmdir_lock); INIT_WORK(&vnode->cb_work, afs_invalidate_mmap_work); _leave(" = %p", &vnode->netfs.inode); return &vnode->netfs.inode; } static void afs_free_inode(struct inode *inode) { kmem_cache_free(afs_inode_cachep, AFS_FS_I(inode)); } /* * destroy an AFS inode struct */ static void afs_destroy_inode(struct inode *inode) { struct afs_vnode *vnode = AFS_FS_I(inode); _enter("%p{%llx:%llu}", inode, vnode->fid.vid, vnode->fid.vnode); _debug("DESTROY INODE %p", inode); atomic_dec(&afs_count_active_inodes); } static void afs_get_volume_status_success(struct afs_operation *op) { struct afs_volume_status *vs = &op->volstatus.vs; struct kstatfs *buf = op->volstatus.buf; if (vs->max_quota == 0) buf->f_blocks = vs->part_max_blocks; else buf->f_blocks = vs->max_quota; if (buf->f_blocks > vs->blocks_in_use) buf->f_bavail = buf->f_bfree = buf->f_blocks - vs->blocks_in_use; } static const struct afs_operation_ops afs_get_volume_status_operation = { .issue_afs_rpc = afs_fs_get_volume_status, .issue_yfs_rpc = yfs_fs_get_volume_status, .success = afs_get_volume_status_success, }; /* * return information about an AFS volume */ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct afs_super_info *as = AFS_FS_S(dentry->d_sb); struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); buf->f_type = dentry->d_sb->s_magic; buf->f_bsize = AFS_BLOCK_SIZE; buf->f_namelen = AFSNAMEMAX - 1; if (as->dyn_root) { buf->f_blocks = 1; buf->f_bavail = 0; buf->f_bfree = 0; return 0; } op = afs_alloc_operation(NULL, as->volume); if (IS_ERR(op)) return PTR_ERR(op); afs_op_set_vnode(op, 0, vnode); op->nr_files = 1; op->volstatus.buf = buf; op->ops = 
&afs_get_volume_status_operation; return afs_do_sync_operation(op); }
1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 
1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 
1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 
1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) Qu Wenruo 2017. All rights reserved. */ /* * The module is used to catch unexpected/corrupted tree block data. * Such behavior can be caused either by a fuzzed image or bugs. * * The objective is to do leaf/node validation checks when tree block is read * from disk, and check *every* possible member, so other code won't * need to checking them again. 
* * Due to the potential and unwanted damage, every checker needs to be * carefully reviewed otherwise so it does not prevent mount of valid images. */ #include <linux/types.h> #include <linux/stddef.h> #include <linux/error-injection.h> #include "messages.h" #include "ctree.h" #include "tree-checker.h" #include "disk-io.h" #include "compression.h" #include "volumes.h" #include "misc.h" #include "fs.h" #include "accessors.h" #include "file-item.h" #include "inode-item.h" #include "dir-item.h" #include "raid-stripe-tree.h" #include "extent-tree.h" /* * Error message should follow the following format: * corrupt <type>: <identifier>, <reason>[, <bad_value>] * * @type: leaf or node * @identifier: the necessary info to locate the leaf/node. * It's recommended to decode key.objecitd/offset if it's * meaningful. * @reason: describe the error * @bad_value: optional, it's recommended to output bad value and its * expected value (range). * * Since comma is used to separate the components, only space is allowed * inside each component. */ /* * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt. * Allows callers to customize the output. */ __printf(3, 4) __cold static void generic_err(const struct extent_buffer *eb, int slot, const char *fmt, ...) { const struct btrfs_fs_info *fs_info = eb->fs_info; struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; btrfs_crit(fs_info, "corrupt %s: root=%llu block=%llu slot=%d, %pV", btrfs_header_level(eb) == 0 ? "leaf" : "node", btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf); va_end(args); } /* * Customized reporter for extent data item, since its key objectid and * offset has its own meaning. */ __printf(3, 4) __cold static void file_extent_err(const struct extent_buffer *eb, int slot, const char *fmt, ...) 
{ const struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_key key; struct va_format vaf; va_list args; btrfs_item_key_to_cpu(eb, &key, slot); va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; btrfs_crit(fs_info, "corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV", btrfs_header_level(eb) == 0 ? "leaf" : "node", btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, key.objectid, key.offset, &vaf); va_end(args); } /* * Return 0 if the btrfs_file_extent_##name is aligned to @alignment * Else return 1 */ #define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment) \ ({ \ if (unlikely(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), \ (alignment)))) \ file_extent_err((leaf), (slot), \ "invalid %s for file extent, have %llu, should be aligned to %u", \ (#name), btrfs_file_extent_##name((leaf), (fi)), \ (alignment)); \ (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))); \ }) static u64 file_extent_end(struct extent_buffer *leaf, struct btrfs_key *key, struct btrfs_file_extent_item *extent) { u64 end; u64 len; if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) { len = btrfs_file_extent_ram_bytes(leaf, extent); end = ALIGN(key->offset + len, leaf->fs_info->sectorsize); } else { len = btrfs_file_extent_num_bytes(leaf, extent); end = key->offset + len; } return end; } /* * Customized report for dir_item, the only new important information is * key->objectid, which represents inode number */ __printf(3, 4) __cold static void dir_item_err(const struct extent_buffer *eb, int slot, const char *fmt, ...) { const struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_key key; struct va_format vaf; va_list args; btrfs_item_key_to_cpu(eb, &key, slot); va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; btrfs_crit(fs_info, "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV", btrfs_header_level(eb) == 0 ? 
"leaf" : "node", btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, key.objectid, &vaf); va_end(args); } /* * This functions checks prev_key->objectid, to ensure current key and prev_key * share the same objectid as inode number. * * This is to detect missing INODE_ITEM in subvolume trees. * * Return true if everything is OK or we don't need to check. * Return false if anything is wrong. */ static bool check_prev_ino(struct extent_buffer *leaf, struct btrfs_key *key, int slot, struct btrfs_key *prev_key) { /* No prev key, skip check */ if (slot == 0) return true; /* Only these key->types needs to be checked */ ASSERT(key->type == BTRFS_XATTR_ITEM_KEY || key->type == BTRFS_INODE_REF_KEY || key->type == BTRFS_DIR_INDEX_KEY || key->type == BTRFS_DIR_ITEM_KEY || key->type == BTRFS_EXTENT_DATA_KEY); /* * Only subvolume trees along with their reloc trees need this check. * Things like log tree doesn't follow this ino requirement. */ if (!is_fstree(btrfs_header_owner(leaf))) return true; if (key->objectid == prev_key->objectid) return true; /* Error found */ dir_item_err(leaf, slot, "invalid previous key objectid, have %llu expect %llu", prev_key->objectid, key->objectid); return false; } static int check_extent_data_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot, struct btrfs_key *prev_key) { struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_file_extent_item *fi; u32 sectorsize = fs_info->sectorsize; u32 item_size = btrfs_item_size(leaf, slot); u64 extent_end; if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) { file_extent_err(leaf, slot, "unaligned file_offset for file extent, have %llu should be aligned to %u", key->offset, sectorsize); return -EUCLEAN; } /* * Previous key must have the same key->objectid (ino). * It can be XATTR_ITEM, INODE_ITEM or just another EXTENT_DATA. * But if objectids mismatch, it means we have a missing * INODE_ITEM. 
*/ if (unlikely(!check_prev_ino(leaf, key, slot, prev_key))) return -EUCLEAN; fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); /* * Make sure the item contains at least inline header, so the file * extent type is not some garbage. */ if (unlikely(item_size < BTRFS_FILE_EXTENT_INLINE_DATA_START)) { file_extent_err(leaf, slot, "invalid item size, have %u expect [%zu, %u)", item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START, SZ_4K); return -EUCLEAN; } if (unlikely(btrfs_file_extent_type(leaf, fi) >= BTRFS_NR_FILE_EXTENT_TYPES)) { file_extent_err(leaf, slot, "invalid type for file extent, have %u expect range [0, %u]", btrfs_file_extent_type(leaf, fi), BTRFS_NR_FILE_EXTENT_TYPES - 1); return -EUCLEAN; } /* * Support for new compression/encryption must introduce incompat flag, * and must be caught in open_ctree(). */ if (unlikely(btrfs_file_extent_compression(leaf, fi) >= BTRFS_NR_COMPRESS_TYPES)) { file_extent_err(leaf, slot, "invalid compression for file extent, have %u expect range [0, %u]", btrfs_file_extent_compression(leaf, fi), BTRFS_NR_COMPRESS_TYPES - 1); return -EUCLEAN; } if (unlikely(btrfs_file_extent_encryption(leaf, fi))) { file_extent_err(leaf, slot, "invalid encryption for file extent, have %u expect 0", btrfs_file_extent_encryption(leaf, fi)); return -EUCLEAN; } if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { /* Inline extent must have 0 as key offset */ if (unlikely(key->offset)) { file_extent_err(leaf, slot, "invalid file_offset for inline file extent, have %llu expect 0", key->offset); return -EUCLEAN; } /* Compressed inline extent has no on-disk size, skip it */ if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) return 0; /* Uncompressed inline extent size must match item size */ if (unlikely(item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + btrfs_file_extent_ram_bytes(leaf, fi))) { file_extent_err(leaf, slot, "invalid ram_bytes for uncompressed inline extent, have %u expect %llu", item_size, 
BTRFS_FILE_EXTENT_INLINE_DATA_START + btrfs_file_extent_ram_bytes(leaf, fi)); return -EUCLEAN; } return 0; } /* Regular or preallocated extent has fixed item size */ if (unlikely(item_size != sizeof(*fi))) { file_extent_err(leaf, slot, "invalid item size for reg/prealloc file extent, have %u expect %zu", item_size, sizeof(*fi)); return -EUCLEAN; } if (unlikely(CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) || CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) || CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) || CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) || CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize))) return -EUCLEAN; /* Catch extent end overflow */ if (unlikely(check_add_overflow(btrfs_file_extent_num_bytes(leaf, fi), key->offset, &extent_end))) { file_extent_err(leaf, slot, "extent end overflow, have file offset %llu extent num bytes %llu", key->offset, btrfs_file_extent_num_bytes(leaf, fi)); return -EUCLEAN; } /* * Check that no two consecutive file extent items, in the same leaf, * present ranges that overlap each other. 
*/ if (slot > 0 && prev_key->objectid == key->objectid && prev_key->type == BTRFS_EXTENT_DATA_KEY) { struct btrfs_file_extent_item *prev_fi; u64 prev_end; prev_fi = btrfs_item_ptr(leaf, slot - 1, struct btrfs_file_extent_item); prev_end = file_extent_end(leaf, prev_key, prev_fi); if (unlikely(prev_end > key->offset)) { file_extent_err(leaf, slot - 1, "file extent end range (%llu) goes beyond start offset (%llu) of the next file extent", prev_end, key->offset); return -EUCLEAN; } } return 0; } static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot, struct btrfs_key *prev_key) { struct btrfs_fs_info *fs_info = leaf->fs_info; u32 sectorsize = fs_info->sectorsize; const u32 csumsize = fs_info->csum_size; if (unlikely(key->objectid != BTRFS_EXTENT_CSUM_OBJECTID)) { generic_err(leaf, slot, "invalid key objectid for csum item, have %llu expect %llu", key->objectid, BTRFS_EXTENT_CSUM_OBJECTID); return -EUCLEAN; } if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) { generic_err(leaf, slot, "unaligned key offset for csum item, have %llu should be aligned to %u", key->offset, sectorsize); return -EUCLEAN; } if (unlikely(!IS_ALIGNED(btrfs_item_size(leaf, slot), csumsize))) { generic_err(leaf, slot, "unaligned item size for csum item, have %u should be aligned to %u", btrfs_item_size(leaf, slot), csumsize); return -EUCLEAN; } if (slot > 0 && prev_key->type == BTRFS_EXTENT_CSUM_KEY) { u64 prev_csum_end; u32 prev_item_size; prev_item_size = btrfs_item_size(leaf, slot - 1); prev_csum_end = (prev_item_size / csumsize) * sectorsize; prev_csum_end += prev_key->offset; if (unlikely(prev_csum_end > key->offset)) { generic_err(leaf, slot - 1, "csum end range (%llu) goes beyond the start range (%llu) of the next csum item", prev_csum_end, key->offset); return -EUCLEAN; } } return 0; } /* Inode item error output has the same format as dir_item_err() */ #define inode_item_err(eb, slot, fmt, ...) 
\ dir_item_err(eb, slot, fmt, __VA_ARGS__) static int check_inode_key(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_key item_key; bool is_inode_item; btrfs_item_key_to_cpu(leaf, &item_key, slot); is_inode_item = (item_key.type == BTRFS_INODE_ITEM_KEY); /* For XATTR_ITEM, location key should be all 0 */ if (item_key.type == BTRFS_XATTR_ITEM_KEY) { if (unlikely(key->objectid != 0 || key->type != 0 || key->offset != 0)) return -EUCLEAN; return 0; } if (unlikely((key->objectid < BTRFS_FIRST_FREE_OBJECTID || key->objectid > BTRFS_LAST_FREE_OBJECTID) && key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID && key->objectid != BTRFS_FREE_INO_OBJECTID)) { if (is_inode_item) { generic_err(leaf, slot, "invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu", key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID, BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID, BTRFS_FREE_INO_OBJECTID); } else { dir_item_err(leaf, slot, "invalid location key objectid: has %llu expect %llu or [%llu, %llu] or %llu", key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID, BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID, BTRFS_FREE_INO_OBJECTID); } return -EUCLEAN; } if (unlikely(key->offset != 0)) { if (is_inode_item) inode_item_err(leaf, slot, "invalid key offset: has %llu expect 0", key->offset); else dir_item_err(leaf, slot, "invalid location key offset:has %llu expect 0", key->offset); return -EUCLEAN; } return 0; } static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_key item_key; bool is_root_item; btrfs_item_key_to_cpu(leaf, &item_key, slot); is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY); /* * Bad rootid for reloc trees. * * Reloc trees are only for subvolume trees, other trees only need * to be COWed to be relocated. 
*/ if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID && !is_fstree(key->offset))) { generic_err(leaf, slot, "invalid reloc tree for root %lld, root id is not a subvolume tree", key->offset); return -EUCLEAN; } /* No such tree id */ if (unlikely(key->objectid == 0)) { if (is_root_item) generic_err(leaf, slot, "invalid root id 0"); else dir_item_err(leaf, slot, "invalid location key root id 0"); return -EUCLEAN; } /* DIR_ITEM/INDEX/INODE_REF is not allowed to point to non-fs trees */ if (unlikely(!is_fstree(key->objectid) && !is_root_item)) { dir_item_err(leaf, slot, "invalid location key objectid, have %llu expect [%llu, %llu]", key->objectid, BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID); return -EUCLEAN; } /* * ROOT_ITEM with non-zero offset means this is a snapshot, created at * @offset transid. * Furthermore, for location key in DIR_ITEM, its offset is always -1. * * So here we only check offset for reloc tree whose key->offset must * be a valid tree. */ if (unlikely(key->objectid == BTRFS_TREE_RELOC_OBJECTID && key->offset == 0)) { generic_err(leaf, slot, "invalid root id 0 for reloc tree"); return -EUCLEAN; } return 0; } static int check_dir_item(struct extent_buffer *leaf, struct btrfs_key *key, struct btrfs_key *prev_key, int slot) { struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_dir_item *di; u32 item_size = btrfs_item_size(leaf, slot); u32 cur = 0; if (unlikely(!check_prev_ino(leaf, key, slot, prev_key))) return -EUCLEAN; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); while (cur < item_size) { struct btrfs_key location_key; u32 name_len; u32 data_len; u32 max_name_len; u32 total_size; u32 name_hash; u8 dir_type; int ret; /* header itself should not cross item boundary */ if (unlikely(cur + sizeof(*di) > item_size)) { dir_item_err(leaf, slot, "dir item header crosses item boundary, have %zu boundary %u", cur + sizeof(*di), item_size); return -EUCLEAN; } /* Location key check */ 
btrfs_dir_item_key_to_cpu(leaf, di, &location_key); if (location_key.type == BTRFS_ROOT_ITEM_KEY) { ret = check_root_key(leaf, &location_key, slot); if (unlikely(ret < 0)) return ret; } else if (location_key.type == BTRFS_INODE_ITEM_KEY || location_key.type == 0) { ret = check_inode_key(leaf, &location_key, slot); if (unlikely(ret < 0)) return ret; } else { dir_item_err(leaf, slot, "invalid location key type, have %u, expect %u or %u", location_key.type, BTRFS_ROOT_ITEM_KEY, BTRFS_INODE_ITEM_KEY); return -EUCLEAN; } /* dir type check */ dir_type = btrfs_dir_ftype(leaf, di); if (unlikely(dir_type >= BTRFS_FT_MAX)) { dir_item_err(leaf, slot, "invalid dir item type, have %u expect [0, %u)", dir_type, BTRFS_FT_MAX); return -EUCLEAN; } if (unlikely(key->type == BTRFS_XATTR_ITEM_KEY && dir_type != BTRFS_FT_XATTR)) { dir_item_err(leaf, slot, "invalid dir item type for XATTR key, have %u expect %u", dir_type, BTRFS_FT_XATTR); return -EUCLEAN; } if (unlikely(dir_type == BTRFS_FT_XATTR && key->type != BTRFS_XATTR_ITEM_KEY)) { dir_item_err(leaf, slot, "xattr dir type found for non-XATTR key"); return -EUCLEAN; } if (dir_type == BTRFS_FT_XATTR) max_name_len = XATTR_NAME_MAX; else max_name_len = BTRFS_NAME_LEN; /* Name/data length check */ name_len = btrfs_dir_name_len(leaf, di); data_len = btrfs_dir_data_len(leaf, di); if (unlikely(name_len > max_name_len)) { dir_item_err(leaf, slot, "dir item name len too long, have %u max %u", name_len, max_name_len); return -EUCLEAN; } if (unlikely(name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info))) { dir_item_err(leaf, slot, "dir item name and data len too long, have %u max %u", name_len + data_len, BTRFS_MAX_XATTR_SIZE(fs_info)); return -EUCLEAN; } if (unlikely(data_len && dir_type != BTRFS_FT_XATTR)) { dir_item_err(leaf, slot, "dir item with invalid data len, have %u expect 0", data_len); return -EUCLEAN; } total_size = sizeof(*di) + name_len + data_len; /* header and name/data should not cross item boundary */ if (unlikely(cur + 
total_size > item_size)) { dir_item_err(leaf, slot, "dir item data crosses item boundary, have %u boundary %u", cur + total_size, item_size); return -EUCLEAN; } /* * Special check for XATTR/DIR_ITEM, as key->offset is name * hash, should match its name */ if (key->type == BTRFS_DIR_ITEM_KEY || key->type == BTRFS_XATTR_ITEM_KEY) { char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; read_extent_buffer(leaf, namebuf, (unsigned long)(di + 1), name_len); name_hash = btrfs_name_hash(namebuf, name_len); if (unlikely(key->offset != name_hash)) { dir_item_err(leaf, slot, "name hash mismatch with key, have 0x%016x expect 0x%016llx", name_hash, key->offset); return -EUCLEAN; } } cur += total_size; di = (struct btrfs_dir_item *)((void *)di + total_size); } return 0; } __printf(3, 4) __cold static void block_group_err(const struct extent_buffer *eb, int slot, const char *fmt, ...) { const struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_key key; struct va_format vaf; va_list args; btrfs_item_key_to_cpu(eb, &key, slot); va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; btrfs_crit(fs_info, "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV", btrfs_header_level(eb) == 0 ? "leaf" : "node", btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, key.objectid, key.offset, &vaf); va_end(args); } static int check_block_group_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_block_group_item bgi; u32 item_size = btrfs_item_size(leaf, slot); u64 chunk_objectid; u64 flags; u64 type; /* * Here we don't really care about alignment since extent allocator can * handle it. We care more about the size. 
*/ if (unlikely(key->offset == 0)) { block_group_err(leaf, slot, "invalid block group size 0"); return -EUCLEAN; } if (unlikely(item_size != sizeof(bgi))) { block_group_err(leaf, slot, "invalid item size, have %u expect %zu", item_size, sizeof(bgi)); return -EUCLEAN; } read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot), sizeof(bgi)); chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi); if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { /* * We don't init the nr_global_roots until we load the global * roots, so this could be 0 at mount time. If it's 0 we'll * just assume we're fine, and later we'll check against our * actual value. */ if (unlikely(fs_info->nr_global_roots && chunk_objectid >= fs_info->nr_global_roots)) { block_group_err(leaf, slot, "invalid block group global root id, have %llu, needs to be <= %llu", chunk_objectid, fs_info->nr_global_roots); return -EUCLEAN; } } else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) { block_group_err(leaf, slot, "invalid block group chunk objectid, have %llu expect %llu", btrfs_stack_block_group_chunk_objectid(&bgi), BTRFS_FIRST_CHUNK_TREE_OBJECTID); return -EUCLEAN; } if (unlikely(btrfs_stack_block_group_used(&bgi) > key->offset)) { block_group_err(leaf, slot, "invalid block group used, have %llu expect [0, %llu)", btrfs_stack_block_group_used(&bgi), key->offset); return -EUCLEAN; } flags = btrfs_stack_block_group_flags(&bgi); if (unlikely(hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1)) { block_group_err(leaf, slot, "invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set", flags & BTRFS_BLOCK_GROUP_PROFILE_MASK, hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)); return -EUCLEAN; } type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; if (unlikely(type != BTRFS_BLOCK_GROUP_DATA && type != BTRFS_BLOCK_GROUP_METADATA && type != BTRFS_BLOCK_GROUP_SYSTEM && type != (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { block_group_err(leaf, slot, "invalid 
type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx", type, hweight64(type), BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, BTRFS_BLOCK_GROUP_SYSTEM, BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA); return -EUCLEAN; } return 0; } __printf(4, 5) __cold static void chunk_err(const struct extent_buffer *leaf, const struct btrfs_chunk *chunk, u64 logical, const char *fmt, ...) { const struct btrfs_fs_info *fs_info = leaf->fs_info; bool is_sb; struct va_format vaf; va_list args; int i; int slot = -1; /* Only superblock eb is able to have such small offset */ is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET); if (!is_sb) { /* * Get the slot number by iterating through all slots, this * would provide better readability. */ for (i = 0; i < btrfs_header_nritems(leaf); i++) { if (btrfs_item_ptr_offset(leaf, i) == (unsigned long)chunk) { slot = i; break; } } } va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; if (is_sb) btrfs_crit(fs_info, "corrupt superblock syschunk array: chunk_start=%llu, %pV", logical, &vaf); else btrfs_crit(fs_info, "corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV", BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot, logical, &vaf); va_end(args); } /* * The common chunk check which could also work on super block sys chunk array. * * Return -EUCLEAN if anything is corrupted. * Return 0 if everything is OK. 
*/
int btrfs_check_chunk_valid(struct extent_buffer *leaf,
			    struct btrfs_chunk *chunk, u64 logical)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	const u32 sectorsize = fs_info->sectorsize;
	/* Snapshot every chunk member that gets validated below */
	const u64 length = btrfs_chunk_length(leaf, chunk);
	const u64 stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
	const u16 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	const u16 sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
	const u64 type = btrfs_chunk_type(leaf, chunk);
	const u64 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
	const u64 known_bits = BTRFS_BLOCK_GROUP_TYPE_MASK |
			       BTRFS_BLOCK_GROUP_PROFILE_MASK;
	/* Expected copies/parity for the profile encoded in @type */
	const int raid_index = btrfs_bg_flags_to_raid_index(type);
	const int ncopies = btrfs_raid_array[raid_index].ncopies;
	const int nparity = btrfs_raid_array[raid_index].nparity;
	u32 chunk_sectorsize;
	u64 chunk_end;
	u64 incompat;

	/* Stripe count sanity against the profile */
	if (unlikely(num_stripes == 0)) {
		chunk_err(leaf, chunk, logical,
			  "invalid chunk num_stripes, have %u", num_stripes);
		return -EUCLEAN;
	}
	if (unlikely(num_stripes < ncopies)) {
		chunk_err(leaf, chunk, logical,
			  "invalid chunk num_stripes < ncopies, have %u < %d",
			  num_stripes, ncopies);
		return -EUCLEAN;
	}
	if (unlikely(nparity && num_stripes == nparity)) {
		chunk_err(leaf, chunk, logical,
			  "invalid chunk num_stripes == nparity, have %u == %d",
			  num_stripes, nparity);
		return -EUCLEAN;
	}

	/* Logical start, sector size and length */
	if (unlikely(!IS_ALIGNED(logical, sectorsize))) {
		chunk_err(leaf, chunk, logical,
		"invalid chunk logical, have %llu should aligned to %u",
			  logical, sectorsize);
		return -EUCLEAN;
	}
	chunk_sectorsize = btrfs_chunk_sector_size(leaf, chunk);
	if (unlikely(chunk_sectorsize != sectorsize)) {
		chunk_err(leaf, chunk, logical,
			  "invalid chunk sectorsize, have %u expect %u",
			  chunk_sectorsize, sectorsize);
		return -EUCLEAN;
	}
	if (unlikely(length == 0 || !IS_ALIGNED(length, sectorsize))) {
		chunk_err(leaf, chunk, logical,
			  "invalid chunk length, have %llu", length);
		return -EUCLEAN;
	}
	/* chunk_end is only needed to detect the wrap around */
	if (unlikely(check_add_overflow(logical, length, &chunk_end))) {
		chunk_err(leaf, chunk, logical,
"invalid chunk logical start and length, have logical start %llu length %llu",
			  logical, length);
		return -EUCLEAN;
	}
	if (unlikely(!is_power_of_2(stripe_len) ||
		     stripe_len != BTRFS_STRIPE_LEN)) {
		chunk_err(leaf, chunk, logical,
			  "invalid chunk stripe length: %llu",
			  stripe_len);
		return -EUCLEAN;
	}

	/*
	 * We artificially limit the chunk size, so that the number of stripes
	 * inside a chunk can be fit into a U32.  The current limit (256G) is
	 * way too large for real world usage anyway, and it's also much larger
	 * than our existing limit (10G).
	 *
	 * Thus it should be a good way to catch obvious bitflips.
	 */
	if (unlikely(length >= btrfs_stripe_nr_to_offset(U32_MAX))) {
		chunk_err(leaf, chunk, logical,
			  "chunk length too large: have %llu limit %llu",
			  length, btrfs_stripe_nr_to_offset(U32_MAX));
		return -EUCLEAN;
	}

	/* Type and profile bits */
	if (unlikely(type & ~known_bits)) {
		chunk_err(leaf, chunk, logical,
			  "unrecognized chunk type: 0x%llx",
			  type & ~known_bits);
		return -EUCLEAN;
	}
	if (unlikely(profile != 0 && !has_single_bit_set(profile))) {
		chunk_err(leaf, chunk, logical,
		"invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
			  profile);
		return -EUCLEAN;
	}
	if (unlikely(!(type & BTRFS_BLOCK_GROUP_TYPE_MASK))) {
		chunk_err(leaf, chunk, logical,
	"missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
			  type, BTRFS_BLOCK_GROUP_TYPE_MASK);
		return -EUCLEAN;
	}
	if (unlikely((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
		     (type & (BTRFS_BLOCK_GROUP_METADATA |
			      BTRFS_BLOCK_GROUP_DATA)))) {
		chunk_err(leaf, chunk, logical,
			  "system chunk with data or metadata type: 0x%llx",
			  type);
		return -EUCLEAN;
	}

	/* DATA|METADATA in one chunk is only valid with MIXED_GROUPS */
	incompat = btrfs_super_incompat_flags(fs_info->super_copy);
	if (!(incompat & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
	    unlikely((type & BTRFS_BLOCK_GROUP_METADATA) &&
		     (type & BTRFS_BLOCK_GROUP_DATA))) {
		chunk_err(leaf, chunk, logical,
		"mixed chunk type in non-mixed mode: 0x%llx", type);
		return -EUCLEAN;
	}

	/* Per-profile stripe count requirements */
	if (unlikely(((type & BTRFS_BLOCK_GROUP_RAID10) &&
		      sub_stripes != btrfs_raid_array[BTRFS_RAID_RAID10].sub_stripes) ||
		     ((type & BTRFS_BLOCK_GROUP_RAID1) &&
		      num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1].devs_min) ||
		     ((type & BTRFS_BLOCK_GROUP_RAID1C3) &&
		      num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1C3].devs_min) ||
		     ((type & BTRFS_BLOCK_GROUP_RAID1C4) &&
		      num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1C4].devs_min) ||
		     ((type & BTRFS_BLOCK_GROUP_RAID5) &&
		      num_stripes < btrfs_raid_array[BTRFS_RAID_RAID5].devs_min) ||
		     ((type & BTRFS_BLOCK_GROUP_RAID6) &&
		      num_stripes < btrfs_raid_array[BTRFS_RAID_RAID6].devs_min) ||
		     ((type & BTRFS_BLOCK_GROUP_DUP) &&
		      num_stripes != btrfs_raid_array[BTRFS_RAID_DUP].dev_stripes) ||
		     (profile == 0 &&
		      num_stripes != btrfs_raid_array[BTRFS_RAID_SINGLE].dev_stripes))) {
		chunk_err(leaf, chunk, logical,
			"invalid num_stripes:sub_stripes %u:%u for profile %llu",
			num_stripes, sub_stripes, profile);
		return -EUCLEAN;
	}

	return 0;
}

/*
 * Enhanced version of chunk item checker.
 *
 * The common btrfs_check_chunk_valid() doesn't check item size since it needs
 * to work on super block sys_chunk_array which doesn't have full item ptr.
*/ static int check_leaf_chunk_item(struct extent_buffer *leaf, struct btrfs_chunk *chunk, struct btrfs_key *key, int slot) { int num_stripes; if (unlikely(btrfs_item_size(leaf, slot) < sizeof(struct btrfs_chunk))) { chunk_err(leaf, chunk, key->offset, "invalid chunk item size: have %u expect [%zu, %u)", btrfs_item_size(leaf, slot), sizeof(struct btrfs_chunk), BTRFS_LEAF_DATA_SIZE(leaf->fs_info)); return -EUCLEAN; } num_stripes = btrfs_chunk_num_stripes(leaf, chunk); /* Let btrfs_check_chunk_valid() handle this error type */ if (num_stripes == 0) goto out; if (unlikely(btrfs_chunk_item_size(num_stripes) != btrfs_item_size(leaf, slot))) { chunk_err(leaf, chunk, key->offset, "invalid chunk item size: have %u expect %lu", btrfs_item_size(leaf, slot), btrfs_chunk_item_size(num_stripes)); return -EUCLEAN; } out: return btrfs_check_chunk_valid(leaf, chunk, key->offset); } __printf(3, 4) __cold static void dev_item_err(const struct extent_buffer *eb, int slot, const char *fmt, ...) { struct btrfs_key key; struct va_format vaf; va_list args; btrfs_item_key_to_cpu(eb, &key, slot); va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; btrfs_crit(eb->fs_info, "corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV", btrfs_header_level(eb) == 0 ? 
"leaf" : "node", btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, key.objectid, &vaf); va_end(args); } static int check_dev_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_dev_item *ditem; const u32 item_size = btrfs_item_size(leaf, slot); if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) { dev_item_err(leaf, slot, "invalid objectid: has=%llu expect=%llu", key->objectid, BTRFS_DEV_ITEMS_OBJECTID); return -EUCLEAN; } if (unlikely(item_size != sizeof(*ditem))) { dev_item_err(leaf, slot, "invalid item size: has %u expect %zu", item_size, sizeof(*ditem)); return -EUCLEAN; } ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) { dev_item_err(leaf, slot, "devid mismatch: key has=%llu item has=%llu", key->offset, btrfs_device_id(leaf, ditem)); return -EUCLEAN; } /* * For device total_bytes, we don't have reliable way to check it, as * it can be 0 for device removal. Device size check can only be done * by dev extents check. */ if (unlikely(btrfs_device_bytes_used(leaf, ditem) > btrfs_device_total_bytes(leaf, ditem))) { dev_item_err(leaf, slot, "invalid bytes used: have %llu expect [0, %llu]", btrfs_device_bytes_used(leaf, ditem), btrfs_device_total_bytes(leaf, ditem)); return -EUCLEAN; } /* * Remaining members like io_align/type/gen/dev_group aren't really * utilized. Skip them to make later usage of them easier. 
*/ return 0; } static int check_inode_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_inode_item *iitem; u64 super_gen = btrfs_super_generation(fs_info->super_copy); u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777); const u32 item_size = btrfs_item_size(leaf, slot); u32 mode; int ret; u32 flags; u32 ro_flags; ret = check_inode_key(leaf, key, slot); if (unlikely(ret < 0)) return ret; if (unlikely(item_size != sizeof(*iitem))) { generic_err(leaf, slot, "invalid item size: has %u expect %zu", item_size, sizeof(*iitem)); return -EUCLEAN; } iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item); /* Here we use super block generation + 1 to handle log tree */ if (unlikely(btrfs_inode_generation(leaf, iitem) > super_gen + 1)) { inode_item_err(leaf, slot, "invalid inode generation: has %llu expect (0, %llu]", btrfs_inode_generation(leaf, iitem), super_gen + 1); return -EUCLEAN; } /* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */ if (unlikely(btrfs_inode_transid(leaf, iitem) > super_gen + 1)) { inode_item_err(leaf, slot, "invalid inode transid: has %llu expect [0, %llu]", btrfs_inode_transid(leaf, iitem), super_gen + 1); return -EUCLEAN; } /* * For size and nbytes it's better not to be too strict, as for dir * item its size/nbytes can easily get wrong, but doesn't affect * anything in the fs. So here we skip the check. */ mode = btrfs_inode_mode(leaf, iitem); if (unlikely(mode & ~valid_mask)) { inode_item_err(leaf, slot, "unknown mode bit detected: 0x%x", mode & ~valid_mask); return -EUCLEAN; } /* * S_IFMT is not bit mapped so we can't completely rely on * is_power_of_2/has_single_bit_set, but it can save us from checking * FIFO/CHR/DIR/REG. 
Only needs to check BLK, LNK and SOCKS */ if (!has_single_bit_set(mode & S_IFMT)) { if (unlikely(!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode))) { inode_item_err(leaf, slot, "invalid mode: has 0%o expect valid S_IF* bit(s)", mode & S_IFMT); return -EUCLEAN; } } if (unlikely(S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1)) { inode_item_err(leaf, slot, "invalid nlink: has %u expect no more than 1 for dir", btrfs_inode_nlink(leaf, iitem)); return -EUCLEAN; } btrfs_inode_split_flags(btrfs_inode_flags(leaf, iitem), &flags, &ro_flags); if (unlikely(flags & ~BTRFS_INODE_FLAG_MASK)) { inode_item_err(leaf, slot, "unknown incompat flags detected: 0x%x", flags); return -EUCLEAN; } if (unlikely(!sb_rdonly(fs_info->sb) && (ro_flags & ~BTRFS_INODE_RO_FLAG_MASK))) { inode_item_err(leaf, slot, "unknown ro-compat flags detected on writeable mount: 0x%x", ro_flags); return -EUCLEAN; } return 0; } static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_root_item ri = { 0 }; const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY | BTRFS_ROOT_SUBVOL_DEAD; int ret; ret = check_root_key(leaf, key, slot); if (unlikely(ret < 0)) return ret; if (unlikely(btrfs_item_size(leaf, slot) != sizeof(ri) && btrfs_item_size(leaf, slot) != btrfs_legacy_root_item_size())) { generic_err(leaf, slot, "invalid root item size, have %u expect %zu or %u", btrfs_item_size(leaf, slot), sizeof(ri), btrfs_legacy_root_item_size()); return -EUCLEAN; } /* * For legacy root item, the members starting at generation_v2 will be * all filled with 0. * And since we allow geneartion_v2 as 0, it will still pass the check. 
*/ read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot), btrfs_item_size(leaf, slot)); /* Generation related */ if (unlikely(btrfs_root_generation(&ri) > btrfs_super_generation(fs_info->super_copy) + 1)) { generic_err(leaf, slot, "invalid root generation, have %llu expect (0, %llu]", btrfs_root_generation(&ri), btrfs_super_generation(fs_info->super_copy) + 1); return -EUCLEAN; } if (unlikely(btrfs_root_generation_v2(&ri) > btrfs_super_generation(fs_info->super_copy) + 1)) { generic_err(leaf, slot, "invalid root v2 generation, have %llu expect (0, %llu]", btrfs_root_generation_v2(&ri), btrfs_super_generation(fs_info->super_copy) + 1); return -EUCLEAN; } if (unlikely(btrfs_root_last_snapshot(&ri) > btrfs_super_generation(fs_info->super_copy) + 1)) { generic_err(leaf, slot, "invalid root last_snapshot, have %llu expect (0, %llu]", btrfs_root_last_snapshot(&ri), btrfs_super_generation(fs_info->super_copy) + 1); return -EUCLEAN; } /* Alignment and level check */ if (unlikely(!IS_ALIGNED(btrfs_root_bytenr(&ri), fs_info->sectorsize))) { generic_err(leaf, slot, "invalid root bytenr, have %llu expect to be aligned to %u", btrfs_root_bytenr(&ri), fs_info->sectorsize); return -EUCLEAN; } if (unlikely(btrfs_root_level(&ri) >= BTRFS_MAX_LEVEL)) { generic_err(leaf, slot, "invalid root level, have %u expect [0, %u]", btrfs_root_level(&ri), BTRFS_MAX_LEVEL - 1); return -EUCLEAN; } if (unlikely(btrfs_root_drop_level(&ri) >= BTRFS_MAX_LEVEL)) { generic_err(leaf, slot, "invalid root level, have %u expect [0, %u]", btrfs_root_drop_level(&ri), BTRFS_MAX_LEVEL - 1); return -EUCLEAN; } /* Flags check */ if (unlikely(btrfs_root_flags(&ri) & ~valid_root_flags)) { generic_err(leaf, slot, "invalid root flags, have 0x%llx expect mask 0x%llx", btrfs_root_flags(&ri), valid_root_flags); return -EUCLEAN; } return 0; } __printf(3,4) __cold static void extent_err(const struct extent_buffer *eb, int slot, const char *fmt, ...) 
{ struct btrfs_key key; struct va_format vaf; va_list args; u64 bytenr; u64 len; btrfs_item_key_to_cpu(eb, &key, slot); bytenr = key.objectid; if (key.type == BTRFS_METADATA_ITEM_KEY || key.type == BTRFS_TREE_BLOCK_REF_KEY || key.type == BTRFS_SHARED_BLOCK_REF_KEY) len = eb->fs_info->nodesize; else len = key.offset; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; btrfs_crit(eb->fs_info, "corrupt %s: block=%llu slot=%d extent bytenr=%llu len=%llu %pV", btrfs_header_level(eb) == 0 ? "leaf" : "node", eb->start, slot, bytenr, len, &vaf); va_end(args); } static int check_extent_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot, struct btrfs_key *prev_key) { struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_extent_item *ei; bool is_tree_block = false; unsigned long ptr; /* Current pointer inside inline refs */ unsigned long end; /* Extent item end */ const u32 item_size = btrfs_item_size(leaf, slot); u8 last_type = 0; u64 last_seq = U64_MAX; u64 flags; u64 generation; u64 total_refs; /* Total refs in btrfs_extent_item */ u64 inline_refs = 0; /* found total inline refs */ if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))) { generic_err(leaf, slot, "invalid key type, METADATA_ITEM type invalid when SKINNY_METADATA feature disabled"); return -EUCLEAN; } /* key->objectid is the bytenr for both key types */ if (unlikely(!IS_ALIGNED(key->objectid, fs_info->sectorsize))) { generic_err(leaf, slot, "invalid key objectid, have %llu expect to be aligned to %u", key->objectid, fs_info->sectorsize); return -EUCLEAN; } /* key->offset is tree level for METADATA_ITEM_KEY */ if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY && key->offset >= BTRFS_MAX_LEVEL)) { extent_err(leaf, slot, "invalid tree level, have %llu expect [0, %u]", key->offset, BTRFS_MAX_LEVEL - 1); return -EUCLEAN; } /* * EXTENT/METADATA_ITEM consists of: * 1) One btrfs_extent_item * Records the total refs, type and generation of the extent. 
* * 2) One btrfs_tree_block_info (for EXTENT_ITEM and tree backref only) * Records the first key and level of the tree block. * * 2) Zero or more btrfs_extent_inline_ref(s) * Each inline ref has one btrfs_extent_inline_ref shows: * 2.1) The ref type, one of the 4 * TREE_BLOCK_REF Tree block only * SHARED_BLOCK_REF Tree block only * EXTENT_DATA_REF Data only * SHARED_DATA_REF Data only * 2.2) Ref type specific data * Either using btrfs_extent_inline_ref::offset, or specific * data structure. * * All above inline items should follow the order: * * - All btrfs_extent_inline_ref::type should be in an ascending * order * * - Within the same type, the items should follow a descending * order by their sequence number. The sequence number is * determined by: * * btrfs_extent_inline_ref::offset for all types other than * EXTENT_DATA_REF * * hash_extent_data_ref() for EXTENT_DATA_REF */ if (unlikely(item_size < sizeof(*ei))) { extent_err(leaf, slot, "invalid item size, have %u expect [%zu, %u)", item_size, sizeof(*ei), BTRFS_LEAF_DATA_SIZE(fs_info)); return -EUCLEAN; } end = item_size + btrfs_item_ptr_offset(leaf, slot); /* Checks against extent_item */ ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); flags = btrfs_extent_flags(leaf, ei); total_refs = btrfs_extent_refs(leaf, ei); generation = btrfs_extent_generation(leaf, ei); if (unlikely(generation > btrfs_super_generation(fs_info->super_copy) + 1)) { extent_err(leaf, slot, "invalid generation, have %llu expect (0, %llu]", generation, btrfs_super_generation(fs_info->super_copy) + 1); return -EUCLEAN; } if (unlikely(!has_single_bit_set(flags & (BTRFS_EXTENT_FLAG_DATA | BTRFS_EXTENT_FLAG_TREE_BLOCK)))) { extent_err(leaf, slot, "invalid extent flag, have 0x%llx expect 1 bit set in 0x%llx", flags, BTRFS_EXTENT_FLAG_DATA | BTRFS_EXTENT_FLAG_TREE_BLOCK); return -EUCLEAN; } is_tree_block = !!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK); if (is_tree_block) { if (unlikely(key->type == BTRFS_EXTENT_ITEM_KEY && key->offset != 
fs_info->nodesize)) { extent_err(leaf, slot, "invalid extent length, have %llu expect %u", key->offset, fs_info->nodesize); return -EUCLEAN; } } else { if (unlikely(key->type != BTRFS_EXTENT_ITEM_KEY)) { extent_err(leaf, slot, "invalid key type, have %u expect %u for data backref", key->type, BTRFS_EXTENT_ITEM_KEY); return -EUCLEAN; } if (unlikely(!IS_ALIGNED(key->offset, fs_info->sectorsize))) { extent_err(leaf, slot, "invalid extent length, have %llu expect aligned to %u", key->offset, fs_info->sectorsize); return -EUCLEAN; } if (unlikely(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { extent_err(leaf, slot, "invalid extent flag, data has full backref set"); return -EUCLEAN; } } ptr = (unsigned long)(struct btrfs_extent_item *)(ei + 1); /* Check the special case of btrfs_tree_block_info */ if (is_tree_block && key->type != BTRFS_METADATA_ITEM_KEY) { struct btrfs_tree_block_info *info; info = (struct btrfs_tree_block_info *)ptr; if (unlikely(btrfs_tree_block_level(leaf, info) >= BTRFS_MAX_LEVEL)) { extent_err(leaf, slot, "invalid tree block info level, have %u expect [0, %u]", btrfs_tree_block_level(leaf, info), BTRFS_MAX_LEVEL - 1); return -EUCLEAN; } ptr = (unsigned long)(struct btrfs_tree_block_info *)(info + 1); } /* Check inline refs */ while (ptr < end) { struct btrfs_extent_inline_ref *iref; struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; u64 seq; u64 dref_offset; u64 inline_offset; u8 inline_type; if (unlikely(ptr + sizeof(*iref) > end)) { extent_err(leaf, slot, "inline ref item overflows extent item, ptr %lu iref size %zu end %lu", ptr, sizeof(*iref), end); return -EUCLEAN; } iref = (struct btrfs_extent_inline_ref *)ptr; inline_type = btrfs_extent_inline_ref_type(leaf, iref); inline_offset = btrfs_extent_inline_ref_offset(leaf, iref); seq = inline_offset; if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) { extent_err(leaf, slot, "inline ref item overflows extent item, ptr %lu iref size %u end %lu", ptr, 
btrfs_extent_inline_ref_size(inline_type), end); return -EUCLEAN; } switch (inline_type) { /* inline_offset is subvolid of the owner, no need to check */ case BTRFS_TREE_BLOCK_REF_KEY: inline_refs++; break; /* Contains parent bytenr */ case BTRFS_SHARED_BLOCK_REF_KEY: if (unlikely(!IS_ALIGNED(inline_offset, fs_info->sectorsize))) { extent_err(leaf, slot, "invalid tree parent bytenr, have %llu expect aligned to %u", inline_offset, fs_info->sectorsize); return -EUCLEAN; } inline_refs++; break; /* * Contains owner subvolid, owner key objectid, adjusted offset. * The only obvious corruption can happen in that offset. */ case BTRFS_EXTENT_DATA_REF_KEY: dref = (struct btrfs_extent_data_ref *)(&iref->offset); dref_offset = btrfs_extent_data_ref_offset(leaf, dref); seq = hash_extent_data_ref( btrfs_extent_data_ref_root(leaf, dref), btrfs_extent_data_ref_objectid(leaf, dref), btrfs_extent_data_ref_offset(leaf, dref)); if (unlikely(!IS_ALIGNED(dref_offset, fs_info->sectorsize))) { extent_err(leaf, slot, "invalid data ref offset, have %llu expect aligned to %u", dref_offset, fs_info->sectorsize); return -EUCLEAN; } inline_refs += btrfs_extent_data_ref_count(leaf, dref); break; /* Contains parent bytenr and ref count */ case BTRFS_SHARED_DATA_REF_KEY: sref = (struct btrfs_shared_data_ref *)(iref + 1); if (unlikely(!IS_ALIGNED(inline_offset, fs_info->sectorsize))) { extent_err(leaf, slot, "invalid data parent bytenr, have %llu expect aligned to %u", inline_offset, fs_info->sectorsize); return -EUCLEAN; } inline_refs += btrfs_shared_data_ref_count(leaf, sref); break; case BTRFS_EXTENT_OWNER_REF_KEY: WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)); break; default: extent_err(leaf, slot, "unknown inline ref type: %u", inline_type); return -EUCLEAN; } if (inline_type < last_type) { extent_err(leaf, slot, "inline ref out-of-order: has type %u, prev type %u", inline_type, last_type); return -EUCLEAN; } /* Type changed, allow the sequence starts from U64_MAX again. 
*/ if (inline_type > last_type) last_seq = U64_MAX; if (seq > last_seq) { extent_err(leaf, slot, "inline ref out-of-order: has type %u offset %llu seq 0x%llx, prev type %u seq 0x%llx", inline_type, inline_offset, seq, last_type, last_seq); return -EUCLEAN; } last_type = inline_type; last_seq = seq; ptr += btrfs_extent_inline_ref_size(inline_type); } /* No padding is allowed */ if (unlikely(ptr != end)) { extent_err(leaf, slot, "invalid extent item size, padding bytes found"); return -EUCLEAN; } /* Finally, check the inline refs against total refs */ if (unlikely(inline_refs > total_refs)) { extent_err(leaf, slot, "invalid extent refs, have %llu expect >= inline %llu", total_refs, inline_refs); return -EUCLEAN; } if ((prev_key->type == BTRFS_EXTENT_ITEM_KEY) || (prev_key->type == BTRFS_METADATA_ITEM_KEY)) { u64 prev_end = prev_key->objectid; if (prev_key->type == BTRFS_METADATA_ITEM_KEY) prev_end += fs_info->nodesize; else prev_end += prev_key->offset; if (unlikely(prev_end > key->objectid)) { extent_err(leaf, slot, "previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]", prev_key->objectid, prev_key->type, prev_key->offset, key->objectid, key->type, key->offset); return -EUCLEAN; } } return 0; } static int check_simple_keyed_refs(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { u32 expect_item_size = 0; if (key->type == BTRFS_SHARED_DATA_REF_KEY) expect_item_size = sizeof(struct btrfs_shared_data_ref); if (unlikely(btrfs_item_size(leaf, slot) != expect_item_size)) { generic_err(leaf, slot, "invalid item size, have %u expect %u for key type %u", btrfs_item_size(leaf, slot), expect_item_size, key->type); return -EUCLEAN; } if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) { generic_err(leaf, slot, "invalid key objectid for shared block ref, have %llu expect aligned to %u", key->objectid, leaf->fs_info->sectorsize); return -EUCLEAN; } if (unlikely(key->type != BTRFS_TREE_BLOCK_REF_KEY && !IS_ALIGNED(key->offset, 
leaf->fs_info->sectorsize))) { extent_err(leaf, slot, "invalid tree parent bytenr, have %llu expect aligned to %u", key->offset, leaf->fs_info->sectorsize); return -EUCLEAN; } return 0; } static int check_extent_data_ref(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_extent_data_ref *dref; unsigned long ptr = btrfs_item_ptr_offset(leaf, slot); const unsigned long end = ptr + btrfs_item_size(leaf, slot); if (unlikely(btrfs_item_size(leaf, slot) % sizeof(*dref) != 0)) { generic_err(leaf, slot, "invalid item size, have %u expect aligned to %zu for key type %u", btrfs_item_size(leaf, slot), sizeof(*dref), key->type); return -EUCLEAN; } if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) { generic_err(leaf, slot, "invalid key objectid for shared block ref, have %llu expect aligned to %u", key->objectid, leaf->fs_info->sectorsize); return -EUCLEAN; } for (; ptr < end; ptr += sizeof(*dref)) { u64 offset; /* * We cannot check the extent_data_ref hash due to possible * overflow from the leaf due to hash collisions. */ dref = (struct btrfs_extent_data_ref *)ptr; offset = btrfs_extent_data_ref_offset(leaf, dref); if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { extent_err(leaf, slot, "invalid extent data backref offset, have %llu expect aligned to %u", offset, leaf->fs_info->sectorsize); return -EUCLEAN; } } return 0; } #define inode_ref_err(eb, slot, fmt, args...) 
\ inode_item_err(eb, slot, fmt, ##args) static int check_inode_ref(struct extent_buffer *leaf, struct btrfs_key *key, struct btrfs_key *prev_key, int slot) { struct btrfs_inode_ref *iref; unsigned long ptr; unsigned long end; if (unlikely(!check_prev_ino(leaf, key, slot, prev_key))) return -EUCLEAN; /* namelen can't be 0, so item_size == sizeof() is also invalid */ if (unlikely(btrfs_item_size(leaf, slot) <= sizeof(*iref))) { inode_ref_err(leaf, slot, "invalid item size, have %u expect (%zu, %u)", btrfs_item_size(leaf, slot), sizeof(*iref), BTRFS_LEAF_DATA_SIZE(leaf->fs_info)); return -EUCLEAN; } ptr = btrfs_item_ptr_offset(leaf, slot); end = ptr + btrfs_item_size(leaf, slot); while (ptr < end) { u16 namelen; if (unlikely(ptr + sizeof(iref) > end)) { inode_ref_err(leaf, slot, "inode ref overflow, ptr %lu end %lu inode_ref_size %zu", ptr, end, sizeof(iref)); return -EUCLEAN; } iref = (struct btrfs_inode_ref *)ptr; namelen = btrfs_inode_ref_name_len(leaf, iref); if (unlikely(ptr + sizeof(*iref) + namelen > end)) { inode_ref_err(leaf, slot, "inode ref overflow, ptr %lu end %lu namelen %u", ptr, end, namelen); return -EUCLEAN; } /* * NOTE: In theory we should record all found index numbers * to find any duplicated indexes, but that will be too time * consuming for inodes with too many hard links. 
*/ ptr += sizeof(*iref) + namelen; } return 0; } static int check_raid_stripe_extent(const struct extent_buffer *leaf, const struct btrfs_key *key, int slot) { struct btrfs_stripe_extent *stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent); if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) { generic_err(leaf, slot, "invalid key objectid for raid stripe extent, have %llu expect aligned to %u", key->objectid, leaf->fs_info->sectorsize); return -EUCLEAN; } if (unlikely(!btrfs_fs_incompat(leaf->fs_info, RAID_STRIPE_TREE))) { generic_err(leaf, slot, "RAID_STRIPE_EXTENT present but RAID_STRIPE_TREE incompat bit unset"); return -EUCLEAN; } switch (btrfs_stripe_extent_encoding(leaf, stripe_extent)) { case BTRFS_STRIPE_RAID0: case BTRFS_STRIPE_RAID1: case BTRFS_STRIPE_DUP: case BTRFS_STRIPE_RAID10: case BTRFS_STRIPE_RAID5: case BTRFS_STRIPE_RAID6: case BTRFS_STRIPE_RAID1C3: case BTRFS_STRIPE_RAID1C4: break; default: generic_err(leaf, slot, "invalid raid stripe encoding %u", btrfs_stripe_extent_encoding(leaf, stripe_extent)); return -EUCLEAN; } return 0; } /* * Common point to switch the item-specific validation. 
*/ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot, struct btrfs_key *prev_key) { int ret = 0; struct btrfs_chunk *chunk; switch (key->type) { case BTRFS_EXTENT_DATA_KEY: ret = check_extent_data_item(leaf, key, slot, prev_key); break; case BTRFS_EXTENT_CSUM_KEY: ret = check_csum_item(leaf, key, slot, prev_key); break; case BTRFS_DIR_ITEM_KEY: case BTRFS_DIR_INDEX_KEY: case BTRFS_XATTR_ITEM_KEY: ret = check_dir_item(leaf, key, prev_key, slot); break; case BTRFS_INODE_REF_KEY: ret = check_inode_ref(leaf, key, prev_key, slot); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: ret = check_block_group_item(leaf, key, slot); break; case BTRFS_CHUNK_ITEM_KEY: chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); ret = check_leaf_chunk_item(leaf, chunk, key, slot); break; case BTRFS_DEV_ITEM_KEY: ret = check_dev_item(leaf, key, slot); break; case BTRFS_INODE_ITEM_KEY: ret = check_inode_item(leaf, key, slot); break; case BTRFS_ROOT_ITEM_KEY: ret = check_root_item(leaf, key, slot); break; case BTRFS_EXTENT_ITEM_KEY: case BTRFS_METADATA_ITEM_KEY: ret = check_extent_item(leaf, key, slot, prev_key); break; case BTRFS_TREE_BLOCK_REF_KEY: case BTRFS_SHARED_DATA_REF_KEY: case BTRFS_SHARED_BLOCK_REF_KEY: ret = check_simple_keyed_refs(leaf, key, slot); break; case BTRFS_EXTENT_DATA_REF_KEY: ret = check_extent_data_ref(leaf, key, slot); break; case BTRFS_RAID_STRIPE_KEY: ret = check_raid_stripe_extent(leaf, key, slot); break; } if (ret) return BTRFS_TREE_BLOCK_INVALID_ITEM; return BTRFS_TREE_BLOCK_CLEAN; } enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf) { struct btrfs_fs_info *fs_info = leaf->fs_info; /* No valid key type is 0, so all key should be larger than this key */ struct btrfs_key prev_key = {0, 0, 0}; struct btrfs_key key; u32 nritems = btrfs_header_nritems(leaf); int slot; if (unlikely(btrfs_header_level(leaf) != 0)) { generic_err(leaf, 0, "invalid level for leaf, have %d expect 0", 
btrfs_header_level(leaf)); return BTRFS_TREE_BLOCK_INVALID_LEVEL; } /* * Extent buffers from a relocation tree have a owner field that * corresponds to the subvolume tree they are based on. So just from an * extent buffer alone we can not find out what is the id of the * corresponding subvolume tree, so we can not figure out if the extent * buffer corresponds to the root of the relocation tree or not. So * skip this check for relocation trees. */ if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { u64 owner = btrfs_header_owner(leaf); /* These trees must never be empty */ if (unlikely(owner == BTRFS_ROOT_TREE_OBJECTID || owner == BTRFS_CHUNK_TREE_OBJECTID || owner == BTRFS_DEV_TREE_OBJECTID || owner == BTRFS_FS_TREE_OBJECTID || owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) { generic_err(leaf, 0, "invalid root, root %llu must never be empty", owner); return BTRFS_TREE_BLOCK_INVALID_NRITEMS; } /* Unknown tree */ if (unlikely(owner == 0)) { generic_err(leaf, 0, "invalid owner, root 0 is not defined"); return BTRFS_TREE_BLOCK_INVALID_OWNER; } /* EXTENT_TREE_V2 can have empty extent trees. */ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) return BTRFS_TREE_BLOCK_CLEAN; if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) { generic_err(leaf, 0, "invalid root, root %llu must never be empty", owner); return BTRFS_TREE_BLOCK_INVALID_NRITEMS; } return BTRFS_TREE_BLOCK_CLEAN; } if (unlikely(nritems == 0)) return BTRFS_TREE_BLOCK_CLEAN; /* * Check the following things to make sure this is a good leaf, and * leaf users won't need to bother with similar sanity checks: * * 1) key ordering * 2) item offset and size * No overlap, no hole, all inside the leaf. * 3) item content * If possible, do comprehensive sanity check. * NOTE: All checks must only rely on the item data itself. 
*/ for (slot = 0; slot < nritems; slot++) { u32 item_end_expected; u64 item_data_end; btrfs_item_key_to_cpu(leaf, &key, slot); /* Make sure the keys are in the right order */ if (unlikely(btrfs_comp_cpu_keys(&prev_key, &key) >= 0)) { generic_err(leaf, slot, "bad key order, prev (%llu %u %llu) current (%llu %u %llu)", prev_key.objectid, prev_key.type, prev_key.offset, key.objectid, key.type, key.offset); return BTRFS_TREE_BLOCK_BAD_KEY_ORDER; } item_data_end = (u64)btrfs_item_offset(leaf, slot) + btrfs_item_size(leaf, slot); /* * Make sure the offset and ends are right, remember that the * item data starts at the end of the leaf and grows towards the * front. */ if (slot == 0) item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info); else item_end_expected = btrfs_item_offset(leaf, slot - 1); if (unlikely(item_data_end != item_end_expected)) { generic_err(leaf, slot, "unexpected item end, have %llu expect %u", item_data_end, item_end_expected); return BTRFS_TREE_BLOCK_INVALID_OFFSETS; } /* * Check to make sure that we don't point outside of the leaf, * just in case all the items are consistent to each other, but * all point outside of the leaf. */ if (unlikely(item_data_end > BTRFS_LEAF_DATA_SIZE(fs_info))) { generic_err(leaf, slot, "slot end outside of leaf, have %llu expect range [0, %u]", item_data_end, BTRFS_LEAF_DATA_SIZE(fs_info)); return BTRFS_TREE_BLOCK_INVALID_OFFSETS; } /* Also check if the item pointer overlaps with btrfs item. */ if (unlikely(btrfs_item_ptr_offset(leaf, slot) < btrfs_item_nr_offset(leaf, slot) + sizeof(struct btrfs_item))) { generic_err(leaf, slot, "slot overlaps with its data, item end %lu data start %lu", btrfs_item_nr_offset(leaf, slot) + sizeof(struct btrfs_item), btrfs_item_ptr_offset(leaf, slot)); return BTRFS_TREE_BLOCK_INVALID_OFFSETS; } /* * We only want to do this if WRITTEN is set, otherwise the leaf * may be in some intermediate state and won't appear valid. 
*/ if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN)) { enum btrfs_tree_block_status ret; /* * Check if the item size and content meet other * criteria */ ret = check_leaf_item(leaf, &key, slot, &prev_key); if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN)) return ret; } prev_key.objectid = key.objectid; prev_key.type = key.type; prev_key.offset = key.offset; } return BTRFS_TREE_BLOCK_CLEAN; } int btrfs_check_leaf(struct extent_buffer *leaf) { enum btrfs_tree_block_status ret; ret = __btrfs_check_leaf(leaf); if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN)) return -EUCLEAN; return 0; } ALLOW_ERROR_INJECTION(btrfs_check_leaf, ERRNO); enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node) { struct btrfs_fs_info *fs_info = node->fs_info; unsigned long nr = btrfs_header_nritems(node); struct btrfs_key key, next_key; int slot; int level = btrfs_header_level(node); u64 bytenr; if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) { generic_err(node, 0, "invalid level for node, have %d expect [1, %d]", level, BTRFS_MAX_LEVEL - 1); return BTRFS_TREE_BLOCK_INVALID_LEVEL; } if (unlikely(nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info))) { btrfs_crit(fs_info, "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]", btrfs_header_owner(node), node->start, nr == 0 ? 
"small" : "large", nr, BTRFS_NODEPTRS_PER_BLOCK(fs_info)); return BTRFS_TREE_BLOCK_INVALID_NRITEMS; } for (slot = 0; slot < nr - 1; slot++) { bytenr = btrfs_node_blockptr(node, slot); btrfs_node_key_to_cpu(node, &key, slot); btrfs_node_key_to_cpu(node, &next_key, slot + 1); if (unlikely(!bytenr)) { generic_err(node, slot, "invalid NULL node pointer"); return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR; } if (unlikely(!IS_ALIGNED(bytenr, fs_info->sectorsize))) { generic_err(node, slot, "unaligned pointer, have %llu should be aligned to %u", bytenr, fs_info->sectorsize); return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR; } if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) { generic_err(node, slot, "bad key order, current (%llu %u %llu) next (%llu %u %llu)", key.objectid, key.type, key.offset, next_key.objectid, next_key.type, next_key.offset); return BTRFS_TREE_BLOCK_BAD_KEY_ORDER; } } return BTRFS_TREE_BLOCK_CLEAN; } int btrfs_check_node(struct extent_buffer *node) { enum btrfs_tree_block_status ret; ret = __btrfs_check_node(node); if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN)) return -EUCLEAN; return 0; } ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO); int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner) { const bool is_subvol = is_fstree(root_owner); const u64 eb_owner = btrfs_header_owner(eb); /* * Skip dummy fs, as selftests don't create unique ebs for each dummy * root. */ if (test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &eb->fs_info->fs_state)) return 0; /* * There are several call sites (backref walking, qgroup, and data * reloc) passing 0 as @root_owner, as they are not holding the * tree root. In that case, we can not do a reliable ownership check, * so just exit. */ if (root_owner == 0) return 0; /* * These trees use key.offset as their owner, our callers don't have * the extra capacity to pass key.offset here. So we just skip them. 
*/ if (root_owner == BTRFS_TREE_LOG_OBJECTID || root_owner == BTRFS_TREE_RELOC_OBJECTID) return 0; if (!is_subvol) { /* For non-subvolume trees, the eb owner should match root owner */ if (unlikely(root_owner != eb_owner)) { btrfs_crit(eb->fs_info, "corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect %llu", btrfs_header_level(eb) == 0 ? "leaf" : "node", root_owner, btrfs_header_bytenr(eb), eb_owner, root_owner); return -EUCLEAN; } return 0; } /* * For subvolume trees, owners can mismatch, but they should all belong * to subvolume trees. */ if (unlikely(is_subvol != is_fstree(eb_owner))) { btrfs_crit(eb->fs_info, "corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect [%llu, %llu]", btrfs_header_level(eb) == 0 ? "leaf" : "node", root_owner, btrfs_header_bytenr(eb), eb_owner, BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID); return -EUCLEAN; } return 0; } int btrfs_verify_level_key(struct extent_buffer *eb, int level, struct btrfs_key *first_key, u64 parent_transid) { struct btrfs_fs_info *fs_info = eb->fs_info; int found_level; struct btrfs_key found_key; int ret; found_level = btrfs_header_level(eb); if (found_level != level) { WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), KERN_ERR "BTRFS: tree level check failed\n"); btrfs_err(fs_info, "tree level mismatch detected, bytenr=%llu level expected=%u has=%u", eb->start, level, found_level); return -EIO; } if (!first_key) return 0; /* * For live tree block (new tree blocks in current transaction), * we need proper lock context to avoid race, which is impossible here. * So we only checks tree blocks which is read from disk, whose * generation <= fs_info->last_trans_committed. 
*/ if (btrfs_header_generation(eb) > btrfs_get_last_trans_committed(fs_info)) return 0; /* We have @first_key, so this @eb must have at least one item */ if (btrfs_header_nritems(eb) == 0) { btrfs_err(fs_info, "invalid tree nritems, bytenr=%llu nritems=0 expect >0", eb->start); WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); return -EUCLEAN; } if (found_level) btrfs_node_key_to_cpu(eb, &found_key, 0); else btrfs_item_key_to_cpu(eb, &found_key, 0); ret = btrfs_comp_cpu_keys(first_key, &found_key); if (ret) { WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), KERN_ERR "BTRFS: tree first key check failed\n"); btrfs_err(fs_info, "tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)", eb->start, parent_transid, first_key->objectid, first_key->type, first_key->offset, found_key.objectid, found_key.type, found_key.offset); } return ret; }
2339 1856 1119 2426 2429 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 /* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _LINUX_RCUREF_H #define _LINUX_RCUREF_H #include <linux/atomic.h> #include <linux/bug.h> #include <linux/limits.h> #include <linux/lockdep.h> #include <linux/preempt.h> #include <linux/rcupdate.h> #define RCUREF_ONEREF 0x00000000U #define RCUREF_MAXREF 0x7FFFFFFFU #define RCUREF_SATURATED 0xA0000000U #define RCUREF_RELEASED 0xC0000000U #define RCUREF_DEAD 0xE0000000U #define RCUREF_NOREF 0xFFFFFFFFU /** * rcuref_init - Initialize a rcuref reference count with the given reference count * @ref: Pointer to the reference count * @cnt: The initial reference count typically '1' */ static inline void rcuref_init(rcuref_t *ref, unsigned int cnt) { atomic_set(&ref->refcnt, cnt - 1); } /** * rcuref_read - Read the number of held reference counts of a rcuref * @ref: Pointer to the reference count * * Return: The number of held references (0 ... N) */ static inline unsigned int rcuref_read(rcuref_t *ref) { unsigned int c = atomic_read(&ref->refcnt); /* Return 0 if within the DEAD zone. */ return c >= RCUREF_RELEASED ? 0 : c + 1; } extern __must_check bool rcuref_get_slowpath(rcuref_t *ref); /** * rcuref_get - Acquire one reference on a rcuref reference count * @ref: Pointer to the reference count * * Similar to atomic_inc_not_zero() but saturates at RCUREF_MAXREF. * * Provides no memory ordering, it is assumed the caller has guaranteed the * object memory to be stable (RCU, etc.). 
It does provide a control dependency * and thereby orders future stores. See documentation in lib/rcuref.c * * Return: * False if the attempt to acquire a reference failed. This happens * when the last reference has been put already * * True if a reference was successfully acquired */ static inline __must_check bool rcuref_get(rcuref_t *ref) { /* * Unconditionally increase the reference count. The saturation and * dead zones provide enough tolerance for this. */ if (likely(!atomic_add_negative_relaxed(1, &ref->refcnt))) return true; /* Handle the cases inside the saturation and dead zones */ return rcuref_get_slowpath(ref); } extern __must_check bool rcuref_put_slowpath(rcuref_t *ref); /* * Internal helper. Do not invoke directly. */ static __always_inline __must_check bool __rcuref_put(rcuref_t *ref) { RCU_LOCKDEP_WARN(!rcu_read_lock_held() && preemptible(), "suspicious rcuref_put_rcusafe() usage"); /* * Unconditionally decrease the reference count. The saturation and * dead zones provide enough tolerance for this. */ if (likely(!atomic_add_negative_release(-1, &ref->refcnt))) return false; /* * Handle the last reference drop and cases inside the saturation * and dead zones. */ return rcuref_put_slowpath(ref); } /** * rcuref_put_rcusafe -- Release one reference for a rcuref reference count RCU safe * @ref: Pointer to the reference count * * Provides release memory ordering, such that prior loads and stores are done * before, and provides an acquire ordering on success such that free() * must come after. * * Can be invoked from contexts, which guarantee that no grace period can * happen which would free the object concurrently if the decrement drops * the last reference and the slowpath races against a concurrent get() and * put() pair. rcu_read_lock()'ed and atomic contexts qualify. * * Return: * True if this was the last reference with no future references * possible. 
This signals the caller that it can safely release the * object which is protected by the reference counter. * * False if there are still active references or the put() raced * with a concurrent get()/put() pair. Caller is not allowed to * release the protected object. */ static inline __must_check bool rcuref_put_rcusafe(rcuref_t *ref) { return __rcuref_put(ref); } /** * rcuref_put -- Release one reference for a rcuref reference count * @ref: Pointer to the reference count * * Can be invoked from any context. * * Provides release memory ordering, such that prior loads and stores are done * before, and provides an acquire ordering on success such that free() * must come after. * * Return: * * True if this was the last reference with no future references * possible. This signals the caller that it can safely schedule the * object, which is protected by the reference counter, for * deconstruction. * * False if there are still active references or the put() raced * with a concurrent get()/put() pair. Caller is not allowed to * deconstruct the protected object. */ static inline __must_check bool rcuref_put(rcuref_t *ref) { bool released; preempt_disable(); released = __rcuref_put(ref); preempt_enable(); return released; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 
1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 
1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 
2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 
2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 
3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 /* 
SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for
 * licensing and copyright details
 */

#include <linux/reiserfs_fs.h>

#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <linux/workqueue.h>
#include <asm/unaligned.h>
#include <linux/bitops.h>
#include <linux/proc_fs.h>
#include <linux/buffer_head.h>

/* the 32 bit compat definitions with int argument */
#define REISERFS_IOC32_UNPACK		_IOW(0xCD, 1, int)
#define REISERFS_IOC32_GETVERSION	FS_IOC32_GETVERSION
#define REISERFS_IOC32_SETVERSION	FS_IOC32_SETVERSION

/* forward declaration; referenced by pointer from struct reiserfs_inode_info */
struct reiserfs_journal_list;

/*
 * bitmasks for i_flags field in reiserfs-specific part of inode.
 * Each enumerator is a distinct single bit.
 */
typedef enum {
	/*
	 * this says what format of key do all items (but stat data) of
	 * an object have.  If this is set, that format is 3.6 otherwise - 3.5
	 */
	i_item_key_version_mask = 0x0001,

	/*
	 * If this is unset, object has 3.5 stat data, otherwise,
	 * it has 3.6 stat data with 64bit size, 32bit nlink etc.
	 */
	i_stat_data_version_mask = 0x0002,

	/* file might need tail packing on close */
	i_pack_on_close_mask = 0x0004,

	/* don't pack tail of file */
	i_nopack_mask = 0x0008,

	/*
	 * If either of these are set, "safe link" was created for this
	 * file during truncate or unlink. Safe link is used to avoid
	 * leakage of disk space on crash with some files open, but unlinked.
	 */
	i_link_saved_unlink_mask = 0x0010,
	i_link_saved_truncate_mask = 0x0020,

	/* NOTE(review): the two flags below carry no description here */
	i_has_xattr_dir = 0x0040,
	i_data_log = 0x0080,
} reiserfs_inode_flags;

/*
 * reiserfs-specific in-core inode data.  The generic VFS inode is
 * embedded as the last member (vfs_inode).
 */
struct reiserfs_inode_info {
	__u32 i_key[4];		/* key is still 4 32 bit integers */

	/*
	 * transient inode flags that are never stored on disk. Bitmasks
	 * for this field are defined above.
	 */
	__u32 i_flags;

	/* offset of first byte stored in direct item. */
	__u32 i_first_direct_byte;

	/* copy of persistent inode flags read from sd_attrs. */
	__u32 i_attrs;

	/* first unused block of a sequence of unused blocks */
	int i_prealloc_block;
	int i_prealloc_count;	/* length of that sequence */

	/* per-transaction list of inodes which have preallocated blocks */
	struct list_head i_prealloc_list;

	/*
	 * new_packing_locality is created; new blocks for the contents
	 * of this directory should be displaced
	 */
	unsigned new_packing_locality:1;

	/*
	 * we use these for fsync or O_SYNC to decide which transaction
	 * needs to be committed in order for this inode to be properly
	 * flushed
	 */
	unsigned int i_trans_id;

	struct reiserfs_journal_list *i_jl;
	atomic_t openers;
	struct mutex tailpack;
#ifdef CONFIG_REISERFS_FS_XATTR
	/* only present when extended attribute support is configured */
	struct rw_semaphore i_xattr_sem;
#endif
#ifdef CONFIG_QUOTA
	/* only present when quota support is configured */
	struct dquot *i_dquot[MAXQUOTAS];
#endif

	struct inode vfs_inode;
};

/* super block flag bits; only one flag is defined here */
typedef enum {
	reiserfs_attrs_cleared = 0x00000001,
} reiserfs_super_block_flags;

/*
 * struct reiserfs_super_block accessors/mutators since this is a disk
 * structure, it will always be in little endian format.
*/ #define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count)) #define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v)) #define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks)) #define set_sb_free_blocks(sbp,v) ((sbp)->s_v1.s_free_blocks = cpu_to_le32(v)) #define sb_root_block(sbp) (le32_to_cpu((sbp)->s_v1.s_root_block)) #define set_sb_root_block(sbp,v) ((sbp)->s_v1.s_root_block = cpu_to_le32(v)) #define sb_jp_journal_1st_block(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_1st_block)) #define set_sb_jp_journal_1st_block(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_1st_block = cpu_to_le32(v)) #define sb_jp_journal_dev(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_dev)) #define set_sb_jp_journal_dev(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_dev = cpu_to_le32(v)) #define sb_jp_journal_size(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_size)) #define set_sb_jp_journal_size(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_size = cpu_to_le32(v)) #define sb_jp_journal_trans_max(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_trans_max)) #define set_sb_jp_journal_trans_max(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_trans_max = cpu_to_le32(v)) #define sb_jp_journal_magic(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_magic)) #define set_sb_jp_journal_magic(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_magic = cpu_to_le32(v)) #define sb_jp_journal_max_batch(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_batch)) #define set_sb_jp_journal_max_batch(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_max_batch = cpu_to_le32(v)) #define sb_jp_jourmal_max_commit_age(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_commit_age)) #define set_sb_jp_journal_max_commit_age(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_max_commit_age = cpu_to_le32(v)) #define sb_blocksize(sbp) (le16_to_cpu((sbp)->s_v1.s_blocksize)) #define set_sb_blocksize(sbp,v) ((sbp)->s_v1.s_blocksize = cpu_to_le16(v)) #define sb_oid_maxsize(sbp) 
(le16_to_cpu((sbp)->s_v1.s_oid_maxsize)) #define set_sb_oid_maxsize(sbp,v) ((sbp)->s_v1.s_oid_maxsize = cpu_to_le16(v)) #define sb_oid_cursize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_cursize)) #define set_sb_oid_cursize(sbp,v) ((sbp)->s_v1.s_oid_cursize = cpu_to_le16(v)) #define sb_umount_state(sbp) (le16_to_cpu((sbp)->s_v1.s_umount_state)) #define set_sb_umount_state(sbp,v) ((sbp)->s_v1.s_umount_state = cpu_to_le16(v)) #define sb_fs_state(sbp) (le16_to_cpu((sbp)->s_v1.s_fs_state)) #define set_sb_fs_state(sbp,v) ((sbp)->s_v1.s_fs_state = cpu_to_le16(v)) #define sb_hash_function_code(sbp) \ (le32_to_cpu((sbp)->s_v1.s_hash_function_code)) #define set_sb_hash_function_code(sbp,v) \ ((sbp)->s_v1.s_hash_function_code = cpu_to_le32(v)) #define sb_tree_height(sbp) (le16_to_cpu((sbp)->s_v1.s_tree_height)) #define set_sb_tree_height(sbp,v) ((sbp)->s_v1.s_tree_height = cpu_to_le16(v)) #define sb_bmap_nr(sbp) (le16_to_cpu((sbp)->s_v1.s_bmap_nr)) #define set_sb_bmap_nr(sbp,v) ((sbp)->s_v1.s_bmap_nr = cpu_to_le16(v)) #define sb_version(sbp) (le16_to_cpu((sbp)->s_v1.s_version)) #define set_sb_version(sbp,v) ((sbp)->s_v1.s_version = cpu_to_le16(v)) #define sb_mnt_count(sbp) (le16_to_cpu((sbp)->s_mnt_count)) #define set_sb_mnt_count(sbp, v) ((sbp)->s_mnt_count = cpu_to_le16(v)) #define sb_reserved_for_journal(sbp) \ (le16_to_cpu((sbp)->s_v1.s_reserved_for_journal)) #define set_sb_reserved_for_journal(sbp,v) \ ((sbp)->s_v1.s_reserved_for_journal = cpu_to_le16(v)) /* LOGGING -- */ /* * These all interelate for performance. * * If the journal block count is smaller than n transactions, you lose speed. * I don't know what n is yet, I'm guessing 8-16. * * typical transaction size depends on the application, how often fsync is * called, and how many metadata blocks you dirty in a 30 second period. * The more small files (<16k) you use, the larger your transactions will * be. 
* * If your journal fills faster than dirty buffers get flushed to disk, it * must flush them before allowing the journal to wrap, which slows things * down. If you need high speed meta data updates, the journal should be * big enough to prevent wrapping before dirty meta blocks get to disk. * * If the batch max is smaller than the transaction max, you'll waste space * at the end of the journal because journal_end sets the next transaction * to start at 0 if the next transaction has any chance of wrapping. * * The large the batch max age, the better the speed, and the more meta * data changes you'll lose after a crash. */ /* don't mess with these for a while */ /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ #define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ #define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ #define JOURNAL_HASH_SIZE 8192 /* number of copies of the bitmaps to have floating. Must be >= 2 */ #define JOURNAL_NUM_BITMAPS 5 /* * One of these for every block in every transaction * Each one is in two hash tables. First, a hash of the current transaction, * and after journal_end, a hash of all the in memory transactions. * next and prev are used by the current transaction (journal_hash). * hnext and hprev are used by journal_list_hash. If a block is in more * than one transaction, the journal_list_hash links it in multiple times. * This allows flush_journal_list to remove just the cnode belonging to a * given transaction. 
*/
struct reiserfs_journal_cnode {
	struct buffer_head *bh;	/* real buffer head */
	struct super_block *sb;	/* dev of real buffer head */

	/* block number of real buffer head, == 0 when buffer on disk */
	__u32 blocknr;

	unsigned long state;

	/* journal list this cnode lives in */
	struct reiserfs_journal_list *jlist;

	struct reiserfs_journal_cnode *next;	/* next in transaction list */
	struct reiserfs_journal_cnode *prev;	/* prev in transaction list */
	struct reiserfs_journal_cnode *hprev;	/* prev in hash list */
	struct reiserfs_journal_cnode *hnext;	/* next in hash list */
};

/* one bitmap buffer, linkable through @list */
struct reiserfs_bitmap_node {
	int id;
	char *data;
	struct list_head list;
};

/* pairs a journal list with an array of bitmap node pointers */
struct reiserfs_list_bitmap {
	struct reiserfs_journal_list *journal_list;
	struct reiserfs_bitmap_node **bitmaps;
};

/*
 * one of these for each transaction.  The most important part here is the
 * j_realblock.  this list of cnodes is used to hash all the blocks in all
 * the commits, to mark all the real buffer heads dirty once all the commits
 * hit the disk, and to make sure every real block in a transaction is on
 * disk before allowing the log area to be overwritten
 */
struct reiserfs_journal_list {
	unsigned long j_start;
	unsigned long j_state;
	unsigned long j_len;
	atomic_t j_nonzerolen;
	atomic_t j_commit_left;

	/* all commits older than this on disk */
	atomic_t j_older_commits_done;

	struct mutex j_commit_mutex;
	unsigned int j_trans_id;
	time64_t j_timestamp; /* write-only but useful for crash dump analysis */
	struct reiserfs_list_bitmap *j_list_bitmap;
	struct buffer_head *j_commit_bh;	/* commit buffer head */
	struct reiserfs_journal_cnode *j_realblock;

	/*
	 * list of buffers that were freed during this trans.
	 * free each of these on flush
	 */
	struct reiserfs_journal_cnode *j_freedlist;

	/* time ordered list of all active transactions */
	struct list_head j_list;

	/*
	 * time ordered list of all transactions we haven't tried
	 * to flush yet
	 */
	struct list_head j_working_list;

	/* list of tail conversion targets in need of flush before commit */
	struct list_head j_tail_bh_list;

	/* list of data=ordered buffers in need of flush before commit */
	struct list_head j_bh_list;
	int j_refcount;
};

/* in-core state of the journal */
struct reiserfs_journal {
	struct buffer_head **j_ap_blocks;	/* journal blocks on disk */
	/* newest journal block */
	struct reiserfs_journal_cnode *j_last;

	/* oldest journal block.  start here for traverse */
	struct reiserfs_journal_cnode *j_first;

	struct bdev_handle *j_bdev_handle;

	/* first block on s_dev of reserved area journal */
	int j_1st_reserved_block;

	unsigned long j_state;	/* bit numbers: enum journal_state_bits */
	unsigned int j_trans_id;
	unsigned long j_mount_id;

	/* start of current waiting commit (index into j_ap_blocks) */
	unsigned long j_start;
	unsigned long j_len;	/* length of current waiting commit */

	/* number of buffers requested by journal_begin() */
	unsigned long j_len_alloc;

	atomic_t j_wcount;	/* count of writers for current commit */

	/* batch count. allows turning X transactions into 1 */
	unsigned long j_bcount;

	/* first unflushed transactions offset */
	unsigned long j_first_unflushed_offset;

	/* last fully flushed journal timestamp */
	unsigned j_last_flush_trans_id;

	struct buffer_head *j_header_bh;

	time64_t j_trans_start_time;	/* time this transaction started */
	struct mutex j_mutex;
	struct mutex j_flush_mutex;

	/* wait for current transaction to finish before starting new one */
	wait_queue_head_t j_join_wait;

	atomic_t j_jlock;		/* lock for j_join_wait */
	int j_list_bitmap_index;	/* number of next list bitmap to use */

	/* no more journal begins allowed. MUST sleep on j_join_wait */
	int j_must_wait;

	/* next journal_end will flush all journal list */
	int j_next_full_flush;

	/* next journal_end will flush all async commits */
	int j_next_async_flush;

	int j_cnode_used;	/* number of cnodes on the used list */
	int j_cnode_free;	/* number of cnodes on the free list */

	/* max number of blocks in a transaction.  */
	unsigned int j_trans_max;

	/* max number of blocks to batch into a trans */
	unsigned int j_max_batch;

	/* in seconds, how old can an async commit be */
	unsigned int j_max_commit_age;

	/* in seconds, how old can a transaction be */
	unsigned int j_max_trans_age;

	/* the default for the max commit age */
	unsigned int j_default_max_commit_age;

	struct reiserfs_journal_cnode *j_cnode_free_list;

	/* orig pointer returned from vmalloc */
	struct reiserfs_journal_cnode *j_cnode_free_orig;

	struct reiserfs_journal_list *j_current_jl;
	int j_free_bitmap_nodes;
	int j_used_bitmap_nodes;

	int j_num_lists;	/* total number of active transactions */
	int j_num_work_lists;	/* number that need attention from kreiserfsd */

	/* debugging to make sure things are flushed in order */
	unsigned int j_last_flush_id;

	/* debugging to make sure things are committed in order */
	unsigned int j_last_commit_id;

	struct list_head j_bitmap_nodes;
	struct list_head j_dirty_buffers;
	spinlock_t j_dirty_buffers_lock;	/* protects j_dirty_buffers */

	/* list of all active transactions */
	struct list_head j_journal_list;

	/* lists that haven't been touched by writeback attempts */
	struct list_head j_working_list;

	/* hash table for real buffer heads in current trans */
	struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE];

	/* hash table for all the real buffer heads in all the transactions */
	struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE];

	/* array of bitmaps to record the deleted blocks */
	struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS];

	/* list of inodes which have preallocated blocks */
	struct list_head j_prealloc_list;
	int j_persistent_trans;
	unsigned long j_max_trans_size;
	unsigned long j_max_batch_size;

	int j_errno;

	/* when flushing ordered buffers, throttle new ordered writers */
	struct delayed_work j_work;
	struct super_block *j_work_sb;
	atomic_t j_async_throttle;
};

/* bit numbers; J_ABORTED is tested in j_state by __reiserfs_is_journal_aborted() */
enum journal_state_bits {
	J_WRITERS_BLOCKED = 1,	/* set when new writers not allowed */
	J_WRITERS_QUEUED,	/* set when log is full due to too many writers */
	J_ABORTED,		/* set when log is aborted */
};

/* ick.  magic string to find desc blocks in the journal */
#define JOURNAL_DESC_MAGIC "ReIsErLB"

/* hash function type; stored in reiserfs_sb_info.s_hash_function */
typedef __u32(*hashf_t) (const signed char *, int);

struct reiserfs_bitmap_info {
	__u32 free_count;
};

struct proc_dir_entry;

#if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO )
typedef unsigned long int stat_cnt_t;
/* statistics counters; compiled in only with both config options above */
typedef struct reiserfs_proc_info_data {
	spinlock_t lock;
	int exiting;
	int max_hash_collisions;

	stat_cnt_t breads;
	stat_cnt_t bread_miss;
	stat_cnt_t search_by_key;
	stat_cnt_t search_by_key_fs_changed;
	stat_cnt_t search_by_key_restarted;

	stat_cnt_t insert_item_restarted;
	stat_cnt_t paste_into_item_restarted;
	stat_cnt_t cut_from_item_restarted;
	stat_cnt_t delete_solid_item_restarted;
	stat_cnt_t delete_item_restarted;

	stat_cnt_t leaked_oid;
	stat_cnt_t leaves_removable;

	/*
	 * balances per level.
	 * Use explicit 5 as MAX_HEIGHT is not visible yet.
	 */
	stat_cnt_t balance_at[5];	/* XXX */
	/* sbk == search_by_key */
	stat_cnt_t sbk_read_at[5];	/* XXX */
	stat_cnt_t sbk_fs_changed[5];
	stat_cnt_t sbk_restarted[5];
	stat_cnt_t items_at[5];	/* XXX */
	stat_cnt_t free_at[5];	/* XXX */
	stat_cnt_t can_node_be_removed[5];	/* XXX */
	long int lnum[5];	/* XXX */
	long int rnum[5];	/* XXX */
	long int lbytes[5];	/* XXX */
	long int rbytes[5];	/* XXX */
	stat_cnt_t get_neighbors[5];
	stat_cnt_t get_neighbors_restart[5];
	stat_cnt_t need_l_neighbor[5];
	stat_cnt_t need_r_neighbor[5];

	stat_cnt_t free_block;
	struct __scan_bitmap_stats {
		stat_cnt_t call;
		stat_cnt_t wait;
		stat_cnt_t bmap;
		stat_cnt_t retry;
		stat_cnt_t in_journal_hint;
		stat_cnt_t in_journal_nohint;
		stat_cnt_t stolen;
	} scan_bitmap;
	struct __journal_stats {
		stat_cnt_t in_journal;
		stat_cnt_t in_journal_bitmap;
		stat_cnt_t in_journal_reusable;
		stat_cnt_t lock_journal;
		stat_cnt_t lock_journal_wait;
		stat_cnt_t journal_being;
		stat_cnt_t journal_relock_writers;
		stat_cnt_t journal_relock_wcount;
		stat_cnt_t mark_dirty;
		stat_cnt_t mark_dirty_already;
		stat_cnt_t mark_dirty_notjournal;
		stat_cnt_t restore_prepared;
		stat_cnt_t prepare;
		stat_cnt_t prepare_retry;
	} journal;
} reiserfs_proc_info_data_t;
#else
/* empty placeholder so s_proc_info_data can be declared unconditionally */
typedef struct reiserfs_proc_info_data {
} reiserfs_proc_info_data_t;
#endif

/* Number of quota types we support */
#define REISERFS_MAXQUOTAS 2

/* reiserfs in-core super block data */
struct reiserfs_sb_info {
	/* Buffer containing the super block */
	struct buffer_head *s_sbh;

	/* Pointer to the on-disk super block in the buffer */
	struct reiserfs_super_block *s_rs;
	struct reiserfs_bitmap_info *s_ap_bitmap;

	/* pointer to journal information */
	struct reiserfs_journal *s_journal;

	unsigned short s_mount_state;	/* reiserfs state (valid, invalid) */

	/* Serialize writers access, replace the old bkl */
	struct mutex lock;

	/* Owner of the lock (can be recursive) */
	struct task_struct *lock_owner;

	/* Depth of the lock, start from -1 like the bkl */
	int lock_depth;

	struct workqueue_struct *commit_wq;

	/* Comment? -Hans */
	void (*end_io_handler) (struct buffer_head *, int);

	/*
	 * pointer to function which is used to sort names in directory.
	 * Set on mount
	 */
	hashf_t s_hash_function;

	/* reiserfs's mount options are set here */
	unsigned long s_mount_opt;

	/* This is a structure that describes block allocator options */
	struct {
		/* Bitfield for enable/disable kind of options */
		unsigned long bits;

		/*
		 * size started from which we consider file
		 * to be a large one (in blocks)
		 */
		unsigned long large_file_size;

		int border;	/* percentage of disk, border takes */

		/*
		 * Minimal file size (in blocks) starting
		 * from which we do preallocations
		 */
		int preallocmin;

		/*
		 * Number of blocks we try to prealloc when file
		 * reaches preallocmin size (in blocks) or prealloc_list
		 is empty.
		 */
		int preallocsize;
	} s_alloc_options;

	/* Comment? -Hans */
	wait_queue_head_t s_wait;
	/* increased by one every time the  tree gets re-balanced */
	atomic_t s_generation_counter;

	/* File system properties. Currently holds on-disk FS format */
	unsigned long s_properties;

	/* session statistics */
	int s_disk_reads;
	int s_disk_writes;
	int s_fix_nodes;
	int s_do_balance;
	int s_unneeded_left_neighbor;
	int s_good_search_by_key_reada;
	int s_bmaps;
	int s_bmaps_without_search;
	int s_direct2indirect;
	int s_indirect2direct;

	/*
	 * set up when it's ok for reiserfs_read_inode2() to read from
	 * disk inode with nlink==0. Currently this is only used during
	 * finish_unfinished() processing at mount time
	 */
	int s_is_unlinked_ok;

	reiserfs_proc_info_data_t s_proc_info_data;
	struct proc_dir_entry *procdir;

	/* amount of blocks reserved for further allocations */
	int reserved_blocks;

	/* this lock is now only used to protect reserved_blocks variable */
	spinlock_t bitmap_lock;
	struct dentry *priv_root;	/* root of /.reiserfs_priv */
	struct dentry *xattr_root;	/* root of /.reiserfs_priv/xattrs */
	int j_errno;

	int work_queued;              /* non-zero delayed work is queued */
	struct delayed_work old_work; /* old transactions flush delayed work */
	spinlock_t old_work_lock;     /* protects old_work and work_queued */

#ifdef CONFIG_QUOTA
	char *s_qf_names[REISERFS_MAXQUOTAS];
	int s_jquota_fmt;
#endif
	char *s_jdev;		/* Stored jdev for mount option showing */
#ifdef CONFIG_REISERFS_CHECK

	/*
	 * Detects whether more than one copy of tb exists per superblock
	 * as a means of checking whether do_balance is executing
	 * concurrently against another tree reader/writer on a same
	 * mount point.
	 */
	struct tree_balance *cur_tb;
#endif
};

/*
 * Definitions of reiserfs on-disk properties.  These are bit numbers:
 * old_format_only() below tests (1 << REISERFS_3_5) in s_properties.
 */
#define REISERFS_3_5 0
#define REISERFS_3_6 1
#define REISERFS_OLD_FORMAT 2

/*
 * Mount options.  Each enumerator is a bit number in
 * reiserfs_sb_info.s_mount_opt; the macros following the enum test
 * (1 << option).
 */
enum reiserfs_mount_options {
	/* large tails will be created in a session */
	REISERFS_LARGETAIL,
	/*
	 * small (for files less than block size) tails will
	 * be created in a session
	 */
	REISERFS_SMALLTAIL,

	/* replay journal and return 0. Use by fsck */
	REPLAYONLY,

	/*
	 * -o conv: causes conversion of old format super block to the
	 * new format. If not specified - old partition will be dealt
	 * with in a manner of 3.5.x
	 */
	REISERFS_CONVERT,

	/*
	 * -o hash={tea, rupasov, r5, detect} is meant for properly mounting
	 * reiserfs disks from 3.5.19 or earlier.  99% of the time, this
	 * option is not required.  If the normal autodetection code can't
	 * determine which hash to use (because both hashes had the same
	 * value for a file) use this option to force a specific hash.
	 * It won't allow you to override the existing hash on the FS, so
	 * if you have a tea hash disk, and mount with -o hash=rupasov,
	 * the mount will fail.
	 */
	FORCE_TEA_HASH,		/* try to force tea hash on mount */
	FORCE_RUPASOV_HASH,	/* try to force rupasov hash on mount */
	FORCE_R5_HASH,		/* try to force r5 hash on mount */
	FORCE_HASH_DETECT,	/* try to detect hash function on mount */

	REISERFS_DATA_LOG,
	REISERFS_DATA_ORDERED,
	REISERFS_DATA_WRITEBACK,

	/*
	 * used for testing experimental features, makes benchmarking new
	 * features with and without more convenient, should never be used by
	 * users in any code shipped to users (ideally)
	 */

	REISERFS_NO_BORDER,
	REISERFS_NO_UNHASHED_RELOCATION,
	REISERFS_HASHED_RELOCATION,
	REISERFS_ATTRS,
	REISERFS_XATTRS_USER,
	REISERFS_POSIXACL,
	REISERFS_EXPOSE_PRIVROOT,
	REISERFS_BARRIER_NONE,
	REISERFS_BARRIER_FLUSH,

	/* Actions on error */
	REISERFS_ERROR_PANIC,
	REISERFS_ERROR_RO,
	REISERFS_ERROR_CONTINUE,

	REISERFS_USRQUOTA,	/* User quota option specified */
	REISERFS_GRPQUOTA,	/* Group quota option specified */

	REISERFS_TEST1,
	REISERFS_TEST2,
	REISERFS_TEST3,
	REISERFS_TEST4,
	REISERFS_UNSUPPORTED_OPT,
};

/*
 * Option tests.  Each yields the masked bit (non-zero when the option
 * is set), not a normalized 0/1 value.
 */
#define reiserfs_r5_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_R5_HASH))
#define reiserfs_rupasov_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_RUPASOV_HASH))
#define reiserfs_tea_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_TEA_HASH))
#define reiserfs_hash_detect(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_HASH_DETECT))
#define reiserfs_no_border(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_BORDER))
#define reiserfs_no_unhashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION))
#define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION))
#define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4))

#define have_large_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_LARGETAIL))
#define have_small_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_SMALLTAIL))
#define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY))
#define reiserfs_attrs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ATTRS))
#define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5))
#define convert_reiserfs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_CONVERT))
#define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG))
#define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED))
#define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK))
#define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER))
#define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL))
#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT))
#define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s))
#define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE))
#define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH))

#define reiserfs_error_panic(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_PANIC))
#define reiserfs_error_ro(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_RO))

void reiserfs_file_buffer(struct buffer_head *bh, int list);
extern struct file_system_type reiserfs_fs_type;
int reiserfs_resize(struct super_block *, unsigned long);

#define CARRY_ON                0
#define SCHEDULE_OCCURRED       1

#define SB_BUFFER_WITH_SB(s) (REISERFS_SB(s)->s_sbh)
#define SB_JOURNAL(s) (REISERFS_SB(s)->s_journal)
#define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block)
/*
 * NOTE(review): struct reiserfs_journal as defined in this file has no
 * member named j_journal_len_free, so any expansion of this macro would
 * fail to compile.  Presumably stale; confirm it is unused.
 */
#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free)
#define SB_AP_BITMAP(s) (REISERFS_SB(s)->s_ap_bitmap)

/*
 * NOTE(review): the expansion ends in a dangling "->" inside the
 * parentheses, so this macro cannot be used in a valid expression as
 * written.  The intended member is not visible here; confirm it is
 * unused before fixing or removing it.
 */
#define SB_DISK_JOURNAL_HEAD(s) (SB_JOURNAL(s)->j_header_bh->)

/* wraps the test below in unlikely(), marking the aborted case as the unexpected one */
#define reiserfs_is_journal_aborted(journal) (unlikely (__reiserfs_is_journal_aborted (journal)))
/* Returns non-zero when the J_ABORTED bit is set in journal->j_state. */
static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal
						*journal)
{
	return test_bit(J_ABORTED, &journal->j_state);
}

/*
 * Locking primitives. The write lock is a per superblock
 * special mutex that has properties close to the Big Kernel Lock
 * which was used in the previous locking scheme.
 */
void reiserfs_write_lock(struct super_block *s);
void reiserfs_write_unlock(struct super_block *s);
/* the returned depth must be handed back to reiserfs_write_lock_nested() */
int __must_check reiserfs_write_unlock_nested(struct super_block *s);
void reiserfs_write_lock_nested(struct super_block *s, int depth);

#ifdef CONFIG_REISERFS_CHECK
void reiserfs_lock_check_recursive(struct super_block *s);
#else
/* no-op when CONFIG_REISERFS_CHECK is not set */
static inline void reiserfs_lock_check_recursive(struct super_block *s) { }
#endif

/*
 * Several mutexes depend on the write lock.
 * However sometimes we want to relax the write lock while we hold
 * these mutexes, according to the release/reacquire on schedule()
 * properties of the Bkl that were used.
 * Reiserfs performances and locking were based on this scheme.
 * Now that the write lock is a mutex and not the bkl anymore, doing so
 * may result in a deadlock:
 *
 * A acquire write_lock
 * A acquire j_commit_mutex
 * A release write_lock and wait for something
 * B acquire write_lock
 * B can't acquire j_commit_mutex and sleep
 * A can't acquire write lock anymore
 * deadlock
 *
 * What we do here is avoiding such deadlock by playing the same game
 * than the Bkl: if we can't acquire a mutex that depends on the write lock,
 * we release the write lock, wait a bit and then retry.
* * The mutexes concerned by this hack are: * - The commit mutex of a journal list * - The flush mutex * - The journal lock * - The inode mutex */ static inline void reiserfs_mutex_lock_safe(struct mutex *m, struct super_block *s) { int depth; depth = reiserfs_write_unlock_nested(s); mutex_lock(m); reiserfs_write_lock_nested(s, depth); } static inline void reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, struct super_block *s) { int depth; depth = reiserfs_write_unlock_nested(s); mutex_lock_nested(m, subclass); reiserfs_write_lock_nested(s, depth); } static inline void reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) { int depth; depth = reiserfs_write_unlock_nested(s); down_read(sem); reiserfs_write_lock_nested(s, depth); } /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. */ static inline void reiserfs_cond_resched(struct super_block *s) { if (need_resched()) { int depth; depth = reiserfs_write_unlock_nested(s); schedule(); reiserfs_write_lock_nested(s, depth); } } struct fid; /* * in reading the #defines, it may help to understand that they employ * the following abbreviations: * * B = Buffer * I = Item header * H = Height within the tree (should be changed to LEV) * N = Number of the item in the node * STAT = stat data * DEH = Directory Entry Header * EC = Entry Count * E = Entry number * UL = Unsigned Long * BLKH = BLocK Header * UNFM = UNForMatted node * DC = Disk Child * P = Path * * These #defines are named by concatenating these abbreviations, * where first comes the arguments, and last comes the return value, * of the macro. */ #define USE_INODE_GENERATION_COUNTER #define REISERFS_PREALLOCATE #define DISPLACE_NEW_PACKING_LOCALITIES #define PREALLOCATION_SIZE 9 /* n must be power of 2 */ #define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) /* * to be ok for alpha and others we have to align structures to 8 byte * boundary. 
* FIXME: do not change 4 by anything else: there is code which relies on that */ #define ROUND_UP(x) _ROUND_UP(x,8LL) /* * debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug * messages. */ #define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ void __reiserfs_warning(struct super_block *s, const char *id, const char *func, const char *fmt, ...); #define reiserfs_warning(s, id, fmt, args...) \ __reiserfs_warning(s, id, __func__, fmt, ##args) /* assertions handling */ /* always check a condition and panic if it's false. */ #define __RASSERT(cond, scond, format, args...) \ do { \ if (!(cond)) \ reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \ __FILE__ ":%i:%s: " format "\n", \ __LINE__, __func__ , ##args); \ } while (0) #define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args) #if defined( CONFIG_REISERFS_CHECK ) #define RFALSE(cond, format, args...) __RASSERT(!(cond), "!(" #cond ")", format, ##args) #else #define RFALSE( cond, format, args... ) do {;} while( 0 ) #endif #define CONSTF __attribute_const__ /* * Disk Data Structures */ /*************************************************************************** * SUPER BLOCK * ***************************************************************************/ /* * Structure of super block on disk, a version of which in RAM is often * accessed as REISERFS_SB(s)->s_rs. The version in RAM is part of a larger * structure containing fields never written to disk. */ #define UNSET_HASH 0 /* Detect hash on disk */ #define TEA_HASH 1 #define YURA_HASH 2 #define R5_HASH 3 #define DEFAULT_HASH R5_HASH struct journal_params { /* where does journal start from on its * device */ __le32 jp_journal_1st_block; /* journal device st_rdev */ __le32 jp_journal_dev; /* size of the journal */ __le32 jp_journal_size; /* max number of blocks in a transaction. 
*/ __le32 jp_journal_trans_max; /* * random value made on fs creation * (this was sb_journal_block_count) */ __le32 jp_journal_magic; /* max number of blocks to batch into a trans */ __le32 jp_journal_max_batch; /* in seconds, how old can an async commit be */ __le32 jp_journal_max_commit_age; /* in seconds, how old can a transaction be */ __le32 jp_journal_max_trans_age; }; /* this is the super from 3.5.X, where X >= 10 */ struct reiserfs_super_block_v1 { __le32 s_block_count; /* blocks count */ __le32 s_free_blocks; /* free blocks count */ __le32 s_root_block; /* root block number */ struct journal_params s_journal; __le16 s_blocksize; /* block size */ /* max size of object id array, see get_objectid() commentary */ __le16 s_oid_maxsize; __le16 s_oid_cursize; /* current size of object id array */ /* this is set to 1 when filesystem was umounted, to 2 - when not */ __le16 s_umount_state; /* * reiserfs magic string indicates that file system is reiserfs: * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */ char s_magic[10]; /* * it is set to used by fsck to mark which * phase of rebuilding is done */ __le16 s_fs_state; /* * indicate, what hash function is being use * to sort names in a directory */ __le32 s_hash_function_code; __le16 s_tree_height; /* height of disk tree */ /* * amount of bitmap blocks needed to address * each block of file system */ __le16 s_bmap_nr; /* * this field is only reliable on filesystem with non-standard journal */ __le16 s_version; /* * size in blocks of journal area on main device, we need to * keep after making fs with non-standard journal */ __le16 s_reserved_for_journal; } __attribute__ ((__packed__)); #define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) /* this is the on disk super block */ struct reiserfs_super_block { struct reiserfs_super_block_v1 s_v1; __le32 s_inode_generation; /* Right now used only by inode-attributes, if enabled */ __le32 s_flags; unsigned char s_uuid[16]; /* filesystem unique identifier */ unsigned char 
s_label[16]; /* filesystem volume label */ __le16 s_mnt_count; /* Count of mounts since last fsck */ __le16 s_max_mnt_count; /* Maximum mounts before check */ __le32 s_lastcheck; /* Timestamp of last fsck */ __le32 s_check_interval; /* Interval between checks */ /* * zero filled by mkreiserfs and reiserfs_convert_objectid_map_v1() * so any additions must be updated there as well. */ char s_unused[76]; } __attribute__ ((__packed__)); #define SB_SIZE (sizeof(struct reiserfs_super_block)) #define REISERFS_VERSION_1 0 #define REISERFS_VERSION_2 2 /* on-disk super block fields converted to cpu form */ #define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs) #define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1)) #define SB_BLOCKSIZE(s) \ le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize)) #define SB_BLOCK_COUNT(s) \ le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count)) #define SB_FREE_BLOCKS(s) \ le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks)) #define SB_REISERFS_MAGIC(s) \ (SB_V1_DISK_SUPER_BLOCK(s)->s_magic) #define SB_ROOT_BLOCK(s) \ le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_root_block)) #define SB_TREE_HEIGHT(s) \ le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height)) #define SB_REISERFS_STATE(s) \ le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state)) #define SB_VERSION(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_version)) #define SB_BMAP_NR(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr)) #define PUT_SB_BLOCK_COUNT(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0) #define PUT_SB_FREE_BLOCKS(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0) #define PUT_SB_ROOT_BLOCK(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0) #define PUT_SB_TREE_HEIGHT(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0) #define PUT_SB_REISERFS_STATE(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state = 
cpu_to_le16(val); } while (0) #define PUT_SB_VERSION(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0) #define PUT_SB_BMAP_NR(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0) #define SB_ONDISK_JP(s) (&SB_V1_DISK_SUPER_BLOCK(s)->s_journal) #define SB_ONDISK_JOURNAL_SIZE(s) \ le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_size)) #define SB_ONDISK_JOURNAL_1st_BLOCK(s) \ le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_1st_block)) #define SB_ONDISK_JOURNAL_DEVICE(s) \ le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_dev)) #define SB_ONDISK_RESERVED_FOR_JOURNAL(s) \ le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_reserved_for_journal)) #define is_block_in_log_or_reserved_area(s, block) \ block >= SB_JOURNAL_1st_RESERVED_BLOCK(s) \ && block < SB_JOURNAL_1st_RESERVED_BLOCK(s) + \ ((!is_reiserfs_jr(SB_DISK_SUPER_BLOCK(s)) ? \ SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s))) int is_reiserfs_3_5(struct reiserfs_super_block *rs); int is_reiserfs_3_6(struct reiserfs_super_block *rs); int is_reiserfs_jr(struct reiserfs_super_block *rs); /* * ReiserFS leaves the first 64k unused, so that partition labels have * enough space. If someone wants to write a fancy bootloader that * needs more than 64k, let us know, and this will be increased in size. * This number must be larger than the largest block size on any * platform, or code will break. 
-Hans */ #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) #define REISERFS_FIRST_BLOCK unused_define #define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES /* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */ #define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024) /* reiserfs internal error code (used by search_by_key and fix_nodes)) */ #define CARRY_ON 0 #define REPEAT_SEARCH -1 #define IO_ERROR -2 #define NO_DISK_SPACE -3 #define NO_BALANCING_NEEDED (-4) #define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5) #define QUOTA_EXCEEDED -6 typedef __u32 b_blocknr_t; typedef __le32 unp_t; struct unfm_nodeinfo { unp_t unfm_nodenum; unsigned short unfm_freespace; }; /* there are two formats of keys: 3.5 and 3.6 */ #define KEY_FORMAT_3_5 0 #define KEY_FORMAT_3_6 1 /* there are two stat datas */ #define STAT_DATA_V1 0 #define STAT_DATA_V2 1 static inline struct reiserfs_inode_info *REISERFS_I(const struct inode *inode) { return container_of(inode, struct reiserfs_inode_info, vfs_inode); } static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) { return sb->s_fs_info; } /* * Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16 * which overflows on large file systems. */ static inline __u32 reiserfs_bmap_count(struct super_block *sb) { return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1; } static inline int bmap_would_wrap(unsigned bmap_nr) { return bmap_nr > ((1LL << 16) - 1); } extern const struct xattr_handler * const reiserfs_xattr_handlers[]; /* * this says about version of key of all items (but stat data) the * object consists of */ #define get_inode_item_key_version( inode ) \ ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? 
KEY_FORMAT_3_6 : KEY_FORMAT_3_5) #define set_inode_item_key_version( inode, version ) \ ({ if((version)==KEY_FORMAT_3_6) \ REISERFS_I(inode)->i_flags |= i_item_key_version_mask; \ else \ REISERFS_I(inode)->i_flags &= ~i_item_key_version_mask; }) #define get_inode_sd_version(inode) \ ((REISERFS_I(inode)->i_flags & i_stat_data_version_mask) ? STAT_DATA_V2 : STAT_DATA_V1) #define set_inode_sd_version(inode, version) \ ({ if((version)==STAT_DATA_V2) \ REISERFS_I(inode)->i_flags |= i_stat_data_version_mask; \ else \ REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; }) /* * This is an aggressive tail suppression policy, I am hoping it * improves our benchmarks. The principle behind it is that percentage * space saving is what matters, not absolute space saving. This is * non-intuitive, but it helps to understand it if you consider that the * cost to access 4 blocks is not much more than the cost to access 1 * block, if you have to do a seek and rotate. A tail risks a * non-linear disk access that is significant as a percentage of total * time cost for a 4 block file and saves an amount of space that is * less significant as a percentage of space, or so goes the hypothesis. * -Hans */ #define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \ (\ (!(n_tail_size)) || \ (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \ ( (n_file_size) >= (n_block_size) * 4 ) || \ ( ( (n_file_size) >= (n_block_size) * 3 ) && \ ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/4) ) || \ ( ( (n_file_size) >= (n_block_size) * 2 ) && \ ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/2) ) || \ ( ( (n_file_size) >= (n_block_size) ) && \ ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ ) /* * Another strategy for tails, this one means only create a tail if all the * file would fit into one DIRECT item. * Primary intention for this one is to increase performance by decreasing * seeking. 
*/ #define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \ (\ (!(n_tail_size)) || \ (((n_file_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) ) \ ) /* * values for s_umount_state field */ #define REISERFS_VALID_FS 1 #define REISERFS_ERROR_FS 2 /* * there are 5 item types currently */ #define TYPE_STAT_DATA 0 #define TYPE_INDIRECT 1 #define TYPE_DIRECT 2 #define TYPE_DIRENTRY 3 #define TYPE_MAXTYPE 3 #define TYPE_ANY 15 /* FIXME: comment is required */ /*************************************************************************** * KEY & ITEM HEAD * ***************************************************************************/ /* * directories use this key as well as old files */ struct offset_v1 { __le32 k_offset; __le32 k_uniqueness; } __attribute__ ((__packed__)); struct offset_v2 { __le64 v; } __attribute__ ((__packed__)); static inline __u16 offset_v2_k_type(const struct offset_v2 *v2) { __u8 type = le64_to_cpu(v2->v) >> 60; return (type <= TYPE_MAXTYPE) ? type : TYPE_ANY; } static inline void set_offset_v2_k_type(struct offset_v2 *v2, int type) { v2->v = (v2->v & cpu_to_le64(~0ULL >> 4)) | cpu_to_le64((__u64) type << 60); } static inline loff_t offset_v2_k_offset(const struct offset_v2 *v2) { return le64_to_cpu(v2->v) & (~0ULL >> 4); } static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset) { offset &= (~0ULL >> 4); v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset); } /* * Key of an item determines its location in the S+tree, and * is composed of 4 components */ struct reiserfs_key { /* packing locality: by default parent directory object id */ __le32 k_dir_id; __le32 k_objectid; /* object identifier */ union { struct offset_v1 k_offset_v1; struct offset_v2 k_offset_v2; } __attribute__ ((__packed__)) u; } __attribute__ ((__packed__)); struct in_core_key { /* packing locality: by default parent directory object id */ __u32 k_dir_id; __u32 k_objectid; /* object identifier */ __u64 k_offset; __u8 k_type; }; struct 
cpu_key { struct in_core_key on_disk_key; int version; /* 3 in all cases but direct2indirect and indirect2direct conversion */ int key_length; }; /* * Our function for comparing keys can compare keys of different * lengths. It takes as a parameter the length of the keys it is to * compare. These defines are used in determining what is to be passed * to it as that parameter. */ #define REISERFS_FULL_KEY_LEN 4 #define REISERFS_SHORT_KEY_LEN 2 /* The result of the key compare */ #define FIRST_GREATER 1 #define SECOND_GREATER -1 #define KEYS_IDENTICAL 0 #define KEY_FOUND 1 #define KEY_NOT_FOUND 0 #define KEY_SIZE (sizeof(struct reiserfs_key)) /* return values for search_by_key and clones */ #define ITEM_FOUND 1 #define ITEM_NOT_FOUND 0 #define ENTRY_FOUND 1 #define ENTRY_NOT_FOUND 0 #define DIRECTORY_NOT_FOUND -1 #define REGULAR_FILE_FOUND -2 #define DIRECTORY_FOUND -3 #define BYTE_FOUND 1 #define BYTE_NOT_FOUND 0 #define FILE_NOT_FOUND -1 #define POSITION_FOUND 1 #define POSITION_NOT_FOUND 0 /* return values for reiserfs_find_entry and search_by_entry_key */ #define NAME_FOUND 1 #define NAME_NOT_FOUND 0 #define GOTO_PREVIOUS_ITEM 2 #define NAME_FOUND_INVISIBLE 3 /* * Everything in the filesystem is stored as a set of items. The * item head contains the key of the item, its free space (for * indirect items) and specifies the location of the item itself * within the block. */ struct item_head { /* * Everything in the tree is found by searching for it based on * its key. */ struct reiserfs_key ih_key; union { /* * The free space in the last unformatted node of an * indirect item if this is an indirect item. This * equals 0xFFFF iff this is a direct item or stat data * item. Note that the key, not this field, is used to * determine the item type, and thus which field this * union contains. */ __le16 ih_free_space_reserved; /* * Iff this is a directory item, this field equals the * number of directory entries in the directory item. 
*/ __le16 ih_entry_count; } __attribute__ ((__packed__)) u; __le16 ih_item_len; /* total size of the item body */ /* an offset to the item body within the block */ __le16 ih_item_location; /* * 0 for all old items, 2 for new ones. Highest bit is set by fsck * temporary, cleaned after all done */ __le16 ih_version; } __attribute__ ((__packed__)); /* size of item header */ #define IH_SIZE (sizeof(struct item_head)) #define ih_free_space(ih) le16_to_cpu((ih)->u.ih_free_space_reserved) #define ih_version(ih) le16_to_cpu((ih)->ih_version) #define ih_entry_count(ih) le16_to_cpu((ih)->u.ih_entry_count) #define ih_location(ih) le16_to_cpu((ih)->ih_item_location) #define ih_item_len(ih) le16_to_cpu((ih)->ih_item_len) #define put_ih_free_space(ih, val) do { (ih)->u.ih_free_space_reserved = cpu_to_le16(val); } while(0) #define put_ih_version(ih, val) do { (ih)->ih_version = cpu_to_le16(val); } while (0) #define put_ih_entry_count(ih, val) do { (ih)->u.ih_entry_count = cpu_to_le16(val); } while (0) #define put_ih_location(ih, val) do { (ih)->ih_item_location = cpu_to_le16(val); } while (0) #define put_ih_item_len(ih, val) do { (ih)->ih_item_len = cpu_to_le16(val); } while (0) #define unreachable_item(ih) (ih_version(ih) & (1 << 15)) #define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih)) #define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val))) /* * these operate on indirect items, where you've got an array of ints * at a possibly unaligned location. These are a noop on ia32 * * p is the array of __u32, i is the index into the array, v is the value * to store there. 
*/ #define get_block_num(p, i) get_unaligned_le32((p) + (i)) #define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i)) /* * in old version uniqueness field shows key type */ #define V1_SD_UNIQUENESS 0 #define V1_INDIRECT_UNIQUENESS 0xfffffffe #define V1_DIRECT_UNIQUENESS 0xffffffff #define V1_DIRENTRY_UNIQUENESS 500 #define V1_ANY_UNIQUENESS 555 /* FIXME: comment is required */ /* here are conversion routines */ static inline int uniqueness2type(__u32 uniqueness) CONSTF; static inline int uniqueness2type(__u32 uniqueness) { switch ((int)uniqueness) { case V1_SD_UNIQUENESS: return TYPE_STAT_DATA; case V1_INDIRECT_UNIQUENESS: return TYPE_INDIRECT; case V1_DIRECT_UNIQUENESS: return TYPE_DIRECT; case V1_DIRENTRY_UNIQUENESS: return TYPE_DIRENTRY; case V1_ANY_UNIQUENESS: default: return TYPE_ANY; } } static inline __u32 type2uniqueness(int type) CONSTF; static inline __u32 type2uniqueness(int type) { switch (type) { case TYPE_STAT_DATA: return V1_SD_UNIQUENESS; case TYPE_INDIRECT: return V1_INDIRECT_UNIQUENESS; case TYPE_DIRECT: return V1_DIRECT_UNIQUENESS; case TYPE_DIRENTRY: return V1_DIRENTRY_UNIQUENESS; case TYPE_ANY: default: return V1_ANY_UNIQUENESS; } } /* * key is pointer to on disk key which is stored in le, result is cpu, * there is no way to get version of object from key, so, provide * version to these defines */ static inline loff_t le_key_k_offset(int version, const struct reiserfs_key *key) { return (version == KEY_FORMAT_3_5) ? 
le32_to_cpu(key->u.k_offset_v1.k_offset) : offset_v2_k_offset(&(key->u.k_offset_v2)); } static inline loff_t le_ih_k_offset(const struct item_head *ih) { return le_key_k_offset(ih_version(ih), &(ih->ih_key)); } static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key) { if (version == KEY_FORMAT_3_5) { loff_t val = le32_to_cpu(key->u.k_offset_v1.k_uniqueness); return uniqueness2type(val); } else return offset_v2_k_type(&(key->u.k_offset_v2)); } static inline loff_t le_ih_k_type(const struct item_head *ih) { return le_key_k_type(ih_version(ih), &(ih->ih_key)); } static inline void set_le_key_k_offset(int version, struct reiserfs_key *key, loff_t offset) { if (version == KEY_FORMAT_3_5) key->u.k_offset_v1.k_offset = cpu_to_le32(offset); else set_offset_v2_k_offset(&key->u.k_offset_v2, offset); } static inline void add_le_key_k_offset(int version, struct reiserfs_key *key, loff_t offset) { set_le_key_k_offset(version, key, le_key_k_offset(version, key) + offset); } static inline void add_le_ih_k_offset(struct item_head *ih, loff_t offset) { add_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset); } static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset) { set_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset); } static inline void set_le_key_k_type(int version, struct reiserfs_key *key, int type) { if (version == KEY_FORMAT_3_5) { type = type2uniqueness(type); key->u.k_offset_v1.k_uniqueness = cpu_to_le32(type); } else set_offset_v2_k_type(&key->u.k_offset_v2, type); } static inline void set_le_ih_k_type(struct item_head *ih, int type) { set_le_key_k_type(ih_version(ih), &(ih->ih_key), type); } static inline int is_direntry_le_key(int version, struct reiserfs_key *key) { return le_key_k_type(version, key) == TYPE_DIRENTRY; } static inline int is_direct_le_key(int version, struct reiserfs_key *key) { return le_key_k_type(version, key) == TYPE_DIRECT; } static inline int is_indirect_le_key(int version, struct reiserfs_key 
*key) { return le_key_k_type(version, key) == TYPE_INDIRECT; } static inline int is_statdata_le_key(int version, struct reiserfs_key *key) { return le_key_k_type(version, key) == TYPE_STAT_DATA; } /* item header has version. */ static inline int is_direntry_le_ih(struct item_head *ih) { return is_direntry_le_key(ih_version(ih), &ih->ih_key); } static inline int is_direct_le_ih(struct item_head *ih) { return is_direct_le_key(ih_version(ih), &ih->ih_key); } static inline int is_indirect_le_ih(struct item_head *ih) { return is_indirect_le_key(ih_version(ih), &ih->ih_key); } static inline int is_statdata_le_ih(struct item_head *ih) { return is_statdata_le_key(ih_version(ih), &ih->ih_key); } /* key is pointer to cpu key, result is cpu */ static inline loff_t cpu_key_k_offset(const struct cpu_key *key) { return key->on_disk_key.k_offset; } static inline loff_t cpu_key_k_type(const struct cpu_key *key) { return key->on_disk_key.k_type; } static inline void set_cpu_key_k_offset(struct cpu_key *key, loff_t offset) { key->on_disk_key.k_offset = offset; } static inline void set_cpu_key_k_type(struct cpu_key *key, int type) { key->on_disk_key.k_type = type; } static inline void cpu_key_k_offset_dec(struct cpu_key *key) { key->on_disk_key.k_offset--; } #define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY) #define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT) #define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT) #define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA) /* are these used ? 
*/ #define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key))) #define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key))) #define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key))) #define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key))) #define I_K_KEY_IN_ITEM(ih, key, n_blocksize) \ (!COMP_SHORT_KEYS(ih, key) && \ I_OFF_BYTE_IN_ITEM(ih, k_offset(key), n_blocksize)) /* maximal length of item */ #define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE) #define MIN_ITEM_LEN 1 /* object identifier for root dir */ #define REISERFS_ROOT_OBJECTID 2 #define REISERFS_ROOT_PARENT_OBJECTID 1 extern struct reiserfs_key root_key; /* * Picture represents a leaf of the S+tree * ______________________________________________________ * | | Array of | | | * |Block | Object-Item | F r e e | Objects- | * | head | Headers | S p a c e | Items | * |______|_______________|___________________|___________| */ /* * Header of a disk block. More precisely, header of a formatted leaf * or internal node, and not the header of an unformatted node. */ struct block_head { __le16 blk_level; /* Level of a block in the tree. */ __le16 blk_nr_item; /* Number of keys/items in a block. */ __le16 blk_free_space; /* Block free space in bytes. 
*/ __le16 blk_reserved; /* dump this in v4/planA */ /* kept only for compatibility */ struct reiserfs_key blk_right_delim_key; }; #define BLKH_SIZE (sizeof(struct block_head)) #define blkh_level(p_blkh) (le16_to_cpu((p_blkh)->blk_level)) #define blkh_nr_item(p_blkh) (le16_to_cpu((p_blkh)->blk_nr_item)) #define blkh_free_space(p_blkh) (le16_to_cpu((p_blkh)->blk_free_space)) #define blkh_reserved(p_blkh) (le16_to_cpu((p_blkh)->blk_reserved)) #define set_blkh_level(p_blkh,val) ((p_blkh)->blk_level = cpu_to_le16(val)) #define set_blkh_nr_item(p_blkh,val) ((p_blkh)->blk_nr_item = cpu_to_le16(val)) #define set_blkh_free_space(p_blkh,val) ((p_blkh)->blk_free_space = cpu_to_le16(val)) #define set_blkh_reserved(p_blkh,val) ((p_blkh)->blk_reserved = cpu_to_le16(val)) #define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key) #define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val) /* values for blk_level field of the struct block_head */ /* * When node gets removed from the tree its blk_level is set to FREE_LEVEL. * It is then used to see whether the node is still in the tree */ #define FREE_LEVEL 0 #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ /* * Given the buffer head of a formatted node, resolve to the * block head of that node. */ #define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data)) /* Number of items that are in buffer. */ #define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh))) #define B_LEVEL(bh) (blkh_level(B_BLK_HEAD(bh))) #define B_FREE_SPACE(bh) (blkh_free_space(B_BLK_HEAD(bh))) #define PUT_B_NR_ITEMS(bh, val) do { set_blkh_nr_item(B_BLK_HEAD(bh), val); } while (0) #define PUT_B_LEVEL(bh, val) do { set_blkh_level(B_BLK_HEAD(bh), val); } while (0) #define PUT_B_FREE_SPACE(bh, val) do { set_blkh_free_space(B_BLK_HEAD(bh), val); } while (0) /* Get right delimiting key. -- little endian */ #define B_PRIGHT_DELIM_KEY(bh) (&(blk_right_delim_key(B_BLK_HEAD(bh)))) /* Does the buffer contain a disk leaf. 
*/ #define B_IS_ITEMS_LEVEL(bh) (B_LEVEL(bh) == DISK_LEAF_NODE_LEVEL) /* Does the buffer contain a disk internal node */ #define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \ && B_LEVEL(bh) <= MAX_HEIGHT) /*************************************************************************** * STAT DATA * ***************************************************************************/ /* * old stat data is 32 bytes long. We are going to distinguish new one by * different size */ struct stat_data_v1 { __le16 sd_mode; /* file type, permissions */ __le16 sd_nlink; /* number of hard links */ __le16 sd_uid; /* owner */ __le16 sd_gid; /* group */ __le32 sd_size; /* file size */ __le32 sd_atime; /* time of last access */ __le32 sd_mtime; /* time file was last modified */ /* * time inode (stat data) was last changed * (except changes to sd_atime and sd_mtime) */ __le32 sd_ctime; union { __le32 sd_rdev; __le32 sd_blocks; /* number of blocks file uses */ } __attribute__ ((__packed__)) u; /* * first byte of file which is stored in a direct item: except that if * it equals 1 it is a symlink and if it equals ~(__u32)0 there is no * direct item. The existence of this field really grates on me. * Let's replace it with a macro based on sd_size and our tail * suppression policy. Someday. 
-Hans */ __le32 sd_first_direct_byte; } __attribute__ ((__packed__)); #define SD_V1_SIZE (sizeof(struct stat_data_v1)) #define stat_data_v1(ih) (ih_version (ih) == KEY_FORMAT_3_5) #define sd_v1_mode(sdp) (le16_to_cpu((sdp)->sd_mode)) #define set_sd_v1_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v)) #define sd_v1_nlink(sdp) (le16_to_cpu((sdp)->sd_nlink)) #define set_sd_v1_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le16(v)) #define sd_v1_uid(sdp) (le16_to_cpu((sdp)->sd_uid)) #define set_sd_v1_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le16(v)) #define sd_v1_gid(sdp) (le16_to_cpu((sdp)->sd_gid)) #define set_sd_v1_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le16(v)) #define sd_v1_size(sdp) (le32_to_cpu((sdp)->sd_size)) #define set_sd_v1_size(sdp,v) ((sdp)->sd_size = cpu_to_le32(v)) #define sd_v1_atime(sdp) (le32_to_cpu((sdp)->sd_atime)) #define set_sd_v1_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v)) #define sd_v1_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime)) #define set_sd_v1_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v)) #define sd_v1_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime)) #define set_sd_v1_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v)) #define sd_v1_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev)) #define set_sd_v1_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v)) #define sd_v1_blocks(sdp) (le32_to_cpu((sdp)->u.sd_blocks)) #define set_sd_v1_blocks(sdp,v) ((sdp)->u.sd_blocks = cpu_to_le32(v)) #define sd_v1_first_direct_byte(sdp) \ (le32_to_cpu((sdp)->sd_first_direct_byte)) #define set_sd_v1_first_direct_byte(sdp,v) \ ((sdp)->sd_first_direct_byte = cpu_to_le32(v)) /* inode flags stored in sd_attrs (nee sd_reserved) */ /* * we want common flags to have the same values as in ext2, * so chattr(1) will work without problems */ #define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL #define REISERFS_APPEND_FL FS_APPEND_FL #define REISERFS_SYNC_FL FS_SYNC_FL #define REISERFS_NOATIME_FL FS_NOATIME_FL #define REISERFS_NODUMP_FL FS_NODUMP_FL #define REISERFS_SECRM_FL FS_SECRM_FL #define REISERFS_UNRM_FL FS_UNRM_FL 
#define REISERFS_COMPR_FL     FS_COMPR_FL
#define REISERFS_NOTAIL_FL    FS_NOTAIL_FL

/* persistent flags that file inherits from the parent directory */
#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL |	\
				REISERFS_SYNC_FL |	\
				REISERFS_NOATIME_FL |	\
				REISERFS_NODUMP_FL |	\
				REISERFS_SECRM_FL |	\
				REISERFS_COMPR_FL |	\
				REISERFS_NOTAIL_FL )

/*
 * Stat Data on disk (reiserfs version of UFS disk inode minus the
 * address blocks)
 *
 * All members are little-endian on-disk values; use the sd_v2_*() /
 * set_sd_v2_*() accessors below rather than touching them directly.
 */
struct stat_data {
	__le16 sd_mode;		/* file type, permissions */
	__le16 sd_attrs;	/* persistent inode flags */
	__le32 sd_nlink;	/* number of hard links */
	__le64 sd_size;		/* file size */
	__le32 sd_uid;		/* owner */
	__le32 sd_gid;		/* group */
	__le32 sd_atime;	/* time of last access */
	__le32 sd_mtime;	/* time file was last modified */

	/*
	 * time inode (stat data) was last changed
	 * (except changes to sd_atime and sd_mtime)
	 */
	__le32 sd_ctime;
	__le32 sd_blocks;
	union {
		__le32 sd_rdev;
		__le32 sd_generation;
	} __attribute__ ((__packed__)) u;
} __attribute__ ((__packed__));

/* this is 44 bytes long */
#define SD_SIZE (sizeof(struct stat_data))
#define SD_V2_SIZE              SD_SIZE

/* true when the item head @ih carries the 3.6 key format */
#define stat_data_v2(ih)        (ih_version (ih) == KEY_FORMAT_3_6)

/*
 * Accessors for struct stat_data.  Getters convert the stored
 * little-endian value to CPU order; setters do the reverse.  Note that
 * sd_size is 64 bits wide here and sd_attrs is accessed at the end of
 * the list.
 */
#define sd_v2_mode(sdp)         (le16_to_cpu((sdp)->sd_mode))
#define set_sd_v2_mode(sdp,v)   ((sdp)->sd_mode = cpu_to_le16(v))
/* sd_reserved */
/* set_sd_reserved */
#define sd_v2_nlink(sdp)        (le32_to_cpu((sdp)->sd_nlink))
#define set_sd_v2_nlink(sdp,v)  ((sdp)->sd_nlink = cpu_to_le32(v))
#define sd_v2_size(sdp)         (le64_to_cpu((sdp)->sd_size))
#define set_sd_v2_size(sdp,v)   ((sdp)->sd_size = cpu_to_le64(v))
#define sd_v2_uid(sdp)          (le32_to_cpu((sdp)->sd_uid))
#define set_sd_v2_uid(sdp,v)    ((sdp)->sd_uid = cpu_to_le32(v))
#define sd_v2_gid(sdp)          (le32_to_cpu((sdp)->sd_gid))
#define set_sd_v2_gid(sdp,v)    ((sdp)->sd_gid = cpu_to_le32(v))
#define sd_v2_atime(sdp)        (le32_to_cpu((sdp)->sd_atime))
#define set_sd_v2_atime(sdp,v)  ((sdp)->sd_atime = cpu_to_le32(v))
#define sd_v2_mtime(sdp)        (le32_to_cpu((sdp)->sd_mtime))
#define set_sd_v2_mtime(sdp,v)  ((sdp)->sd_mtime = cpu_to_le32(v))
#define sd_v2_ctime(sdp)        (le32_to_cpu((sdp)->sd_ctime))
#define set_sd_v2_ctime(sdp,v)  ((sdp)->sd_ctime = cpu_to_le32(v))
#define sd_v2_blocks(sdp)       (le32_to_cpu((sdp)->sd_blocks))
#define set_sd_v2_blocks(sdp,v) ((sdp)->sd_blocks = cpu_to_le32(v))
#define sd_v2_rdev(sdp)         (le32_to_cpu((sdp)->u.sd_rdev))
#define set_sd_v2_rdev(sdp,v)   ((sdp)->u.sd_rdev = cpu_to_le32(v))
#define sd_v2_generation(sdp)   (le32_to_cpu((sdp)->u.sd_generation))
#define set_sd_v2_generation(sdp,v) ((sdp)->u.sd_generation = cpu_to_le32(v))
#define sd_v2_attrs(sdp)         (le16_to_cpu((sdp)->sd_attrs))
#define set_sd_v2_attrs(sdp,v)   ((sdp)->sd_attrs = cpu_to_le16(v))

/***************************************************************************
 *                      DIRECTORY STRUCTURE                                *
 ***************************************************************************/
/*
 * Picture represents the structure of directory items
 * ________________________________________________
 * |  Array of     |   |     |        |       |   |
 * | directory     |N-1| N-2 | ....   |   1st |0th|
 * | entry headers |   |     |        |       |   |
 * |_______________|___|_____|________|_______|___|
 *                  <----   directory entries         ------>
 *
 * First directory item has k_offset component 1. We store "." and ".."
 * in one item, always, we never split "." and ".." into differing
 * items.  This makes, among other things, the code for removing
 * directories simpler.
 */
#define SD_OFFSET  0
#define SD_UNIQUENESS 0
#define DOT_OFFSET 1
#define DOT_DOT_OFFSET 2
#define DIRENTRY_UNIQUENESS 500

#define FIRST_ITEM_OFFSET 1

/*
 * Q: How to get key of object pointed to by entry from entry?
 *
 * A: Each directory entry has its header.
This header has deh_dir_id
 *    and deh_objectid fields, those are key of object, entry points to
 */

/*
 * NOT IMPLEMENTED:
 * Directory will someday contain stat data of object
 */

/*
 * On-disk header of one directory entry.  All members are little-endian;
 * use the deh_*() / put_deh_*() accessors below.
 */
struct reiserfs_de_head {
	__le32 deh_offset;	/* third component of the directory entry key */

	/*
	 * objectid of the parent directory of the object, that is referenced
	 * by directory entry
	 */
	__le32 deh_dir_id;

	/* objectid of the object, that is referenced by directory entry */
	__le32 deh_objectid;
	__le16 deh_location;	/* offset of name in the whole item */

	/*
	 * whether 1) entry contains stat data (for future), and
	 * 2) whether entry is hidden (unlinked)
	 */
	__le16 deh_state;
} __attribute__ ((__packed__));

#define DEH_SIZE                  sizeof(struct reiserfs_de_head)

/* getters: on-disk little-endian -> CPU order */
#define deh_offset(p_deh)         (le32_to_cpu((p_deh)->deh_offset))
#define deh_dir_id(p_deh)         (le32_to_cpu((p_deh)->deh_dir_id))
#define deh_objectid(p_deh)       (le32_to_cpu((p_deh)->deh_objectid))
#define deh_location(p_deh)       (le16_to_cpu((p_deh)->deh_location))
#define deh_state(p_deh)          (le16_to_cpu((p_deh)->deh_state))

/* setters: CPU order -> on-disk little-endian */
#define put_deh_offset(p_deh,v)   ((p_deh)->deh_offset = cpu_to_le32((v)))
#define put_deh_dir_id(p_deh,v)   ((p_deh)->deh_dir_id = cpu_to_le32((v)))
#define put_deh_objectid(p_deh,v) ((p_deh)->deh_objectid = cpu_to_le32((v)))
#define put_deh_location(p_deh,v) ((p_deh)->deh_location = cpu_to_le16((v)))
#define put_deh_state(p_deh,v)    ((p_deh)->deh_state = cpu_to_le16((v)))

/* empty directory contains two entries "." and ".." and their headers */
#define EMPTY_DIR_SIZE \
(DEH_SIZE * 2 + ROUND_UP (sizeof(".") - 1) + ROUND_UP (sizeof("..") - 1))

/* old format directories have this size when empty */
#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3)

/* bit numbers within deh_state */
#define DEH_Statdata 0		/* not used now */
#define DEH_Visible 2

/* 64 bit systems (and the S/390) need to be aligned explicitly -jdm */
#if BITS_PER_LONG == 64 || defined(__s390__) || defined(__hppa__)
#   define ADDR_UNALIGNED_BITS  (3)
#endif

/*
 * These are only used to manipulate deh_state.
* Because of this, we'll use the ext2_ bit routines, * since they are little endian */ #ifdef ADDR_UNALIGNED_BITS # define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) # define unaligned_offset(addr) (((int)((long)(addr) & ((1 << ADDR_UNALIGNED_BITS) - 1))) << 3) # define set_bit_unaligned(nr, addr) \ __test_and_set_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) # define clear_bit_unaligned(nr, addr) \ __test_and_clear_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) # define test_bit_unaligned(nr, addr) \ test_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) #else # define set_bit_unaligned(nr, addr) __test_and_set_bit_le(nr, addr) # define clear_bit_unaligned(nr, addr) __test_and_clear_bit_le(nr, addr) # define test_bit_unaligned(nr, addr) test_bit_le(nr, addr) #endif #define mark_de_with_sd(deh) set_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) #define mark_de_without_sd(deh) clear_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) #define mark_de_visible(deh) set_bit_unaligned (DEH_Visible, &((deh)->deh_state)) #define mark_de_hidden(deh) clear_bit_unaligned (DEH_Visible, &((deh)->deh_state)) #define de_with_sd(deh) test_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) #define de_visible(deh) test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) #define de_hidden(deh) !test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, __le32 par_dirid, __le32 par_objid); extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, __le32 par_dirid, __le32 par_objid); /* two entries per block (at least) */ #define REISERFS_MAX_NAME(block_size) 255 /* * this structure is used for operations on directory entries. It is * not a disk structure. 
* * When reiserfs_find_entry or search_by_entry_key find directory * entry, they return filled reiserfs_dir_entry structure */ struct reiserfs_dir_entry { struct buffer_head *de_bh; int de_item_num; struct item_head *de_ih; int de_entry_num; struct reiserfs_de_head *de_deh; int de_entrylen; int de_namelen; char *de_name; unsigned long *de_gen_number_bit_string; __u32 de_dir_id; __u32 de_objectid; struct cpu_key de_entry_key; }; /* * these defines are useful when a particular member of * a reiserfs_dir_entry is needed */ /* pointer to file name, stored in entry */ #define B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh) \ (ih_item_body(bh, ih) + deh_location(deh)) /* length of name */ #define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \ (I_DEH_N_ENTRY_LENGTH (ih, deh, entry_num) - (de_with_sd (deh) ? SD_SIZE : 0)) /* hash value occupies bits from 7 up to 30 */ #define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL) /* generation number occupies 7 bits starting from 0 up to 6 */ #define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL) #define MAX_GENERATION_NUMBER 127 #define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number)) /* * Picture represents an internal node of the reiserfs tree * ______________________________________________________ * | | Array of | Array of | Free | * |block | keys | pointers | space | * | head | N | N+1 | | * |______|_______________|___________________|___________| */ /*************************************************************************** * DISK CHILD * ***************************************************************************/ /* * Disk child pointer: * The pointer from an internal node of the tree to a node that is on disk. */ struct disk_child { __le32 dc_block_number; /* Disk child's block number. */ __le16 dc_size; /* Disk child's used space. 
*/ __le16 dc_reserved; }; #define DC_SIZE (sizeof(struct disk_child)) #define dc_block_number(dc_p) (le32_to_cpu((dc_p)->dc_block_number)) #define dc_size(dc_p) (le16_to_cpu((dc_p)->dc_size)) #define put_dc_block_number(dc_p, val) do { (dc_p)->dc_block_number = cpu_to_le32(val); } while(0) #define put_dc_size(dc_p, val) do { (dc_p)->dc_size = cpu_to_le16(val); } while(0) /* Get disk child by buffer header and position in the tree node. */ #define B_N_CHILD(bh, n_pos) ((struct disk_child *)\ ((bh)->b_data + BLKH_SIZE + B_NR_ITEMS(bh) * KEY_SIZE + DC_SIZE * (n_pos))) /* Get disk child number by buffer header and position in the tree node. */ #define B_N_CHILD_NUM(bh, n_pos) (dc_block_number(B_N_CHILD(bh, n_pos))) #define PUT_B_N_CHILD_NUM(bh, n_pos, val) \ (put_dc_block_number(B_N_CHILD(bh, n_pos), val)) /* maximal value of field child_size in structure disk_child */ /* child size is the combined size of all items and their headers */ #define MAX_CHILD_SIZE(bh) ((int)( (bh)->b_size - BLKH_SIZE )) /* amount of used space in buffer (not including block head) */ #define B_CHILD_SIZE(cur) (MAX_CHILD_SIZE(cur)-(B_FREE_SPACE(cur))) /* max and min number of keys in internal node */ #define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) #define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) /*************************************************************************** * PATH STRUCTURES AND DEFINES * ***************************************************************************/ /* * search_by_key fills up the path from the root to the leaf as it descends * the tree looking for the key. It uses reiserfs_bread to try to find * buffers in the cache given their block number. If it does not find * them in the cache it reads them from disk. For each node search_by_key * finds using reiserfs_bread it then uses bin_search to look through that * node. bin_search will find the position of the block_number of the next * node if it is looking through an internal node. 
If it is looking through
 * a leaf node bin_search will find the position of the item which has key
 * either equal to given key, or which is the maximal key less than the
 * given key.
 */

struct path_element {
	/* Pointer to the buffer at the path in the tree. */
	struct buffer_head *pe_buffer;
	/* Position in the tree node which is placed in the buffer above. */
	int pe_position;
};

/*
 * maximal height of a tree. don't change this without
 * changing JOURNAL_PER_BALANCE_CNT
 */
#define MAX_HEIGHT 5

/* Must be equal to MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */
#define EXTENDED_MAX_HEIGHT         7

/* Must be at least 2. */
#define FIRST_PATH_ELEMENT_OFFSET   2

/* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */
#define ILLEGAL_PATH_ELEMENT_OFFSET 1

/* this MUST be MAX_HEIGHT + 1. See about FEB below */
#define MAX_FEB_SIZE 6

/*
 * We need to keep track of who the ancestors of nodes are.  When we
 * perform a search we record which nodes were visited while
 * descending the tree looking for the node we searched for. This list
 * of nodes is called the path.  This information is used while
 * performing balancing.  Note that this path information may become
 * invalid, and this means we must check it when using it to see if it
 * is still valid. You'll need to read search_by_key and the comments
 * in it, especially about decrement_counters_in_path(), to understand
 * this structure.
 *
 * Paths make the code so much harder to work with and debug.... An
 * enormous number of bugs are due to them, and trying to write or modify
 * code that uses them just makes my head hurt.  They are based on an
 * excessive effort to avoid disturbing the precious VFS code.:-( The
 * gods only know how we are going to SMP the code that uses them.
 * znodes are the way!
 */

#define PATH_READA	0x1	/* do read ahead */
#define PATH_READA_BACK 0x2	/* read backwards */

struct treepath {
	/*
	 * Offset into path_elements[] of the last element of the path
	 * (this is what PATH_PLAST_BUFFER / PATH_LAST_POSITION index with).
	 * INITIALIZE_PATH sets it to ILLEGAL_PATH_ELEMENT_OFFSET.
	 */
	int path_length;
	int reada;		/* PATH_READA* flags; 0 after INITIALIZE_PATH */
	/* Array of the path elements. */
	struct path_element path_elements[EXTENDED_MAX_HEIGHT];
	int pos_in_item;
};

#define pos_in_item(path) ((path)->pos_in_item)

/* declare a struct treepath named @var holding no elements yet */
#define INITIALIZE_PATH(var) \
struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,}

/* Get path element by path and path position. */
#define PATH_OFFSET_PELEMENT(path, n_offset)  ((path)->path_elements + (n_offset))

/* Get buffer header at the path by path and path position. */
#define PATH_OFFSET_PBUFFER(path, n_offset)   (PATH_OFFSET_PELEMENT(path, n_offset)->pe_buffer)

/* Get position in the element at the path by path and path position. */
#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position)

/* buffer of the last element of the path */
#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length))

/*
 * you know, to the person who didn't write this the macro name does not
 * at first suggest what it does.  Maybe POSITION_FROM_PATH_END? Or
 * maybe we should just focus on dumping paths... -Hans
 */
#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length))

/*
 * in do_balance leaf has h == 0 in contrast with path structure,
 * where root has level == 0.
That is why we need these defines */ /* tb->S[h] */ #define PATH_H_PBUFFER(path, h) \ PATH_OFFSET_PBUFFER(path, path->path_length - (h)) /* tb->F[h] or tb->S[0]->b_parent */ #define PATH_H_PPARENT(path, h) PATH_H_PBUFFER(path, (h) + 1) #define PATH_H_POSITION(path, h) \ PATH_OFFSET_POSITION(path, path->path_length - (h)) /* tb->S[h]->b_item_order */ #define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) #define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h)) static inline void *reiserfs_node_data(const struct buffer_head *bh) { return bh->b_data + sizeof(struct block_head); } /* get key from internal node */ static inline struct reiserfs_key *internal_key(struct buffer_head *bh, int item_num) { struct reiserfs_key *key = reiserfs_node_data(bh); return &key[item_num]; } /* get the item header from leaf node */ static inline struct item_head *item_head(const struct buffer_head *bh, int item_num) { struct item_head *ih = reiserfs_node_data(bh); return &ih[item_num]; } /* get the key from leaf node */ static inline struct reiserfs_key *leaf_key(const struct buffer_head *bh, int item_num) { return &item_head(bh, item_num)->ih_key; } static inline void *ih_item_body(const struct buffer_head *bh, const struct item_head *ih) { return bh->b_data + ih_location(ih); } /* get item body from leaf node */ static inline void *item_body(const struct buffer_head *bh, int item_num) { return ih_item_body(bh, item_head(bh, item_num)); } static inline struct item_head *tp_item_head(const struct treepath *path) { return item_head(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path)); } static inline void *tp_item_body(const struct treepath *path) { return item_body(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path)); } #define get_last_bh(path) PATH_PLAST_BUFFER(path) #define get_item_pos(path) PATH_LAST_POSITION(path) #define item_moved(ih,path) comp_items(ih, path) #define path_changed(ih,path) comp_items (ih, path) /* array of the entry headers */ /* get item body 
*/ #define B_I_DEH(bh, ih) ((struct reiserfs_de_head *)(ih_item_body(bh, ih))) /* * length of the directory entry in directory item. This define * calculates length of i-th directory entry using directory entry * locations from dir entry head. When it calculates length of 0-th * directory entry, it uses length of whole item in place of entry * location of the non-existent following entry in the calculation. * See picture above. */ static inline int entry_length(const struct buffer_head *bh, const struct item_head *ih, int pos_in_item) { struct reiserfs_de_head *deh; deh = B_I_DEH(bh, ih) + pos_in_item; if (pos_in_item) return deh_location(deh - 1) - deh_location(deh); return ih_item_len(ih) - deh_location(deh); } /*************************************************************************** * MISC * ***************************************************************************/ /* Size of pointer to the unformatted node. */ #define UNFM_P_SIZE (sizeof(unp_t)) #define UNFM_P_SHIFT 2 /* in in-core inode key is stored on le form */ #define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key)) #define MAX_UL_INT 0xffffffff #define MAX_INT 0x7ffffff #define MAX_US_INT 0xffff // reiserfs version 2 has max offset 60 bits. 
Version 1 - 32 bit offset static inline loff_t max_reiserfs_offset(struct inode *inode) { if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5) return (loff_t) U32_MAX; return (loff_t) ((~(__u64) 0) >> 4); } #define MAX_KEY_OBJECTID MAX_UL_INT #define MAX_B_NUM MAX_UL_INT #define MAX_FC_NUM MAX_US_INT /* the purpose is to detect overflow of an unsigned short */ #define REISERFS_LINK_MAX (MAX_US_INT - 1000) /* * The following defines are used in reiserfs_insert_item * and reiserfs_append_item */ #define REISERFS_KERNEL_MEM 0 /* kernel memory mode */ #define REISERFS_USER_MEM 1 /* user memory mode */ #define fs_generation(s) (REISERFS_SB(s)->s_generation_counter) #define get_generation(s) atomic_read (&fs_generation(s)) #define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) #define __fs_changed(gen,s) (gen != get_generation (s)) #define fs_changed(gen,s) \ ({ \ reiserfs_cond_resched(s); \ __fs_changed(gen, s); \ }) /*************************************************************************** * FIXATE NODES * ***************************************************************************/ #define VI_TYPE_LEFT_MERGEABLE 1 #define VI_TYPE_RIGHT_MERGEABLE 2 /* * To make any changes in the tree we always first find node, that * contains item to be changed/deleted or place to insert a new * item. We call this node S. To do balancing we need to decide what * we will shift to left/right neighbor, or to a new node, where new * item will be etc. To make this analysis simpler we build virtual * node. Virtual node is an array of items, that will replace items of * node S. (For instance if we are going to delete an item, virtual * node does not contain it). Virtual node keeps information about * item sizes and types, mergeability of first and last items, sizes * of all entries in directory item. 
We use this array of items when
 * calculating what we can shift to neighbors and how many nodes we
 * have to have if we do not do any shifting, if we shift to left/right
 * neighbor or to both.
 */
struct virtual_item {
	int vi_index;		/* index in the array of item operations */
	unsigned short vi_type;	/* left/right mergeability */

	/* length of item that it will have after balancing */
	unsigned short vi_item_len;

	struct item_head *vi_ih;
	const char *vi_item;	/* body of item (old or new) */
	const void *vi_new_data;	/* 0 always but paste mode */
	void *vi_uarea;		/* item specific area */
};

struct virtual_node {
	/* this is a pointer to the free space in the buffer */
	char *vn_free_ptr;

	unsigned short vn_nr_item;	/* number of items in virtual node */

	/*
	 * size the node would have if it had unlimited size and
	 * no balancing were performed
	 */
	short vn_size;

	/* mode of balancing (paste, insert, delete, cut) */
	short vn_mode;

	short vn_affected_item_num;
	short vn_pos_in_item;

	/* item header of inserted item, 0 for other modes */
	struct item_head *vn_ins_ih;
	const void *vn_data;

	/* array of items (including a new one, excluding item to be deleted) */
	struct virtual_item *vn_vi;
};

/* used by directory items when creating virtual nodes */
struct direntry_uarea {
	int flags;
	__u16 entry_count;
	__u16 entry_sizes[];	/* flexible array member, sized by the allocator */
} __attribute__ ((__packed__));

/***************************************************************************
 *                  TREE BALANCE                                           *
 ***************************************************************************/

/*
 * This temporary structure is used in tree balance algorithms, and
 * constructed as we go to the extent that its various parts are
 * needed.  It contains arrays of nodes that can potentially be
 * involved in the balancing of node S, and parameters that define how
 * each of the nodes must be balanced.
Note that in these algorithms
 * for balancing the worst case is to need to balance the current node
 * S and the left and right neighbors and all of their parents plus
 * create a new node.  We implement S1 balancing for the leaf nodes
 * and S0 balancing for the internal nodes (S1 and S0 are defined in
 * our papers.)
 */

/* size of the array of buffers to free at end of do_balance */
#define MAX_FREE_BLOCK 7

/* maximum number of FEB blocknrs on a single level */
#define MAX_AMOUNT_NEEDED 2

/* someday somebody will prefix every field in this struct with tb_ */
struct tree_balance {
	int tb_mode;
	int need_balance_dirty;
	struct super_block *tb_sb;
	struct reiserfs_transaction_handle *transaction_handle;
	struct treepath *tb_path;

	/* array of left neighbors of nodes in the path */
	struct buffer_head *L[MAX_HEIGHT];

	/* array of right neighbors of nodes in the path */
	struct buffer_head *R[MAX_HEIGHT];

	/* array of fathers of the left neighbors */
	struct buffer_head *FL[MAX_HEIGHT];

	/* array of fathers of the right neighbors */
	struct buffer_head *FR[MAX_HEIGHT];

	/* array of common parents of center node and its left neighbor */
	struct buffer_head *CFL[MAX_HEIGHT];

	/* array of common parents of center node and its right neighbor */
	struct buffer_head *CFR[MAX_HEIGHT];

	/*
	 * array of empty buffers. Number of buffers in array equals
	 * cur_blknum.
	 */
	struct buffer_head *FEB[MAX_FEB_SIZE];
	struct buffer_head *used[MAX_FEB_SIZE];
	struct buffer_head *thrown[MAX_FEB_SIZE];

	/*
	 * array of number of items which must be shifted to the left in
	 * order to balance the current node; for leaves includes item that
	 * will be partially shifted; for internal nodes, it is the number
	 * of child pointers rather than items. It includes the new item
	 * being created. The code sometimes subtracts one to get the
	 * number of wholly shifted items for other purposes.
	 */
	int lnum[MAX_HEIGHT];

	/* substitute right for left in comment above */
	int rnum[MAX_HEIGHT];

	/*
	 * array indexed by height h mapping the key delimiting L[h] and
	 * S[h] to its item number within the node CFL[h]
	 */
	int lkey[MAX_HEIGHT];

	/* substitute r for l in comment above */
	int rkey[MAX_HEIGHT];

	/*
	 * the number of bytes by which we are trying to grow or shrink
	 * S[h]. A negative value means removing.
	 */
	int insert_size[MAX_HEIGHT];

	/*
	 * number of nodes that will replace node S[h] after balancing
	 * on the level h of the tree.  If 0 then S is being deleted,
	 * if 1 then S is remaining and no new nodes are being created,
	 * if 2 or 3 then 1 or 2 new nodes are being created
	 */
	int blknum[MAX_HEIGHT];

	/* fields that are used only for balancing leaves of the tree */

	/* number of empty blocks having been already allocated */
	int cur_blknum;

	/* number of items that fall into left most node when S[0] splits */
	int s0num;

	/*
	 * number of bytes which can flow to the left neighbor from the left
	 * most liquid item that cannot be shifted from S[0] entirely
	 * if -1 then nothing will be partially shifted
	 */
	int lbytes;

	/*
	 * number of bytes which will flow to the right neighbor from the right
	 * most liquid item that cannot be shifted from S[0] entirely
	 * if -1 then nothing will be partially shifted
	 */
	int rbytes;

	/*
	 * index into the array of item headers in
	 * S[0] of the affected item
	 */
	int item_pos;

	/* new nodes allocated to hold what could not fit into S */
	struct buffer_head *S_new[2];

	/*
	 * number of items that will be placed into nodes in S_new
	 * when S[0] splits
	 */
	int snum[2];

	/*
	 * number of bytes which flow to nodes in S_new when S[0] splits
	 * note: if S[0] splits into 3 nodes, then items do not need to be cut
	 */
	int sbytes[2];

	int pos_in_item;
	int zeroes_num;

	/*
	 * buffers which are to be freed after do_balance finishes
	 * by unfix_nodes
	 */
	struct buffer_head *buf_to_free[MAX_FREE_BLOCK];

	/*
	 * kmalloced memory. Used to create virtual node and keep
	 * map of dirtied bitmap blocks
	 */
	char *vn_buf;

	int vn_buf_size;	/* size of the vn_buf */

	/* VN starts after bitmap of bitmap blocks */
	struct virtual_node *tb_vn;

	/*
	 * saved value of `reiserfs_generation' counter see
	 * FILESYSTEM_CHANGED() macro in reiserfs_fs.h
	 */
	int fs_gen;

#ifdef DISPLACE_NEW_PACKING_LOCALITIES
	/*
	 * key pointer, to pass to block allocator or
	 * another low-level subsystem
	 */
	struct in_core_key key;
#endif
};

/* These are modes of balancing */

/* When inserting an item. */
#define M_INSERT	'i'
/*
 * When inserting into (directories only) or appending onto an already
 * existent item.
 */
#define M_PASTE		'p'
/* When deleting an item. */
#define M_DELETE	'd'
/* When truncating an item or removing an entry from a (directory) item. */
#define M_CUT		'c'

/* used when balancing on leaf level skipped (in reiserfsck) */
#define M_INTERNAL	'n'

/*
 * When further balancing is not needed, then do_balance does not need
 * to be called.
 */
#define M_SKIP_BALANCING		's'
#define M_CONVERT	'v'

/* modes of leaf_move_items */
#define LEAF_FROM_S_TO_L 0
#define LEAF_FROM_S_TO_R 1
#define LEAF_FROM_R_TO_L 2
#define LEAF_FROM_L_TO_R 3
#define LEAF_FROM_S_TO_SNEW 4

#define FIRST_TO_LAST 0
#define LAST_TO_FIRST 1

/*
 * used in do_balance for passing parent of node information that has
 * been gotten from tb struct
 */
struct buffer_info {
	struct tree_balance *tb;
	struct buffer_head *bi_bh;
	struct buffer_head *bi_parent;
	int bi_position;
};

/* super block of a tree_balance, or NULL when @tb is NULL */
static inline struct super_block *sb_from_tb(struct tree_balance *tb)
{
	return tb ? tb->tb_sb : NULL;
}

/* super block reached through bi->tb, or NULL when @bi (or its tb) is NULL */
static inline struct super_block *sb_from_bi(struct buffer_info *bi)
{
	return bi ? sb_from_tb(bi->tb) : NULL;
}

/*
 * there are 4 types of items: stat data, directory item, indirect, direct.
 * +-------------------+------------+--------------+------------+
 * |                   |  k_offset  | k_uniqueness | mergeable?
| * +-------------------+------------+--------------+------------+ * | stat data | 0 | 0 | no | * +-------------------+------------+--------------+------------+ * | 1st directory item| DOT_OFFSET | DIRENTRY_ .. | no | * | non 1st directory | hash value | UNIQUENESS | yes | * | item | | | | * +-------------------+------------+--------------+------------+ * | indirect item | offset + 1 |TYPE_INDIRECT | [1] | * +-------------------+------------+--------------+------------+ * | direct item | offset + 1 |TYPE_DIRECT | [2] | * +-------------------+------------+--------------+------------+ * * [1] if this is not the first indirect item of the object * [2] if this is not the first direct item of the object */ struct item_operations { int (*bytes_number) (struct item_head * ih, int block_size); void (*decrement_key) (struct cpu_key *); int (*is_left_mergeable) (struct reiserfs_key * ih, unsigned long bsize); void (*print_item) (struct item_head *, char *item); void (*check_item) (struct item_head *, char *item); int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi, int is_affected, int insert_size); int (*check_left) (struct virtual_item * vi, int free, int start_skip, int end_skip); int (*check_right) (struct virtual_item * vi, int free); int (*part_size) (struct virtual_item * vi, int from, int to); int (*unit_num) (struct virtual_item * vi); void (*print_vi) (struct virtual_item * vi); }; extern struct item_operations *item_ops[TYPE_ANY + 1]; #define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize) #define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize) #define op_print_item(ih,item) item_ops[le_ih_k_type (ih)]->print_item (ih, item) #define op_check_item(ih,item) item_ops[le_ih_k_type (ih)]->check_item (ih, item) #define op_create_vi(vn,vi,is_affected,insert_size) item_ops[le_ih_k_type ((vi)->vi_ih)]->create_vi (vn,vi,is_affected,insert_size) #define 
op_check_left(vi,free,start_skip,end_skip) item_ops[(vi)->vi_index]->check_left (vi, free, start_skip, end_skip) #define op_check_right(vi,free) item_ops[(vi)->vi_index]->check_right (vi, free) #define op_part_size(vi,from,to) item_ops[(vi)->vi_index]->part_size (vi, from, to) #define op_unit_num(vi) item_ops[(vi)->vi_index]->unit_num (vi) #define op_print_vi(vi) item_ops[(vi)->vi_index]->print_vi (vi) #define COMP_SHORT_KEYS comp_short_keys /* number of blocks pointed to by the indirect item */ #define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE) /* * the used space within the unformatted node corresponding * to pos within the item pointed to by ih */ #define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size)) /* * number of bytes contained by the direct item or the * unformatted nodes the indirect item points to */ /* following defines use reiserfs buffer header and item header */ /* get stat-data */ #define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) ) /* this is 3976 for size==4096 */ #define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) /* * indirect items consist of entries which contain blocknrs, pos * indicates which entry, and B_I_POS_UNFM_POINTER resolves to the * blocknr contained by the entry pos points to */ #define B_I_POS_UNFM_POINTER(bh, ih, pos) \ le32_to_cpu(*(((unp_t *)ih_item_body(bh, ih)) + (pos))) #define PUT_B_I_POS_UNFM_POINTER(bh, ih, pos, val) \ (*(((unp_t *)ih_item_body(bh, ih)) + (pos)) = cpu_to_le32(val)) struct reiserfs_iget_args { __u32 objectid; __u32 dirid; }; /*************************************************************************** * FUNCTION DECLARATIONS * ***************************************************************************/ #define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) #define journal_trans_half(blocksize) \ ((blocksize - sizeof(struct reiserfs_journal_desc) - 12) / sizeof(__u32)) 
/* journal.c see journal.c for all the comments here */

/* first block written in a commit. */
struct reiserfs_journal_desc {
	__le32 j_trans_id;	/* id of commit */

	/* length of commit. len +1 is the commit block */
	__le32 j_len;

	__le32 j_mount_id;	/* mount id of this trans */
	__le32 j_realblock[];	/* real locations for each block */
};

/* little-endian accessors for struct reiserfs_journal_desc */
#define get_desc_trans_id(d)   le32_to_cpu((d)->j_trans_id)
#define get_desc_trans_len(d)  le32_to_cpu((d)->j_len)
#define get_desc_mount_id(d)   le32_to_cpu((d)->j_mount_id)

#define set_desc_trans_id(d,val)       do { (d)->j_trans_id = cpu_to_le32 (val); } while (0)
#define set_desc_trans_len(d,val)      do { (d)->j_len = cpu_to_le32 (val); } while (0)
#define set_desc_mount_id(d,val)       do { (d)->j_mount_id = cpu_to_le32 (val); } while (0)

/* last block written in a commit */
struct reiserfs_journal_commit {
	__le32 j_trans_id;	/* must match j_trans_id from the desc block */
	__le32 j_len;		/* ditto */
	__le32 j_realblock[];	/* real locations for each block */
};

/* little-endian accessors for struct reiserfs_journal_commit */
#define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id)
#define get_commit_trans_len(c)        le32_to_cpu((c)->j_len)
/*
 * NOTE(review): struct reiserfs_journal_commit above declares no
 * j_mount_id member, so this macro cannot compile when given a commit
 * block pointer.  Left in place because the macro is untyped and callers
 * are not visible here.
 */
#define get_commit_mount_id(c) le32_to_cpu((c)->j_mount_id)

#define set_commit_trans_id(c,val)     do { (c)->j_trans_id = cpu_to_le32 (val); } while (0)
#define set_commit_trans_len(c,val)    do { (c)->j_len = cpu_to_le32 (val); } while (0)

/*
 * this header block gets written whenever a transaction is considered
 * fully flushed, and is more recent than the last fully flushed transaction.
 * fully flushed means all the log blocks and all the real blocks are on
 * disk, and this transaction does not need to be replayed.
*/ struct reiserfs_journal_header { /* id of last fully flushed transaction */ __le32 j_last_flush_trans_id; /* offset in the log of where to start replay after a crash */ __le32 j_first_unflushed_offset; __le32 j_mount_id; /* 12 */ struct journal_params jh_journal; }; /* biggest tunable defines are right here */ #define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ /* biggest possible single transaction, don't change for now (8/3/99) */ #define JOURNAL_TRANS_MAX_DEFAULT 1024 #define JOURNAL_TRANS_MIN_DEFAULT 256 /* * max blocks to batch into one transaction, * don't make this any bigger than 900 */ #define JOURNAL_MAX_BATCH_DEFAULT 900 #define JOURNAL_MIN_RATIO 2 #define JOURNAL_MAX_COMMIT_AGE 30 #define JOURNAL_MAX_TRANS_AGE 30 #define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9) #define JOURNAL_BLOCKS_PER_OBJECT(sb) (JOURNAL_PER_BALANCE_CNT * 3 + \ 2 * (REISERFS_QUOTA_INIT_BLOCKS(sb) + \ REISERFS_QUOTA_TRANS_BLOCKS(sb))) #ifdef CONFIG_QUOTA #define REISERFS_QUOTA_OPTS ((1 << REISERFS_USRQUOTA) | (1 << REISERFS_GRPQUOTA)) /* We need to update data and inode (atime) */ #define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? 2 : 0) /* 1 balancing, 1 bitmap, 1 data per write + stat data update */ #define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \ (DQUOT_INIT_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_INIT_REWRITE+1) : 0) /* same as with INIT */ #define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \ (DQUOT_DEL_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_DEL_REWRITE+1) : 0) #else #define REISERFS_QUOTA_TRANS_BLOCKS(s) 0 #define REISERFS_QUOTA_INIT_BLOCKS(s) 0 #define REISERFS_QUOTA_DEL_BLOCKS(s) 0 #endif /* * both of these can be as low as 1, or as high as you want. The min is the * number of 4k bitmap nodes preallocated on mount. New nodes are allocated * as needed, and released when transactions are committed. 
On release, if * the current number of nodes is > max, the node is freed, otherwise, * it is put on a free list for faster use later. */ #define REISERFS_MIN_BITMAP_NODES 10 #define REISERFS_MAX_BITMAP_NODES 100 /* these are based on journal hash size of 8192 */ #define JBH_HASH_SHIFT 13 #define JBH_HASH_MASK 8191 #define _jhashfn(sb,block) \ (((unsigned long)sb>>L1_CACHE_SHIFT) ^ \ (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) #define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK]) /* We need these to make journal.c code more readable */ #define journal_find_get_block(s, block) __find_get_block(\ SB_JOURNAL(s)->j_bdev_handle->bdev, block, s->s_blocksize) #define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_bdev_handle->bdev,\ block, s->s_blocksize) #define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_bdev_handle->bdev,\ block, s->s_blocksize) enum reiserfs_bh_state_bits { BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */ BH_JDirty_wait, /* * disk block was taken off free list before being in a * finished transaction, or written to disk. Can be reused immed. */ BH_JNew, BH_JPrepared, BH_JRestore_dirty, BH_JTest, /* debugging only will go away */ }; BUFFER_FNS(JDirty, journaled); TAS_BUFFER_FNS(JDirty, journaled); BUFFER_FNS(JDirty_wait, journal_dirty); TAS_BUFFER_FNS(JDirty_wait, journal_dirty); BUFFER_FNS(JNew, journal_new); TAS_BUFFER_FNS(JNew, journal_new); BUFFER_FNS(JPrepared, journal_prepared); TAS_BUFFER_FNS(JPrepared, journal_prepared); BUFFER_FNS(JRestore_dirty, journal_restore_dirty); TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty); BUFFER_FNS(JTest, journal_test); TAS_BUFFER_FNS(JTest, journal_test); /* transaction handle which is passed around for all journal calls */ struct reiserfs_transaction_handle { /* * super for this FS when journal_begin was called. 
saves calls to * reiserfs_get_super also used by nested transactions to make * sure they are nesting on the right FS _must_ be first * in the handle */ struct super_block *t_super; int t_refcount; int t_blocks_logged; /* number of blocks this writer has logged */ int t_blocks_allocated; /* number of blocks this writer allocated */ /* sanity check, equals the current trans id */ unsigned int t_trans_id; void *t_handle_save; /* save existing current->journal_info */ /* * if new block allocation occurres, that block * should be displaced from others */ unsigned displace_new_blocks:1; struct list_head t_list; }; /* * used to keep track of ordered and tail writes, attached to the buffer * head through b_journal_head. */ struct reiserfs_jh { struct reiserfs_journal_list *jl; struct buffer_head *bh; struct list_head list; }; void reiserfs_free_jh(struct buffer_head *bh); int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh); int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh); int journal_mark_dirty(struct reiserfs_transaction_handle *, struct buffer_head *bh); static inline int reiserfs_file_data_log(struct inode *inode) { if (reiserfs_data_log(inode->i_sb) || (REISERFS_I(inode)->i_flags & i_data_log)) return 1; return 0; } static inline int reiserfs_transaction_running(struct super_block *s) { struct reiserfs_transaction_handle *th = current->journal_info; if (th && th->t_super == s) return 1; if (th && th->t_super == NULL) BUG(); return 0; } static inline int reiserfs_transaction_free_space(struct reiserfs_transaction_handle *th) { return th->t_blocks_allocated - th->t_blocks_logged; } struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct super_block *, int count); int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *); void reiserfs_vfs_truncate_file(struct inode *inode); int reiserfs_commit_page(struct inode *inode, struct page *page, unsigned from, unsigned to); void 
reiserfs_flush_old_commits(struct super_block *); int reiserfs_commit_for_inode(struct inode *); int reiserfs_inode_needs_commit(struct inode *); void reiserfs_update_inode_transaction(struct inode *); void reiserfs_wait_on_write_block(struct super_block *s); void reiserfs_block_writes(struct reiserfs_transaction_handle *th); void reiserfs_allow_writes(struct super_block *s); void reiserfs_check_lock_depth(struct super_block *s, char *caller); int reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh, int wait); void reiserfs_restore_prepared_buffer(struct super_block *, struct buffer_head *bh); int journal_init(struct super_block *, const char *j_dev_name, int old_format, unsigned int); int journal_release(struct reiserfs_transaction_handle *, struct super_block *); int journal_release_error(struct reiserfs_transaction_handle *, struct super_block *); int journal_end(struct reiserfs_transaction_handle *); int journal_end_sync(struct reiserfs_transaction_handle *); int journal_mark_freed(struct reiserfs_transaction_handle *, struct super_block *, b_blocknr_t blocknr); int journal_transaction_should_end(struct reiserfs_transaction_handle *, int); int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr, int bit_nr, int searchall, b_blocknr_t *next); int journal_begin(struct reiserfs_transaction_handle *, struct super_block *sb, unsigned long); int journal_join_abort(struct reiserfs_transaction_handle *, struct super_block *sb); void reiserfs_abort_journal(struct super_block *sb, int errno); void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); int reiserfs_allocate_list_bitmaps(struct super_block *s, struct reiserfs_list_bitmap *, unsigned int); void reiserfs_schedule_old_flush(struct super_block *s); void reiserfs_cancel_old_flush(struct super_block *s); void add_save_link(struct reiserfs_transaction_handle *th, struct inode *inode, int truncate); int remove_save_link(struct inode *inode, int truncate); /* 
objectid.c */ __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th); void reiserfs_release_objectid(struct reiserfs_transaction_handle *th, __u32 objectid_to_release); int reiserfs_convert_objectid_map_v1(struct super_block *); /* stree.c */ int B_IS_IN_TREE(const struct buffer_head *); extern void copy_item_head(struct item_head *to, const struct item_head *from); /* first key is in cpu form, second - le */ extern int comp_short_keys(const struct reiserfs_key *le_key, const struct cpu_key *cpu_key); extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from); /* both are in le form */ extern int comp_le_keys(const struct reiserfs_key *, const struct reiserfs_key *); extern int comp_short_le_keys(const struct reiserfs_key *, const struct reiserfs_key *); /* * get key version from on disk key - kludge */ static inline int le_key_version(const struct reiserfs_key *key) { int type; type = offset_v2_k_type(&(key->u.k_offset_v2)); if (type != TYPE_DIRECT && type != TYPE_INDIRECT && type != TYPE_DIRENTRY) return KEY_FORMAT_3_5; return KEY_FORMAT_3_6; } static inline void copy_key(struct reiserfs_key *to, const struct reiserfs_key *from) { memcpy(to, from, KEY_SIZE); } int comp_items(const struct item_head *stored_ih, const struct treepath *path); const struct reiserfs_key *get_rkey(const struct treepath *chk_path, const struct super_block *sb); int search_by_key(struct super_block *, const struct cpu_key *, struct treepath *, int); #define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL) int search_for_position_by_key(struct super_block *sb, const struct cpu_key *cpu_key, struct treepath *search_path); extern void decrement_bcount(struct buffer_head *bh); void decrement_counters_in_path(struct treepath *search_path); void pathrelse(struct treepath *search_path); int reiserfs_check_path(struct treepath *p); void pathrelse_and_restore(struct super_block *s, struct treepath *search_path); int 
reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath *path, const struct cpu_key *key, struct item_head *ih, struct inode *inode, const char *body); int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *path, const struct cpu_key *key, struct inode *inode, const char *body, int paste_size); int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, struct treepath *path, struct cpu_key *key, struct inode *inode, struct page *page, loff_t new_file_size); int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct treepath *path, const struct cpu_key *key, struct inode *inode, struct buffer_head *un_bh); void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, struct inode *inode, struct reiserfs_key *key); int reiserfs_delete_object(struct reiserfs_transaction_handle *th, struct inode *inode); int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *inode, struct page *, int update_timestamps); #define i_block_size(inode) ((inode)->i_sb->s_blocksize) #define file_size(inode) ((inode)->i_size) #define tail_size(inode) (file_size (inode) & (i_block_size (inode) - 1)) #define tail_has_to_be_packed(inode) (have_large_tails ((inode)->i_sb)?\ !STORE_TAIL_IN_UNFM_S1(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):have_small_tails ((inode)->i_sb)?!STORE_TAIL_IN_UNFM_S2(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):0 ) void padd_item(char *item, int total_length, int length); /* inode.c */ /* args for the create parameter of reiserfs_get_block */ #define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ #define GET_BLOCK_CREATE 1 /* add anything you need to find block */ #define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ #define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ #define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */ #define GET_BLOCK_NO_DANGLE 16 
/* don't leave any transactions running */ void reiserfs_read_locked_inode(struct inode *inode, struct reiserfs_iget_args *args); int reiserfs_find_actor(struct inode *inode, void *p); int reiserfs_init_locked_inode(struct inode *inode, void *p); void reiserfs_evict_inode(struct inode *inode); int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc); int reiserfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create); struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, struct inode *parent); int reiserfs_truncate_file(struct inode *, int update_timestamps); void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset, int type, int key_length); void make_le_item_head(struct item_head *ih, const struct cpu_key *key, int version, loff_t offset, int type, int length, int entry_count); struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key); struct reiserfs_security_handle; int reiserfs_new_inode(struct reiserfs_transaction_handle *th, struct inode *dir, umode_t mode, const char *symname, loff_t i_size, struct dentry *dentry, struct inode *inode, struct reiserfs_security_handle *security); void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, struct inode *inode, loff_t size); static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th, struct inode *inode) { reiserfs_update_sd_size(th, inode, inode->i_size); } void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); int reiserfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len); /* namei.c */ void reiserfs_init_priv_inode(struct inode *inode); void 
set_de_name_and_namelen(struct reiserfs_dir_entry *de); int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, struct treepath *path, struct reiserfs_dir_entry *de); struct dentry *reiserfs_get_parent(struct dentry *); #ifdef CONFIG_REISERFS_PROC_INFO int reiserfs_proc_info_init(struct super_block *sb); int reiserfs_proc_info_done(struct super_block *sb); int reiserfs_proc_info_global_init(void); int reiserfs_proc_info_global_done(void); #define PROC_EXP( e ) e #define __PINFO( sb ) REISERFS_SB(sb) -> s_proc_info_data #define PROC_INFO_MAX( sb, field, value ) \ __PINFO( sb ).field = \ max( REISERFS_SB( sb ) -> s_proc_info_data.field, value ) #define PROC_INFO_INC( sb, field ) ( ++ ( __PINFO( sb ).field ) ) #define PROC_INFO_ADD( sb, field, val ) ( __PINFO( sb ).field += ( val ) ) #define PROC_INFO_BH_STAT( sb, bh, level ) \ PROC_INFO_INC( sb, sbk_read_at[ ( level ) ] ); \ PROC_INFO_ADD( sb, free_at[ ( level ) ], B_FREE_SPACE( bh ) ); \ PROC_INFO_ADD( sb, items_at[ ( level ) ], B_NR_ITEMS( bh ) ) #else static inline int reiserfs_proc_info_init(struct super_block *sb) { return 0; } static inline int reiserfs_proc_info_done(struct super_block *sb) { return 0; } static inline int reiserfs_proc_info_global_init(void) { return 0; } static inline int reiserfs_proc_info_global_done(void) { return 0; } #define PROC_EXP( e ) #define VOID_V ( ( void ) 0 ) #define PROC_INFO_MAX( sb, field, value ) VOID_V #define PROC_INFO_INC( sb, field ) VOID_V #define PROC_INFO_ADD( sb, field, val ) VOID_V #define PROC_INFO_BH_STAT(sb, bh, n_node_level) VOID_V #endif /* dir.c */ extern const struct inode_operations reiserfs_dir_inode_operations; extern const struct inode_operations reiserfs_symlink_inode_operations; extern const struct inode_operations reiserfs_special_inode_operations; extern const struct file_operations reiserfs_dir_operations; int reiserfs_readdir_inode(struct inode *, struct dir_context *); /* tail_conversion.c */ int direct2indirect(struct 
reiserfs_transaction_handle *, struct inode *, struct treepath *, struct buffer_head *, loff_t); int indirect2direct(struct reiserfs_transaction_handle *, struct inode *, struct page *, struct treepath *, const struct cpu_key *, loff_t, char *); void reiserfs_unmap_buffer(struct buffer_head *); /* file.c */ extern const struct inode_operations reiserfs_file_inode_operations; extern const struct inode_operations reiserfs_priv_file_inode_operations; extern const struct file_operations reiserfs_file_operations; extern const struct address_space_operations reiserfs_address_space_operations; /* fix_nodes.c */ int fix_nodes(int n_op_mode, struct tree_balance *tb, struct item_head *ins_ih, const void *); void unfix_nodes(struct tree_balance *); /* prints.c */ void __reiserfs_panic(struct super_block *s, const char *id, const char *function, const char *fmt, ...) __attribute__ ((noreturn)); #define reiserfs_panic(s, id, fmt, args...) \ __reiserfs_panic(s, id, __func__, fmt, ##args) void __reiserfs_error(struct super_block *s, const char *id, const char *function, const char *fmt, ...); #define reiserfs_error(s, id, fmt, args...) 
\ __reiserfs_error(s, id, __func__, fmt, ##args) void reiserfs_info(struct super_block *s, const char *fmt, ...); void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...); void print_indirect_item(struct buffer_head *bh, int item_num); void store_print_tb(struct tree_balance *tb); void print_cur_tb(char *mes); void print_de(struct reiserfs_dir_entry *de); void print_bi(struct buffer_info *bi, char *mes); #define PRINT_LEAF_ITEMS 1 /* print all items */ #define PRINT_DIRECTORY_ITEMS 2 /* print directory items */ #define PRINT_DIRECT_ITEMS 4 /* print contents of direct items */ void print_block(struct buffer_head *bh, ...); void print_bmap(struct super_block *s, int silent); void print_bmap_block(int i, char *data, int size, int silent); /*void print_super_block (struct super_block * s, char * mes);*/ void print_objectid_map(struct super_block *s); void print_block_head(struct buffer_head *bh, char *mes); void check_leaf(struct buffer_head *bh); void check_internal(struct buffer_head *bh); void print_statistics(struct super_block *s); char *reiserfs_hashname(int code); /* lbalance.c */ int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, int mov_bytes, struct buffer_head *Snew); int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes); int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes); void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first, int del_num, int del_bytes); void leaf_insert_into_buf(struct buffer_info *bi, int before, struct item_head * const inserted_item_ih, const char * const inserted_item_body, int zeros_number); void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num, int pos_in_item, int paste_size, const char * const body, int zeros_number); void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, int pos_in_item, int cut_size); void leaf_paste_entries(struct buffer_info *bi, int item_num, int before, int 
new_entry_count, struct reiserfs_de_head *new_dehs, const char *records, int paste_size); /* ibalance.c */ int balance_internal(struct tree_balance *, int, int, struct item_head *, struct buffer_head **); /* do_balance.c */ void do_balance_mark_leaf_dirty(struct tree_balance *tb, struct buffer_head *bh, int flag); #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty void do_balance(struct tree_balance *tb, struct item_head *ih, const char *body, int flag); void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh); int get_left_neighbor_position(struct tree_balance *tb, int h); int get_right_neighbor_position(struct tree_balance *tb, int h); void replace_key(struct tree_balance *tb, struct buffer_head *, int, struct buffer_head *, int); void make_empty_node(struct buffer_info *); struct buffer_head *get_FEB(struct tree_balance *); /* bitmap.c */ /* * structure contains hints for block allocator, and it is a container for * arguments, such as node, search path, transaction_handle, etc. */ struct __reiserfs_blocknr_hint { /* inode passed to allocator, if we allocate unf. 
nodes */ struct inode *inode; sector_t block; /* file offset, in blocks */ struct in_core_key key; /* * search path, used by allocator to deternine search_start by * various ways */ struct treepath *path; /* * transaction handle is needed to log super blocks * and bitmap blocks changes */ struct reiserfs_transaction_handle *th; b_blocknr_t beg, end; /* * a field used to transfer search start value (block number) * between different block allocator procedures * (determine_search_start() and others) */ b_blocknr_t search_start; /* * is set in determine_prealloc_size() function, * used by underlayed function that do actual allocation */ int prealloc_size; /* * the allocator uses different polices for getting disk * space for formatted/unformatted blocks with/without preallocation */ unsigned formatted_node:1; unsigned preallocate:1; }; typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t; int reiserfs_parse_alloc_options(struct super_block *, char *); void reiserfs_init_alloc_options(struct super_block *s); /* * given a directory, this will tell you what packing locality * to use for a new object underneat it. The locality is returned * in disk byte order (le). 
*/ __le32 reiserfs_choose_packing(struct inode *dir); void show_alloc_options(struct seq_file *seq, struct super_block *s); int reiserfs_init_bitmap_cache(struct super_block *sb); void reiserfs_free_bitmap_cache(struct super_block *sb); void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info); struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, unsigned int bitmap); int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); void reiserfs_free_block(struct reiserfs_transaction_handle *th, struct inode *, b_blocknr_t, int for_unformatted); int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t *, int, int); static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb, b_blocknr_t * new_blocknrs, int amount_needed) { reiserfs_blocknr_hint_t hint = { .th = tb->transaction_handle, .path = tb->tb_path, .inode = NULL, .key = tb->key, .block = 0, .formatted_node = 1 }; return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed, 0); } static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle *th, struct inode *inode, b_blocknr_t * new_blocknrs, struct treepath *path, sector_t block) { reiserfs_blocknr_hint_t hint = { .th = th, .path = path, .inode = inode, .block = block, .formatted_node = 0, .preallocate = 0 }; return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); } #ifdef REISERFS_PREALLOCATE static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle *th, struct inode *inode, b_blocknr_t * new_blocknrs, struct treepath *path, sector_t block) { reiserfs_blocknr_hint_t hint = { .th = th, .path = path, .inode = inode, .block = block, .formatted_node = 0, .preallocate = 1 }; return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); } void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th, struct inode *inode); void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle 
*th); #endif /* hashes.c */ __u32 keyed_hash(const signed char *msg, int len); __u32 yura_hash(const signed char *msg, int len); __u32 r5_hash(const signed char *msg, int len); #define reiserfs_set_le_bit __set_bit_le #define reiserfs_test_and_set_le_bit __test_and_set_bit_le #define reiserfs_clear_le_bit __clear_bit_le #define reiserfs_test_and_clear_le_bit __test_and_clear_bit_le #define reiserfs_test_le_bit test_bit_le #define reiserfs_find_next_zero_le_bit find_next_zero_bit_le /* * sometimes reiserfs_truncate may require to allocate few new blocks * to perform indirect2direct conversion. People probably used to * think, that truncate should work without problems on a filesystem * without free disk space. They may complain that they can not * truncate due to lack of free disk space. This spare space allows us * to not worry about it. 500 is probably too much, but it should be * absolutely safe */ #define SPARE_SPACE 500 /* prototypes from ioctl.c */ int reiserfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); int reiserfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long reiserfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); int reiserfs_unpack(struct inode *inode);
82 308 105 3059 1 877 3602 9 2848 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 
519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HUGE_MM_H #define _LINUX_HUGE_MM_H #include <linux/sched/coredump.h> #include <linux/mm_types.h> #include <linux/fs.h> /* only for vma_is_dax() */ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf); int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); void huge_pmd_set_accessed(struct vm_fault *vmf); int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, pud_t *dst_pud, pud_t *src_pud, unsigned long addr, struct vm_area_struct *vma); #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud); #else static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) { } #endif vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf); bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long next); int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr); int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr); bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd); int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, unsigned long cp_flags); vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_UNSUPPORTED, TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, 
TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, }; struct kobject; struct kobj_attribute; ssize_t single_hugepage_flag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count, enum transparent_hugepage_flag flag); ssize_t single_hugepage_flag_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf, enum transparent_hugepage_flag flag); extern struct kobj_attribute shmem_enabled_attr; #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) /* * Mask of all large folio orders supported for anonymous THP; all orders up to * and including PMD_ORDER, except order-0 (which is not "huge") and order-1 * (which is a limitation of the THP implementation). */ #define THP_ORDERS_ALL_ANON ((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1))) /* * Mask of all large folio orders supported for file THP. */ #define THP_ORDERS_ALL_FILE (BIT(PMD_ORDER) | BIT(PUD_ORDER)) /* * Mask of all large folio orders supported for THP. 
*/ #define THP_ORDERS_ALL (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE) #define thp_vma_allowable_order(vma, vm_flags, smaps, in_pf, enforce_sysfs, order) \ (!!thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, enforce_sysfs, BIT(order))) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define HPAGE_PMD_SHIFT PMD_SHIFT #define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT) #define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1)) #define HPAGE_PUD_SHIFT PUD_SHIFT #define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT) #define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1)) extern unsigned long transparent_hugepage_flags; extern unsigned long huge_anon_orders_always; extern unsigned long huge_anon_orders_madvise; extern unsigned long huge_anon_orders_inherit; static inline bool hugepage_global_enabled(void) { return transparent_hugepage_flags & ((1<<TRANSPARENT_HUGEPAGE_FLAG) | (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)); } static inline bool hugepage_global_always(void) { return transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_FLAG); } static inline bool hugepage_flags_enabled(void) { /* * We cover both the anon and the file-backed case here; we must return * true if globally enabled, even when all anon sizes are set to never. * So we don't need to look at huge_anon_orders_inherit. */ return hugepage_global_enabled() || huge_anon_orders_always || huge_anon_orders_madvise; } static inline int highest_order(unsigned long orders) { return fls_long(orders) - 1; } static inline int next_order(unsigned long *orders, int prev) { *orders &= ~BIT(prev); return highest_order(*orders); } /* * Do the below checks: * - For file vma, check if the linear page offset of vma is * order-aligned within the file. The hugepage is * guaranteed to be order-aligned within the file, but we must * check that the order-aligned addresses in the VMA map to * order-aligned offsets within the file, else the hugepage will * not be mappable. * - For all vmas, check if the haddr is in an aligned hugepage * area. 
*/ static inline bool thp_vma_suitable_order(struct vm_area_struct *vma, unsigned long addr, int order) { unsigned long hpage_size = PAGE_SIZE << order; unsigned long haddr; /* Don't have to check pgoff for anonymous vma */ if (!vma_is_anonymous(vma)) { if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, hpage_size >> PAGE_SHIFT)) return false; } haddr = ALIGN_DOWN(addr, hpage_size); if (haddr < vma->vm_start || haddr + hpage_size > vma->vm_end) return false; return true; } /* * Filter the bitfield of input orders to the ones suitable for use in the vma. * See thp_vma_suitable_order(). * All orders that pass the checks are returned as a bitfield. */ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, unsigned long addr, unsigned long orders) { int order; /* * Iterate over orders, highest to lowest, removing orders that don't * meet alignment requirements from the set. Exit loop at first order * that meets requirements, since all lower orders must also meet * requirements. 
*/ order = highest_order(orders); while (orders) { if (thp_vma_suitable_order(vma, addr, order)) break; order = next_order(&orders, order); } return orders; } static inline bool file_thp_enabled(struct vm_area_struct *vma) { struct inode *inode; if (!vma->vm_file) return false; inode = vma->vm_file->f_inode; return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) && !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); } unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, unsigned long vm_flags, bool smaps, bool in_pf, bool enforce_sysfs, unsigned long orders); /** * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma * @vma: the vm area to check * @vm_flags: use these vm_flags instead of vma->vm_flags * @smaps: whether answer will be used for smaps file * @in_pf: whether answer will be used by page fault handler * @enforce_sysfs: whether sysfs config should be taken into account * @orders: bitfield of all orders to consider * * Calculates the intersection of the requested hugepage orders and the allowed * hugepage orders for the provided vma. Permitted orders are encoded as a set * bit at the corresponding bit position (bit-2 corresponds to order-2, bit-3 * corresponds to order-3, etc). Order-0 is never considered a hugepage order. * * Return: bitfield of orders allowed for hugepage in the vma. 0 if no hugepage * orders are allowed. */ static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, unsigned long vm_flags, bool smaps, bool in_pf, bool enforce_sysfs, unsigned long orders) { /* Optimization to check if required orders are enabled early. 
*/ if (enforce_sysfs && vma_is_anonymous(vma)) { unsigned long mask = READ_ONCE(huge_anon_orders_always); if (vm_flags & VM_HUGEPAGE) mask |= READ_ONCE(huge_anon_orders_madvise); if (hugepage_global_always() || ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled())) mask |= READ_ONCE(huge_anon_orders_inherit); orders &= mask; if (!orders) return 0; } return __thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, enforce_sysfs, orders); } #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); void folio_prep_large_rmappable(struct folio *folio); bool can_split_folio(struct folio *folio, int *pextra_pins); int split_huge_page_to_list(struct page *page, struct list_head *list); static inline int split_huge_page(struct page *page) { return split_huge_page_to_list(page, NULL); } void deferred_split_folio(struct folio *folio); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze, struct folio *folio); #define split_huge_pmd(__vma, __pmd, __address) \ do { \ pmd_t *____pmd = (__pmd); \ if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd) \ || pmd_devmap(*____pmd)) \ __split_huge_pmd(__vma, __pmd, __address, \ false, NULL); \ } while (0) void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, bool freeze, struct folio *folio); void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, unsigned long address); #define split_huge_pud(__vma, __pud, __address) \ do { \ pud_t *____pud = (__pud); \ if (pud_trans_huge(*____pud) \ || pud_devmap(*____pud)) \ __split_huge_pud(__vma, __pud, __address); \ } while (0) int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice); int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned 
long end); void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, long adjust_next); spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma); static inline int is_swap_pmd(pmd_t pmd) { return !pmd_none(pmd) && !pmd_present(pmd); } /* mmap_lock must be held on entry */ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) return __pmd_trans_huge_lock(pmd, vma); else return NULL; } static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) { if (pud_trans_huge(*pud) || pud_devmap(*pud)) return __pud_trans_huge_lock(pud, vma); else return NULL; } /** * folio_test_pmd_mappable - Can we map this folio with a PMD? * @folio: The folio to test */ static inline bool folio_test_pmd_mappable(struct folio *folio) { return folio_order(folio) >= HPAGE_PMD_ORDER; } struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap); struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap); vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); extern struct page *huge_zero_page; extern unsigned long huge_zero_pfn; static inline bool is_huge_zero_page(struct page *page) { return READ_ONCE(huge_zero_page) == page; } static inline bool is_huge_zero_pmd(pmd_t pmd) { return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd); } static inline bool is_huge_zero_pud(pud_t pud) { return false; } struct page *mm_get_huge_zero_page(struct mm_struct *mm); void mm_put_huge_zero_page(struct mm_struct *mm); #define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) static inline bool thp_migration_supported(void) { return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); } #else /* 
CONFIG_TRANSPARENT_HUGEPAGE */
/*
 * Fallback definitions for the #else branch of CONFIG_TRANSPARENT_HUGEPAGE.
 * Each HPAGE_* macro expands to a statement expression that invokes
 * BUILD_BUG() and yields 0.
 */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })

#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })

/* Stub: always reports false. */
static inline bool folio_test_pmd_mappable(struct folio *folio)
{
	return false;
}

/* Stub: no order is ever suitable. */
static inline bool thp_vma_suitable_order(struct vm_area_struct *vma,
		unsigned long addr, int order)
{
	return false;
}

/* Stub: returns an empty order bitfield. */
static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
		unsigned long addr, unsigned long orders)
{
	return 0;
}

/* Stub: returns an empty order bitfield. */
static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
					unsigned long vm_flags, bool smaps,
					bool in_pf, bool enforce_sysfs,
					unsigned long orders)
{
	return 0;
}

/* Stub: does nothing. */
static inline void folio_prep_large_rmappable(struct folio *folio) {}

/* In this branch the flags word is the constant 0UL. */
#define transparent_hugepage_flags 0UL

#define thp_get_unmapped_area	NULL

/* Stub: always reports false. */
static inline bool
can_split_folio(struct folio *folio, int *pextra_pins)
{
	return false;
}
/* Stub: returns 0 without touching @page or @list. */
static inline int
split_huge_page_to_list(struct page *page, struct list_head *list)
{
	return 0;
}
/* Stub: returns 0 without touching @page. */
static inline int split_huge_page(struct page *page)
{
	return 0;
}
/* Stub: does nothing. */
static inline void deferred_split_folio(struct folio *folio) {}
/* Expands to an empty statement; the arguments are not evaluated. */
#define split_huge_pmd(__vma, __pmd, __address)	\
	do { } while (0)

/* Stub: does nothing. */
static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long address, bool freeze, struct folio *folio) {}
/* Stub: does nothing. */
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
		unsigned long address, bool freeze, struct folio *folio) {}

/* Expands to an empty statement; the arguments are not evaluated. */
#define split_huge_pud(__vma, __pmd, __address)	\
	do { } while (0)

/* Stub: always fails with -EINVAL. */
static inline int hugepage_madvise(struct vm_area_struct *vma,
				   unsigned long *vm_flags, int advice)
{
	return -EINVAL;
}

/* Stub: always fails with -EINVAL. */
static inline int madvise_collapse(struct vm_area_struct *vma,
				   struct vm_area_struct **prev,
				   unsigned long start, unsigned long end)
{
	return -EINVAL;
}

sta