Total coverage: 306131 (14%) of 2275182
// SPDX-License-Identifier: GPL-2.0
#include <linux/cpumask.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/sched/stat.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/time_namespace.h>
#include <linux/irqnr.h>
#include <linux/sched/cputime.h>
#include <linux/tick.h>

#ifndef arch_irq_stat_cpu
#define arch_irq_stat_cpu(cpu) 0
#endif
#ifndef arch_irq_stat
#define arch_irq_stat() 0
#endif

u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
{
	u64 idle, idle_usecs = -1ULL;

	if (cpu_online(cpu))
		idle_usecs = get_cpu_idle_time_us(cpu, NULL);

	if (idle_usecs == -1ULL)
		/* !NO_HZ or cpu offline so we can rely on cpustat.idle */
		idle = kcs->cpustat[CPUTIME_IDLE];
	else
		idle = idle_usecs * NSEC_PER_USEC;

	return idle;
}

static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
{
	u64 iowait, iowait_usecs = -1ULL;

	if (cpu_online(cpu))
		iowait_usecs = get_cpu_iowait_time_us(cpu, NULL);

	if (iowait_usecs == -1ULL)
		/* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
		iowait = kcs->cpustat[CPUTIME_IOWAIT];
	else
		iowait = iowait_usecs * NSEC_PER_USEC;

	return iowait;
}

static void show_irq_gap(struct seq_file *p, unsigned int gap)
{
	static const char zeros[] = " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0";

	while (gap > 0) {
		unsigned int inc;

		inc = min_t(unsigned int, gap, ARRAY_SIZE(zeros) / 2);
		seq_write(p, zeros, 2 * inc);
		gap -= inc;
	}
}

static void show_all_irqs(struct seq_file *p)
{
	unsigned int i, next = 0;

	for_each_active_irq(i) {
		show_irq_gap(p, i - next);
		seq_put_decimal_ull(p, " ", kstat_irqs_usr(i));
		next = i + 1;
	}
	show_irq_gap(p, irq_get_nr_irqs() - next);
}

static int show_stat(struct seq_file *p, void *v)
{
	int i, j;
	u64 user, nice, system, idle, iowait, irq, softirq, steal;
	u64 guest, guest_nice;
	u64 sum = 0;
	u64 sum_softirq = 0;
	unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
	struct timespec64 boottime;

	user = nice = system = idle = iowait = irq = softirq = steal = 0;
	guest = guest_nice = 0;
	getboottime64(&boottime);
	/* shift boot timestamp according to the timens offset */
	timens_sub_boottime(&boottime);

	for_each_possible_cpu(i) {
		struct kernel_cpustat kcpustat;
		u64 *cpustat = kcpustat.cpustat;

		kcpustat_cpu_fetch(&kcpustat, i);

		user += cpustat[CPUTIME_USER];
		nice += cpustat[CPUTIME_NICE];
		system += cpustat[CPUTIME_SYSTEM];
		idle += get_idle_time(&kcpustat, i);
		iowait += get_iowait_time(&kcpustat, i);
		irq += cpustat[CPUTIME_IRQ];
		softirq += cpustat[CPUTIME_SOFTIRQ];
		steal += cpustat[CPUTIME_STEAL];
		guest += cpustat[CPUTIME_GUEST];
		guest_nice += cpustat[CPUTIME_GUEST_NICE];
		sum += kstat_cpu_irqs_sum(i);
		sum += arch_irq_stat_cpu(i);

		for (j = 0; j < NR_SOFTIRQS; j++) {
			unsigned int softirq_stat = kstat_softirqs_cpu(j, i);

			per_softirq_sums[j] += softirq_stat;
			sum_softirq += softirq_stat;
		}
	}
	sum += arch_irq_stat();

	seq_put_decimal_ull(p, "cpu  ", nsec_to_clock_t(user));
	seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
	seq_put_decimal_ull(p, " ", nsec_to_clock_t(system));
	seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
	seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait));
	seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq));
	seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq));
	seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
	seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
	seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
	seq_putc(p, '\n');

	for_each_online_cpu(i) {
		struct kernel_cpustat kcpustat;
		u64 *cpustat = kcpustat.cpustat;

		kcpustat_cpu_fetch(&kcpustat, i);

		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
		user = cpustat[CPUTIME_USER];
		nice = cpustat[CPUTIME_NICE];
		system = cpustat[CPUTIME_SYSTEM];
		idle = get_idle_time(&kcpustat, i);
		iowait = get_iowait_time(&kcpustat, i);
		irq = cpustat[CPUTIME_IRQ];
		softirq = cpustat[CPUTIME_SOFTIRQ];
		steal = cpustat[CPUTIME_STEAL];
		guest = cpustat[CPUTIME_GUEST];
		guest_nice = cpustat[CPUTIME_GUEST_NICE];
		seq_printf(p, "cpu%d", i);
		seq_put_decimal_ull(p, " ", nsec_to_clock_t(user));
		seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
		seq_put_decimal_ull(p, " ", nsec_to_clock_t(system));
		seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
		seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait));
		seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq));
		seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq));
		seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
		seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
		seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
		seq_putc(p, '\n');
	}
	seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);

	show_all_irqs(p);

	seq_printf(p,
		"\nctxt %llu\n"
		"btime %llu\n"
		"processes %lu\n"
		"procs_running %u\n"
		"procs_blocked %u\n",
		nr_context_switches(),
		(unsigned long long)boottime.tv_sec,
		total_forks,
		nr_running(),
		nr_iowait());

	seq_put_decimal_ull(p, "softirq ", (unsigned long long)sum_softirq);

	for (i = 0; i < NR_SOFTIRQS; i++)
		seq_put_decimal_ull(p, " ", per_softirq_sums[i]);
	seq_putc(p, '\n');

	return 0;
}

static int stat_open(struct inode *inode, struct file *file)
{
	unsigned int size = 1024 + 128 * num_online_cpus();

	/* minimum size to display an interrupt count : 2 bytes */
	size += 2 * irq_get_nr_irqs();

	return single_open_size(file, show_stat, NULL, size);
}

static const struct proc_ops stat_proc_ops = {
	.proc_flags	= PROC_ENTRY_PERMANENT,
	.proc_open	= stat_open,
	.proc_read_iter	= seq_read_iter,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,
};

static int __init proc_stat_init(void)
{
	proc_create("stat", 0, NULL, &stat_proc_ops);
	return 0;
}
fs_initcall(proc_stat_init);
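The show_stat() routine above is what backs /proc/stat: one aggregate "cpu" line, one "cpuN" line per online CPU, then the "intr", "ctxt", "btime", "processes", "procs_running", "procs_blocked" and "softirq" lines. All CPU times are converted to USER_HZ clock ticks by nsec_to_clock_t(). A minimal user-space sketch (not part of the kernel sources; the helper name read_cpu_line() is invented for illustration) that derives utilisation from two samples of the aggregate line:

#include <stdio.h>
#include <unistd.h>

/* Illustrative helper: read the ten fields of the aggregate "cpu" line
 * emitted by show_stat(); returns 0 on success. */
static int read_cpu_line(unsigned long long v[10])
{
	FILE *f = fopen("/proc/stat", "r");
	int n;

	if (!f)
		return -1;
	n = fscanf(f, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
		   &v[0], &v[1], &v[2], &v[3], &v[4],
		   &v[5], &v[6], &v[7], &v[8], &v[9]);
	fclose(f);
	return n == 10 ? 0 : -1;
}

int main(void)
{
	unsigned long long a[10], b[10], total = 0, idle;
	int i;

	if (read_cpu_line(a))
		return 1;
	sleep(1);
	if (read_cpu_line(b))
		return 1;
	/* Sum user..steal (fields 0-7); guest/guest_nice are already folded
	 * into user/nice by the kernel's accounting. */
	for (i = 0; i < 8; i++)
		total += b[i] - a[i];
	idle = (b[3] - a[3]) + (b[4] - a[4]);	/* idle + iowait */
	if (total)
		printf("cpu busy: %.1f%%\n", 100.0 * (total - idle) / total);
	return 0;
}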
/*
 * Copyright (c) 2016 Intel Corporation
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that copyright
 * notice and this permission notice appear in supporting documentation, and
 * that the name of the copyright holders not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission. The copyright holders make no representations
 * about the suitability of this software for any purpose. It is provided "as
 * is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THIS SOFTWARE.
 */

#ifndef __DRM_PLANE_H__
#define __DRM_PLANE_H__

#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/kmsg_dump.h>
#include <drm/drm_mode_object.h>
#include <drm/drm_color_mgmt.h>
#include <drm/drm_rect.h>
#include <drm/drm_modeset_lock.h>
#include <drm/drm_util.h>

struct drm_crtc;
struct drm_plane_size_hint;
struct drm_printer;
struct drm_modeset_acquire_ctx;

enum drm_scaling_filter {
	DRM_SCALING_FILTER_DEFAULT,
	DRM_SCALING_FILTER_NEAREST_NEIGHBOR,
};

/**
 * struct drm_plane_state - mutable plane state
 *
 * Please note that the destination coordinates @crtc_x, @crtc_y, @crtc_h and
 * @crtc_w and the source coordinates @src_x, @src_y, @src_h and @src_w are the
 * raw coordinates provided by userspace. Drivers should use
 * drm_atomic_helper_check_plane_state() and only use the derived rectangles in
 * @src and @dst to program the hardware.
 */
struct drm_plane_state {
	/** @plane: backpointer to the plane */
	struct drm_plane *plane;

	/**
	 * @crtc:
	 *
	 * Currently bound CRTC, NULL if disabled. Do not write this directly,
	 * use drm_atomic_set_crtc_for_plane()
	 */
	struct drm_crtc *crtc;

	/**
	 * @fb:
	 *
	 * Currently bound framebuffer. Do not write this directly, use
	 * drm_atomic_set_fb_for_plane()
	 */
	struct drm_framebuffer *fb;

	/**
	 * @fence:
	 *
	 * Optional fence to wait for before scanning out @fb. The core atomic
	 * code will set this when userspace is using explicit fencing. Do not
	 * write this field directly for a driver's implicit fence.
	 *
	 * Drivers should store any implicit fence in this from their
	 * &drm_plane_helper_funcs.prepare_fb callback. See
	 * drm_gem_plane_helper_prepare_fb() for a suitable helper.
	 */
	struct dma_fence *fence;

	/**
	 * @crtc_x:
	 *
	 * Left position of visible portion of plane on crtc, signed dest
	 * location allows it to be partially off screen.
	 */
	int32_t crtc_x;

	/**
	 * @crtc_y:
	 *
	 * Upper position of visible portion of plane on crtc, signed dest
	 * location allows it to be partially off screen.
*/ int32_t crtc_y; /** @crtc_w: width of visible portion of plane on crtc */ /** @crtc_h: height of visible portion of plane on crtc */ uint32_t crtc_w, crtc_h; /** * @src_x: left position of visible portion of plane within plane (in * 16.16 fixed point). */ uint32_t src_x; /** * @src_y: upper position of visible portion of plane within plane (in * 16.16 fixed point). */ uint32_t src_y; /** @src_w: width of visible portion of plane (in 16.16) */ /** @src_h: height of visible portion of plane (in 16.16) */ uint32_t src_h, src_w; /** @hotspot_x: x offset to mouse cursor hotspot */ /** @hotspot_y: y offset to mouse cursor hotspot */ int32_t hotspot_x, hotspot_y; /** * @alpha: * Opacity of the plane with 0 as completely transparent and 0xffff as * completely opaque. See drm_plane_create_alpha_property() for more * details. */ u16 alpha; /** * @pixel_blend_mode: * The alpha blending equation selection, describing how the pixels from * the current plane are composited with the background. Value can be * one of DRM_MODE_BLEND_* */ uint16_t pixel_blend_mode; /** * @rotation: * Rotation of the plane. See drm_plane_create_rotation_property() for * more details. */ unsigned int rotation; /** * @zpos: * Priority of the given plane on crtc (optional). * * User-space may set mutable zpos properties so that multiple active * planes on the same CRTC have identical zpos values. This is a * user-space bug, but drivers can solve the conflict by comparing the * plane object IDs; the plane with a higher ID is stacked on top of a * plane with a lower ID. * * See drm_plane_create_zpos_property() and * drm_plane_create_zpos_immutable_property() for more details. */ unsigned int zpos; /** * @normalized_zpos: * Normalized value of zpos: unique, range from 0 to N-1 where N is the * number of active planes for given crtc. Note that the driver must set * &drm_mode_config.normalize_zpos or call drm_atomic_normalize_zpos() to * update this before it can be trusted. */ unsigned int normalized_zpos; /** * @color_encoding: * * Color encoding for non RGB formats */ enum drm_color_encoding color_encoding; /** * @color_range: * * Color range for non RGB formats */ enum drm_color_range color_range; /** * @fb_damage_clips: * * Blob representing damage (area in plane framebuffer that changed * since last plane update) as an array of &drm_mode_rect in framebuffer * coodinates of the attached framebuffer. Note that unlike plane src, * damage clips are not in 16.16 fixed point. * * See drm_plane_get_damage_clips() and * drm_plane_get_damage_clips_count() for accessing these. */ struct drm_property_blob *fb_damage_clips; /** * @ignore_damage_clips: * * Set by drivers to indicate the drm_atomic_helper_damage_iter_init() * helper that the @fb_damage_clips blob property should be ignored. * * See :ref:`damage_tracking_properties` for more information. */ bool ignore_damage_clips; /** * @src: * * source coordinates of the plane (in 16.16). * * When using drm_atomic_helper_check_plane_state(), * the coordinates are clipped, but the driver may choose * to use unclipped coordinates instead when the hardware * performs the clipping automatically. */ /** * @dst: * * clipped destination coordinates of the plane. * * When using drm_atomic_helper_check_plane_state(), * the coordinates are clipped, but the driver may choose * to use unclipped coordinates instead when the hardware * performs the clipping automatically. */ struct drm_rect src, dst; /** * @visible: * * Visibility of the plane. 
This can be false even if fb!=NULL and * crtc!=NULL, due to clipping. */ bool visible; /** * @scaling_filter: * * Scaling filter to be applied */ enum drm_scaling_filter scaling_filter; /** * @commit: Tracks the pending commit to prevent use-after-free conditions, * and for async plane updates. * * May be NULL. */ struct drm_crtc_commit *commit; /** @state: backpointer to global drm_atomic_state */ struct drm_atomic_state *state; /** * @color_mgmt_changed: Color management properties have changed. Used * by the atomic helpers and drivers to steer the atomic commit control * flow. */ bool color_mgmt_changed : 1; }; static inline struct drm_rect drm_plane_state_src(const struct drm_plane_state *state) { struct drm_rect src = { .x1 = state->src_x, .y1 = state->src_y, .x2 = state->src_x + state->src_w, .y2 = state->src_y + state->src_h, }; return src; } static inline struct drm_rect drm_plane_state_dest(const struct drm_plane_state *state) { struct drm_rect dest = { .x1 = state->crtc_x, .y1 = state->crtc_y, .x2 = state->crtc_x + state->crtc_w, .y2 = state->crtc_y + state->crtc_h, }; return dest; } /** * struct drm_plane_funcs - driver plane control functions */ struct drm_plane_funcs { /** * @update_plane: * * This is the legacy entry point to enable and configure the plane for * the given CRTC and framebuffer. It is never called to disable the * plane, i.e. the passed-in crtc and fb paramters are never NULL. * * The source rectangle in frame buffer memory coordinates is given by * the src_x, src_y, src_w and src_h parameters (as 16.16 fixed point * values). Devices that don't support subpixel plane coordinates can * ignore the fractional part. * * The destination rectangle in CRTC coordinates is given by the * crtc_x, crtc_y, crtc_w and crtc_h parameters (as integer values). * Devices scale the source rectangle to the destination rectangle. If * scaling is not supported, and the source rectangle size doesn't match * the destination rectangle size, the driver must return a * -<errorname>EINVAL</errorname> error. * * Drivers implementing atomic modeset should use * drm_atomic_helper_update_plane() to implement this hook. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*update_plane)(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h, struct drm_modeset_acquire_ctx *ctx); /** * @disable_plane: * * This is the legacy entry point to disable the plane. The DRM core * calls this method in response to a DRM_IOCTL_MODE_SETPLANE IOCTL call * with the frame buffer ID set to 0. Disabled planes must not be * processed by the CRTC. * * Drivers implementing atomic modeset should use * drm_atomic_helper_disable_plane() to implement this hook. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*disable_plane)(struct drm_plane *plane, struct drm_modeset_acquire_ctx *ctx); /** * @destroy: * * Clean up plane resources. This is only called at driver unload time * through drm_mode_config_cleanup() since a plane cannot be hotplugged * in DRM. */ void (*destroy)(struct drm_plane *plane); /** * @reset: * * Reset plane hardware and software state to off. This function isn't * called by the core directly, only through drm_mode_config_reset(). * It's not a helper hook only for historical reasons. * * Atomic drivers can use drm_atomic_helper_plane_reset() to reset * atomic state using this hook. 
*/ void (*reset)(struct drm_plane *plane); /** * @set_property: * * This is the legacy entry point to update a property attached to the * plane. * * This callback is optional if the driver does not support any legacy * driver-private properties. For atomic drivers it is not used because * property handling is done entirely in the DRM core. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*set_property)(struct drm_plane *plane, struct drm_property *property, uint64_t val); /** * @atomic_duplicate_state: * * Duplicate the current atomic state for this plane and return it. * The core and helpers guarantee that any atomic state duplicated with * this hook and still owned by the caller (i.e. not transferred to the * driver by calling &drm_mode_config_funcs.atomic_commit) will be * cleaned up by calling the @atomic_destroy_state hook in this * structure. * * This callback is mandatory for atomic drivers. * * Atomic drivers which don't subclass &struct drm_plane_state should use * drm_atomic_helper_plane_duplicate_state(). Drivers that subclass the * state structure to extend it with driver-private state should use * __drm_atomic_helper_plane_duplicate_state() to make sure shared state is * duplicated in a consistent fashion across drivers. * * It is an error to call this hook before &drm_plane.state has been * initialized correctly. * * NOTE: * * If the duplicate state references refcounted resources this hook must * acquire a reference for each of them. The driver must release these * references again in @atomic_destroy_state. * * RETURNS: * * Duplicated atomic state or NULL when the allocation failed. */ struct drm_plane_state *(*atomic_duplicate_state)(struct drm_plane *plane); /** * @atomic_destroy_state: * * Destroy a state duplicated with @atomic_duplicate_state and release * or unreference all resources it references * * This callback is mandatory for atomic drivers. */ void (*atomic_destroy_state)(struct drm_plane *plane, struct drm_plane_state *state); /** * @atomic_set_property: * * Decode a driver-private property value and store the decoded value * into the passed-in state structure. Since the atomic core decodes all * standardized properties (even for extensions beyond the core set of * properties which might not be implemented by all drivers) this * requires drivers to subclass the state structure. * * Such driver-private properties should really only be implemented for * truly hardware/vendor specific state. Instead it is preferred to * standardize atomic extension and decode the properties used to expose * such an extension in the core. * * Do not call this function directly, use * drm_atomic_plane_set_property() instead. * * This callback is optional if the driver does not support any * driver-private atomic properties. * * NOTE: * * This function is called in the state assembly phase of atomic * modesets, which can be aborted for any reason (including on * userspace's request to just check whether a configuration would be * possible). Drivers MUST NOT touch any persistent state (hardware or * software) or data structures except the passed in @state parameter. * * Also since userspace controls in which order properties are set this * function must not do any input validation (since the state update is * incomplete and hence likely inconsistent). Instead any such input * validation must be done in the various atomic_check callbacks. 
* * RETURNS: * * 0 if the property has been found, -EINVAL if the property isn't * implemented by the driver (which shouldn't ever happen, the core only * asks for properties attached to this plane). No other validation is * allowed by the driver. The core already checks that the property * value is within the range (integer, valid enum value, ...) the driver * set when registering the property. */ int (*atomic_set_property)(struct drm_plane *plane, struct drm_plane_state *state, struct drm_property *property, uint64_t val); /** * @atomic_get_property: * * Reads out the decoded driver-private property. This is used to * implement the GETPLANE IOCTL. * * Do not call this function directly, use * drm_atomic_plane_get_property() instead. * * This callback is optional if the driver does not support any * driver-private atomic properties. * * RETURNS: * * 0 on success, -EINVAL if the property isn't implemented by the * driver (which should never happen, the core only asks for * properties attached to this plane). */ int (*atomic_get_property)(struct drm_plane *plane, const struct drm_plane_state *state, struct drm_property *property, uint64_t *val); /** * @late_register: * * This optional hook can be used to register additional userspace * interfaces attached to the plane like debugfs interfaces. * It is called late in the driver load sequence from drm_dev_register(). * Everything added from this callback should be unregistered in * the early_unregister callback. * * Returns: * * 0 on success, or a negative error code on failure. */ int (*late_register)(struct drm_plane *plane); /** * @early_unregister: * * This optional hook should be used to unregister the additional * userspace interfaces attached to the plane from * @late_register. It is called from drm_dev_unregister(), * early in the driver unload sequence to disable userspace access * before data structures are torndown. */ void (*early_unregister)(struct drm_plane *plane); /** * @atomic_print_state: * * If driver subclasses &struct drm_plane_state, it should implement * this optional hook for printing additional driver specific state. * * Do not call this directly, use drm_atomic_plane_print_state() * instead. */ void (*atomic_print_state)(struct drm_printer *p, const struct drm_plane_state *state); /** * @format_mod_supported: * * This optional hook is used for the DRM to determine if the given * format/modifier combination is valid for the plane. This allows the * DRM to generate the correct format bitmask (which formats apply to * which modifier), and to validate modifiers at atomic_check time. * * If not present, then any modifier in the plane's modifier * list is allowed with any of the plane's formats. * * Returns: * * True if the given modifier is valid for that format on the plane. * False otherwise. */ bool (*format_mod_supported)(struct drm_plane *plane, uint32_t format, uint64_t modifier); }; /** * enum drm_plane_type - uapi plane type enumeration * * For historical reasons not all planes are made the same. This enumeration is * used to tell the different types of planes apart to implement the different * uapi semantics for them. For userspace which is universal plane aware and * which is using that atomic IOCTL there's no difference between these planes * (beyong what the driver and hardware can support of course). * * For compatibility with legacy userspace, only overlay planes are made * available to userspace by default. 
Userspace clients may set the * &DRM_CLIENT_CAP_UNIVERSAL_PLANES client capability bit to indicate that they * wish to receive a universal plane list containing all plane types. See also * drm_for_each_legacy_plane(). * * In addition to setting each plane's type, drivers need to setup the * &drm_crtc.primary and optionally &drm_crtc.cursor pointers for legacy * IOCTLs. See drm_crtc_init_with_planes(). * * WARNING: The values of this enum is UABI since they're exposed in the "type" * property. */ enum drm_plane_type { /** * @DRM_PLANE_TYPE_OVERLAY: * * Overlay planes represent all non-primary, non-cursor planes. Some * drivers refer to these types of planes as "sprites" internally. */ DRM_PLANE_TYPE_OVERLAY, /** * @DRM_PLANE_TYPE_PRIMARY: * * A primary plane attached to a CRTC is the most likely to be able to * light up the CRTC when no scaling/cropping is used and the plane * covers the whole CRTC. */ DRM_PLANE_TYPE_PRIMARY, /** * @DRM_PLANE_TYPE_CURSOR: * * A cursor plane attached to a CRTC is more likely to be able to be * enabled when no scaling/cropping is used and the framebuffer has the * size indicated by &drm_mode_config.cursor_width and * &drm_mode_config.cursor_height. Additionally, if the driver doesn't * support modifiers, the framebuffer should have a linear layout. */ DRM_PLANE_TYPE_CURSOR, }; /** * struct drm_plane - central DRM plane control structure * * Planes represent the scanout hardware of a display block. They receive their * input data from a &drm_framebuffer and feed it to a &drm_crtc. Planes control * the color conversion, see `Plane Composition Properties`_ for more details, * and are also involved in the color conversion of input pixels, see `Color * Management Properties`_ for details on that. */ struct drm_plane { /** @dev: DRM device this plane belongs to */ struct drm_device *dev; /** * @head: * * List of all planes on @dev, linked from &drm_mode_config.plane_list. * Invariant over the lifetime of @dev and therefore does not need * locking. */ struct list_head head; /** @name: human readable name, can be overwritten by the driver */ char *name; /** * @mutex: * * Protects modeset plane state, together with the &drm_crtc.mutex of * CRTC this plane is linked to (when active, getting activated or * getting disabled). * * For atomic drivers specifically this protects @state. */ struct drm_modeset_lock mutex; /** @base: base mode object */ struct drm_mode_object base; /** * @possible_crtcs: pipes this plane can be bound to constructed from * drm_crtc_mask() */ uint32_t possible_crtcs; /** @format_types: array of formats supported by this plane */ uint32_t *format_types; /** @format_count: Size of the array pointed at by @format_types. */ unsigned int format_count; /** * @format_default: driver hasn't supplied supported formats for the * plane. Used by the non-atomic driver compatibility wrapper only. */ bool format_default; /** @modifiers: array of modifiers supported by this plane */ uint64_t *modifiers; /** @modifier_count: Size of the array pointed at by @modifier_count. */ unsigned int modifier_count; /** * @crtc: * * Currently bound CRTC, only meaningful for non-atomic drivers. For * atomic drivers this is forced to be NULL, atomic drivers should * instead check &drm_plane_state.crtc. */ struct drm_crtc *crtc; /** * @fb: * * Currently bound framebuffer, only meaningful for non-atomic drivers. * For atomic drivers this is forced to be NULL, atomic drivers should * instead check &drm_plane_state.fb. 
*/ struct drm_framebuffer *fb; /** * @old_fb: * * Temporary tracking of the old fb while a modeset is ongoing. Only * used by non-atomic drivers, forced to be NULL for atomic drivers. */ struct drm_framebuffer *old_fb; /** @funcs: plane control functions */ const struct drm_plane_funcs *funcs; /** @properties: property tracking for this plane */ struct drm_object_properties properties; /** @type: Type of plane, see &enum drm_plane_type for details. */ enum drm_plane_type type; /** * @index: Position inside the mode_config.list, can be used as an array * index. It is invariant over the lifetime of the plane. */ unsigned index; /** @helper_private: mid-layer private data */ const struct drm_plane_helper_funcs *helper_private; /** * @state: * * Current atomic state for this plane. * * This is protected by @mutex. Note that nonblocking atomic commits * access the current plane state without taking locks. Either by going * through the &struct drm_atomic_state pointers, see * for_each_oldnew_plane_in_state(), for_each_old_plane_in_state() and * for_each_new_plane_in_state(). Or through careful ordering of atomic * commit operations as implemented in the atomic helpers, see * &struct drm_crtc_commit. */ struct drm_plane_state *state; /** * @alpha_property: * Optional alpha property for this plane. See * drm_plane_create_alpha_property(). */ struct drm_property *alpha_property; /** * @zpos_property: * Optional zpos property for this plane. See * drm_plane_create_zpos_property(). */ struct drm_property *zpos_property; /** * @rotation_property: * Optional rotation property for this plane. See * drm_plane_create_rotation_property(). */ struct drm_property *rotation_property; /** * @blend_mode_property: * Optional "pixel blend mode" enum property for this plane. * Blend mode property represents the alpha blending equation selection, * describing how the pixels from the current plane are composited with * the background. */ struct drm_property *blend_mode_property; /** * @color_encoding_property: * * Optional "COLOR_ENCODING" enum property for specifying * color encoding for non RGB formats. * See drm_plane_create_color_properties(). */ struct drm_property *color_encoding_property; /** * @color_range_property: * * Optional "COLOR_RANGE" enum property for specifying * color range for non RGB formats. * See drm_plane_create_color_properties(). */ struct drm_property *color_range_property; /** * @scaling_filter_property: property to apply a particular filter while * scaling. */ struct drm_property *scaling_filter_property; /** * @hotspot_x_property: property to set mouse hotspot x offset. */ struct drm_property *hotspot_x_property; /** * @hotspot_y_property: property to set mouse hotspot y offset. 
*/ struct drm_property *hotspot_y_property; /** * @kmsg_panic: Used to register a panic notifier for this plane */ struct kmsg_dumper kmsg_panic; }; #define obj_to_plane(x) container_of(x, struct drm_plane, base) __printf(9, 10) int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type type, const char *name, ...); void drm_plane_cleanup(struct drm_plane *plane); __printf(10, 11) void *__drmm_universal_plane_alloc(struct drm_device *dev, size_t size, size_t offset, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type plane_type, const char *name, ...); /** * drmm_universal_plane_alloc - Allocate and initialize an universal plane object * @dev: DRM device * @type: the type of the struct which contains struct &drm_plane * @member: the name of the &drm_plane within @type * @possible_crtcs: bitmask of possible CRTCs * @funcs: callbacks for the new plane * @formats: array of supported formats (DRM_FORMAT\_\*) * @format_count: number of elements in @formats * @format_modifiers: array of struct drm_format modifiers terminated by * DRM_FORMAT_MOD_INVALID * @plane_type: type of plane (overlay, primary, cursor) * @name: printf style format string for the plane name, or NULL for default name * * Allocates and initializes a plane object of type @type. Cleanup is * automatically handled through registering drm_plane_cleanup() with * drmm_add_action(). * * The @drm_plane_funcs.destroy hook must be NULL. * * Drivers that only support the DRM_FORMAT_MOD_LINEAR modifier support may set * @format_modifiers to NULL. The plane will advertise the linear modifier. * * Returns: * Pointer to new plane, or ERR_PTR on failure. */ #define drmm_universal_plane_alloc(dev, type, member, possible_crtcs, funcs, formats, \ format_count, format_modifiers, plane_type, name, ...) \ ((type *)__drmm_universal_plane_alloc(dev, sizeof(type), \ offsetof(type, member), \ possible_crtcs, funcs, formats, \ format_count, format_modifiers, \ plane_type, name, ##__VA_ARGS__)) __printf(10, 11) void *__drm_universal_plane_alloc(struct drm_device *dev, size_t size, size_t offset, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type plane_type, const char *name, ...); /** * drm_universal_plane_alloc() - Allocate and initialize an universal plane object * @dev: DRM device * @type: the type of the struct which contains struct &drm_plane * @member: the name of the &drm_plane within @type * @possible_crtcs: bitmask of possible CRTCs * @funcs: callbacks for the new plane * @formats: array of supported formats (DRM_FORMAT\_\*) * @format_count: number of elements in @formats * @format_modifiers: array of struct drm_format modifiers terminated by * DRM_FORMAT_MOD_INVALID * @plane_type: type of plane (overlay, primary, cursor) * @name: printf style format string for the plane name, or NULL for default name * * Allocates and initializes a plane object of type @type. The caller * is responsible for releasing the allocated memory with kfree(). * * Drivers are encouraged to use drmm_universal_plane_alloc() instead. * * Drivers that only support the DRM_FORMAT_MOD_LINEAR modifier support may set * @format_modifiers to NULL. 
The plane will advertise the linear modifier. * * Returns: * Pointer to new plane, or ERR_PTR on failure. */ #define drm_universal_plane_alloc(dev, type, member, possible_crtcs, funcs, formats, \ format_count, format_modifiers, plane_type, name, ...) \ ((type *)__drm_universal_plane_alloc(dev, sizeof(type), \ offsetof(type, member), \ possible_crtcs, funcs, formats, \ format_count, format_modifiers, \ plane_type, name, ##__VA_ARGS__)) /** * drm_plane_index - find the index of a registered plane * @plane: plane to find index for * * Given a registered plane, return the index of that plane within a DRM * device's list of planes. */ static inline unsigned int drm_plane_index(const struct drm_plane *plane) { return plane->index; } /** * drm_plane_mask - find the mask of a registered plane * @plane: plane to find mask for */ static inline u32 drm_plane_mask(const struct drm_plane *plane) { return 1 << drm_plane_index(plane); } struct drm_plane * drm_plane_from_index(struct drm_device *dev, int idx); void drm_plane_force_disable(struct drm_plane *plane); int drm_mode_plane_set_obj_prop(struct drm_plane *plane, struct drm_property *property, uint64_t value); /** * drm_plane_find - find a &drm_plane * @dev: DRM device * @file_priv: drm file to check for lease against. * @id: plane id * * Returns the plane with @id, NULL if it doesn't exist. Simple wrapper around * drm_mode_object_find(). */ static inline struct drm_plane *drm_plane_find(struct drm_device *dev, struct drm_file *file_priv, uint32_t id) { struct drm_mode_object *mo; mo = drm_mode_object_find(dev, file_priv, id, DRM_MODE_OBJECT_PLANE); return mo ? obj_to_plane(mo) : NULL; } /** * drm_for_each_plane_mask - iterate over planes specified by bitmask * @plane: the loop cursor * @dev: the DRM device * @plane_mask: bitmask of plane indices * * Iterate over all planes specified by bitmask. */ #define drm_for_each_plane_mask(plane, dev, plane_mask) \ list_for_each_entry((plane), &(dev)->mode_config.plane_list, head) \ for_each_if ((plane_mask) & drm_plane_mask(plane)) /** * drm_for_each_legacy_plane - iterate over all planes for legacy userspace * @plane: the loop cursor * @dev: the DRM device * * Iterate over all legacy planes of @dev, excluding primary and cursor planes. * This is useful for implementing userspace apis when userspace is not * universal plane aware. See also &enum drm_plane_type. */ #define drm_for_each_legacy_plane(plane, dev) \ list_for_each_entry(plane, &(dev)->mode_config.plane_list, head) \ for_each_if (plane->type == DRM_PLANE_TYPE_OVERLAY) /** * drm_for_each_plane - iterate over all planes * @plane: the loop cursor * @dev: the DRM device * * Iterate over all planes of @dev, include primary and cursor planes. */ #define drm_for_each_plane(plane, dev) \ list_for_each_entry(plane, &(dev)->mode_config.plane_list, head) bool drm_plane_has_format(struct drm_plane *plane, u32 format, u64 modifier); bool drm_any_plane_has_format(struct drm_device *dev, u32 format, u64 modifier); void drm_plane_enable_fb_damage_clips(struct drm_plane *plane); unsigned int drm_plane_get_damage_clips_count(const struct drm_plane_state *state); struct drm_mode_rect * drm_plane_get_damage_clips(const struct drm_plane_state *state); int drm_plane_create_scaling_filter_property(struct drm_plane *plane, unsigned int supported_filters); int drm_plane_add_size_hints_property(struct drm_plane *plane, const struct drm_plane_size_hint *hints, int num_hints); #endif
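For context on how the interfaces declared in this header are typically consumed, here is a minimal, hypothetical driver-side sketch of the managed allocator documented above. The names my_plane, my_plane_funcs, my_formats and my_primary_plane_create() are illustrative only, and the atomic helper callbacks and DRM_FORMAT_XRGB8888 come from other DRM headers (drm_atomic_helper.h, drm_fourcc.h), not from this file:

/* Hypothetical driver subclass embedding struct drm_plane. */
struct my_plane {
	struct drm_plane base;
};

static const struct drm_plane_funcs my_plane_funcs = {
	.update_plane		= drm_atomic_helper_update_plane,
	.disable_plane		= drm_atomic_helper_disable_plane,
	.reset			= drm_atomic_helper_plane_reset,
	.atomic_duplicate_state	= drm_atomic_helper_plane_duplicate_state,
	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
	/* .destroy stays NULL: cleanup is handled by the drmm_ helper. */
};

static const uint32_t my_formats[] = { DRM_FORMAT_XRGB8888 };

static struct my_plane *my_primary_plane_create(struct drm_device *dev,
						uint32_t possible_crtcs)
{
	/* NULL @format_modifiers means only DRM_FORMAT_MOD_LINEAR is
	 * advertised, as the kerneldoc above notes. Returns ERR_PTR()
	 * on failure. */
	return drmm_universal_plane_alloc(dev, struct my_plane, base,
					  possible_crtcs, &my_plane_funcs,
					  my_formats, ARRAY_SIZE(my_formats),
					  NULL, DRM_PLANE_TYPE_PRIMARY, NULL);
}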
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  net/dccp/ccid.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  CCID infrastructure
 */

#include <linux/slab.h>

#include "ccid.h"
#include "ccids/lib/tfrc.h"

static struct ccid_operations *ccids[] = {
	&ccid2_ops,
#ifdef CONFIG_IP_DCCP_CCID3
	&ccid3_ops,
#endif
};

static struct ccid_operations *ccid_by_number(const u8 id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(ccids); i++)
		if (ccids[i]->ccid_id == id)
			return ccids[i];
	return NULL;
}

/* check that up to @array_len members in @ccid_array are supported */
bool ccid_support_check(u8 const *ccid_array, u8 array_len)
{
	while (array_len > 0)
		if (ccid_by_number(ccid_array[--array_len]) == NULL)
			return false;
	return true;
}

/**
 * ccid_get_builtin_ccids - Populate a list of built-in CCIDs
 * @ccid_array: pointer to copy into
 * @array_len: value to return length into
 *
 * This function allocates memory - caller must see that it is freed after use.
 */
int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len)
{
	*ccid_array = kmalloc(ARRAY_SIZE(ccids), gfp_any());
	if (*ccid_array == NULL)
		return -ENOBUFS;

	for (*array_len = 0; *array_len < ARRAY_SIZE(ccids); *array_len += 1)
		(*ccid_array)[*array_len] = ccids[*array_len]->ccid_id;
	return 0;
}

int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
				  char __user *optval, int __user *optlen)
{
	u8 *ccid_array, array_len;
	int err = 0;

	if (ccid_get_builtin_ccids(&ccid_array, &array_len))
		return -ENOBUFS;

	if (put_user(array_len, optlen))
		err = -EFAULT;
	else if (len > 0 && copy_to_user(optval, ccid_array,
					 len > array_len ? array_len : len))
		err = -EFAULT;

	kfree(ccid_array);
	return err;
}

static __printf(3, 4) struct kmem_cache *ccid_kmem_cache_create(int obj_size,
							char *slab_name_fmt,
							const char *fmt, ...)
{
	struct kmem_cache *slab;
	va_list args;

	va_start(args, fmt);
	vsnprintf(slab_name_fmt, CCID_SLAB_NAME_LENGTH, fmt, args);
	va_end(args);

	slab = kmem_cache_create(slab_name_fmt, sizeof(struct ccid) + obj_size, 0,
				 SLAB_HWCACHE_ALIGN, NULL);
	return slab;
}

static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
{
	kmem_cache_destroy(slab);
}

static int __init ccid_activate(struct ccid_operations *ccid_ops)
{
	int err = -ENOBUFS;

	ccid_ops->ccid_hc_rx_slab =
			ccid_kmem_cache_create(ccid_ops->ccid_hc_rx_obj_size,
					       ccid_ops->ccid_hc_rx_slab_name,
					       "ccid%u_hc_rx_sock",
					       ccid_ops->ccid_id);
	if (ccid_ops->ccid_hc_rx_slab == NULL)
		goto out;

	ccid_ops->ccid_hc_tx_slab =
			ccid_kmem_cache_create(ccid_ops->ccid_hc_tx_obj_size,
					       ccid_ops->ccid_hc_tx_slab_name,
					       "ccid%u_hc_tx_sock",
					       ccid_ops->ccid_id);
	if (ccid_ops->ccid_hc_tx_slab == NULL)
		goto out_free_rx_slab;

	pr_info("DCCP: Activated CCID %d (%s)\n",
		ccid_ops->ccid_id, ccid_ops->ccid_name);
	err = 0;
out:
	return err;
out_free_rx_slab:
	ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab);
	ccid_ops->ccid_hc_rx_slab = NULL;
	goto out;
}

static void ccid_deactivate(struct ccid_operations *ccid_ops)
{
	ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab);
	ccid_ops->ccid_hc_tx_slab = NULL;
	ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab);
	ccid_ops->ccid_hc_rx_slab = NULL;

	pr_info("DCCP: Deactivated CCID %d (%s)\n",
		ccid_ops->ccid_id, ccid_ops->ccid_name);
}

struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx)
{
	struct ccid_operations *ccid_ops = ccid_by_number(id);
	struct ccid *ccid = NULL;

	if (ccid_ops == NULL)
		goto out;

	ccid = kmem_cache_alloc(rx ? ccid_ops->ccid_hc_rx_slab :
				     ccid_ops->ccid_hc_tx_slab, gfp_any());
	if (ccid == NULL)
		goto out;
	ccid->ccid_ops = ccid_ops;
	if (rx) {
		memset(ccid + 1, 0, ccid_ops->ccid_hc_rx_obj_size);
		if (ccid->ccid_ops->ccid_hc_rx_init != NULL &&
		    ccid->ccid_ops->ccid_hc_rx_init(ccid, sk) != 0)
			goto out_free_ccid;
	} else {
		memset(ccid + 1, 0, ccid_ops->ccid_hc_tx_obj_size);
		if (ccid->ccid_ops->ccid_hc_tx_init != NULL &&
		    ccid->ccid_ops->ccid_hc_tx_init(ccid, sk) != 0)
			goto out_free_ccid;
	}
out:
	return ccid;
out_free_ccid:
	kmem_cache_free(rx ? ccid_ops->ccid_hc_rx_slab :
			ccid_ops->ccid_hc_tx_slab, ccid);
	ccid = NULL;
	goto out;
}

void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk)
{
	if (ccid != NULL) {
		if (ccid->ccid_ops->ccid_hc_rx_exit != NULL)
			ccid->ccid_ops->ccid_hc_rx_exit(sk);
		kmem_cache_free(ccid->ccid_ops->ccid_hc_rx_slab, ccid);
	}
}

void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk)
{
	if (ccid != NULL) {
		if (ccid->ccid_ops->ccid_hc_tx_exit != NULL)
			ccid->ccid_ops->ccid_hc_tx_exit(sk);
		kmem_cache_free(ccid->ccid_ops->ccid_hc_tx_slab, ccid);
	}
}

int __init ccid_initialize_builtins(void)
{
	int i, err = tfrc_lib_init();

	if (err)
		return err;

	for (i = 0; i < ARRAY_SIZE(ccids); i++) {
		err = ccid_activate(ccids[i]);
		if (err)
			goto unwind_registrations;
	}
	return 0;

unwind_registrations:
	while (--i >= 0)
		ccid_deactivate(ccids[i]);
	tfrc_lib_exit();
	return err;
}

void ccid_cleanup_builtins(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(ccids); i++)
		ccid_deactivate(ccids[i]);
	tfrc_lib_exit();
}
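ccid_new() and the ccid_hc_{rx,tx}_delete() helpers above encapsulate the per-CCID slab allocation and the optional init/exit hooks. A minimal usage sketch, using only functions defined in this file (the function name example_attach_tx_ccid() is invented for illustration; real callers live elsewhere in net/dccp/):

/* Illustrative only: create a TX half-connection CCID for @sk and release
 * it again. ccid_new() returns NULL if @ccid_id is not a built-in CCID,
 * if the slab allocation fails, or if the CCID's ccid_hc_tx_init() hook
 * rejects the socket. */
static int example_attach_tx_ccid(struct sock *sk, u8 ccid_id)
{
	struct ccid *tx_ccid = ccid_new(ccid_id, sk, /* rx = */ false);

	if (tx_ccid == NULL)
		return -ENOMEM;

	/* ... drive the connection through tx_ccid->ccid_ops ... */

	ccid_hc_tx_delete(tx_ccid, sk);	/* runs ccid_hc_tx_exit(), frees slab object */
	return 0;
}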
// SPDX-License-Identifier: GPL-2.0
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The IP fragmentation functionality.
 *
 * Authors:	Fred N.
van Kempen <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox <alan@lxorguk.ukuu.org.uk> * * Fixes: * Alan Cox : Split from ip.c , see ip_input.c for history. * David S. Miller : Begin massive cleanup... * Andi Kleen : Add sysctls. * xxxx : Overlapfrag bug. * Ultima : ip_expire() kernel panic. * Bill Hawes : Frag accounting and evictor fixes. * John McDonald : 0 length frag bug. * Alexey Kuznetsov: SMP races, threading, cleanup. * Patrick McHardy : LRU queue of frag heads for evictor. */ #define pr_fmt(fmt) "IPv4: " fmt #include <linux/compiler.h> #include <linux/module.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/jiffies.h> #include <linux/skbuff.h> #include <linux/list.h> #include <linux/ip.h> #include <linux/icmp.h> #include <linux/netdevice.h> #include <linux/jhash.h> #include <linux/random.h> #include <linux/slab.h> #include <net/route.h> #include <net/dst.h> #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> #include <net/checksum.h> #include <net/inetpeer.h> #include <net/inet_frag.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/inet.h> #include <linux/netfilter_ipv4.h> #include <net/inet_ecn.h> #include <net/l3mdev.h> /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c * as well. Or notify me, at least. --ANK */ static const char ip_frag_cache_name[] = "ip4-frags"; /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { struct inet_frag_queue q; u8 ecn; /* RFC3168 support */ u16 max_df_size; /* largest frag with DF set seen */ int iif; unsigned int rid; struct inet_peer *peer; }; static u8 ip4_frag_ecn(u8 tos) { return 1 << (tos & INET_ECN_MASK); } static struct inet_frags ip4_frags; static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, struct sk_buff *prev_tail, struct net_device *dev); static void ip4_frag_init(struct inet_frag_queue *q, const void *a) { struct ipq *qp = container_of(q, struct ipq, q); const struct frag_v4_compare_key *key = a; struct net *net = q->fqdir->net; struct inet_peer *p = NULL; q->key.v4 = *key; qp->ecn = 0; if (q->fqdir->max_dist) { rcu_read_lock(); p = inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif); if (p && !refcount_inc_not_zero(&p->refcnt)) p = NULL; rcu_read_unlock(); } qp->peer = p; } static void ip4_frag_free(struct inet_frag_queue *q) { struct ipq *qp; qp = container_of(q, struct ipq, q); if (qp->peer) inet_putpeer(qp->peer); } /* Destruction primitives. */ static void ipq_put(struct ipq *ipq) { inet_frag_put(&ipq->q); } /* Kill ipq entry. It is not destroyed immediately, * because caller (and someone more) holds reference count. */ static void ipq_kill(struct ipq *ipq) { inet_frag_kill(&ipq->q); } static bool frag_expire_skip_icmp(u32 user) { return user == IP_DEFRAG_AF_PACKET || ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN, __IP_DEFRAG_CONNTRACK_IN_END) || ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN, __IP_DEFRAG_CONNTRACK_BRIDGE_IN); } /* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ static void ip_expire(struct timer_list *t) { enum skb_drop_reason reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT; struct inet_frag_queue *frag = from_timer(frag, t, timer); const struct iphdr *iph; struct sk_buff *head = NULL; struct net *net; struct ipq *qp; qp = container_of(frag, struct ipq, q); net = qp->q.fqdir->net; rcu_read_lock(); /* Paired with WRITE_ONCE() in fqdir_pre_exit(). 
*/ if (READ_ONCE(qp->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) goto out; qp->q.flags |= INET_FRAG_DROP; ipq_kill(qp); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); if (!(qp->q.flags & INET_FRAG_FIRST_IN)) goto out; /* sk_buff::dev and sk_buff::rbnode are unionized. So we * pull the head out of the tree in order to be able to * deal with head->dev. */ head = inet_frag_pull_head(&qp->q); if (!head) goto out; head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out; /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); reason = ip_route_input_noref(head, iph->daddr, iph->saddr, ip4h_dscp(iph), head->dev); if (reason) goto out; /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT; if (frag_expire_skip_icmp(qp->q.key.v4.user) && (skb_rtable(head)->rt_type != RTN_LOCAL)) goto out; spin_unlock(&qp->q.lock); icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); goto out_rcu_unlock; out: spin_unlock(&qp->q.lock); out_rcu_unlock: rcu_read_unlock(); kfree_skb_reason(head, reason); ipq_put(qp); } /* Find the correct entry in the "incomplete datagrams" queue for * this IP datagram, and create new one, if nothing is found. */ static struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user, int vif) { struct frag_v4_compare_key key = { .saddr = iph->saddr, .daddr = iph->daddr, .user = user, .vif = vif, .id = iph->id, .protocol = iph->protocol, }; struct inet_frag_queue *q; q = inet_frag_find(net->ipv4.fqdir, &key); if (!q) return NULL; return container_of(q, struct ipq, q); } /* Is the fragment too far ahead to be part of ipq? */ static int ip_frag_too_far(struct ipq *qp) { struct inet_peer *peer = qp->peer; unsigned int max = qp->q.fqdir->max_dist; unsigned int start, end; int rc; if (!peer || !max) return 0; start = qp->rid; end = atomic_inc_return(&peer->rid); qp->rid = end; rc = qp->q.fragments_tail && (end - start) > max; if (rc) __IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS); return rc; } static int ip_frag_reinit(struct ipq *qp) { unsigned int sum_truesize = 0; if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) { refcount_inc(&qp->q.refcnt); return -ETIMEDOUT; } sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments, SKB_DROP_REASON_FRAG_TOO_FAR); sub_frag_mem_limit(qp->q.fqdir, sum_truesize); qp->q.flags = 0; qp->q.len = 0; qp->q.meat = 0; qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; qp->q.last_run_head = NULL; qp->iif = 0; qp->ecn = 0; return 0; } /* Add new segment to existing queue. */ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { struct net *net = qp->q.fqdir->net; int ihl, end, flags, offset; struct sk_buff *prev_tail; struct net_device *dev; unsigned int fragsize; int err = -ENOENT; SKB_DR(reason); u8 ecn; /* If reassembly is already done, @skb must be a duplicate frag. */ if (qp->q.flags & INET_FRAG_COMPLETE) { SKB_DR_SET(reason, DUP_FRAG); goto err; } if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && unlikely(ip_frag_too_far(qp)) && unlikely(err = ip_frag_reinit(qp))) { ipq_kill(qp); goto err; } ecn = ip4_frag_ecn(ip_hdr(skb)->tos); offset = ntohs(ip_hdr(skb)->frag_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; offset <<= 3; /* offset is in 8-byte chunks */ ihl = ip_hdrlen(skb); /* Determine the position of this fragment. 
*/ end = offset + skb->len - skb_network_offset(skb) - ihl; err = -EINVAL; /* Is this the final fragment? */ if ((flags & IP_MF) == 0) { /* If we already have some bits beyond end * or have different end, the segment is corrupted. */ if (end < qp->q.len || ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len)) goto discard_qp; qp->q.flags |= INET_FRAG_LAST_IN; qp->q.len = end; } else { if (end&7) { end &= ~7; if (skb->ip_summed != CHECKSUM_UNNECESSARY) skb->ip_summed = CHECKSUM_NONE; } if (end > qp->q.len) { /* Some bits beyond end -> corruption. */ if (qp->q.flags & INET_FRAG_LAST_IN) goto discard_qp; qp->q.len = end; } } if (end == offset) goto discard_qp; err = -ENOMEM; if (!pskb_pull(skb, skb_network_offset(skb) + ihl)) goto discard_qp; err = pskb_trim_rcsum(skb, end - offset); if (err) goto discard_qp; /* Note : skb->rbnode and skb->dev share the same location. */ dev = skb->dev; /* Makes sure compiler wont do silly aliasing games */ barrier(); prev_tail = qp->q.fragments_tail; err = inet_frag_queue_insert(&qp->q, skb, offset, end); if (err) goto insert_error; if (dev) qp->iif = dev->ifindex; qp->q.stamp = skb->tstamp; qp->q.tstamp_type = skb->tstamp_type; qp->q.meat += skb->len; qp->ecn |= ecn; add_frag_mem_limit(qp->q.fqdir, skb->truesize); if (offset == 0) qp->q.flags |= INET_FRAG_FIRST_IN; fragsize = skb->len + ihl; if (fragsize > qp->q.max_size) qp->q.max_size = fragsize; if (ip_hdr(skb)->frag_off & htons(IP_DF) && fragsize > qp->max_df_size) qp->max_df_size = fragsize; if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && qp->q.meat == qp->q.len) { unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; err = ip_frag_reasm(qp, skb, prev_tail, dev); skb->_skb_refdst = orefdst; if (err) inet_frag_kill(&qp->q); return err; } skb_dst_drop(skb); skb_orphan(skb); return -EINPROGRESS; insert_error: if (err == IPFRAG_DUP) { SKB_DR_SET(reason, DUP_FRAG); err = -EINVAL; goto err; } err = -EINVAL; __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); discard_qp: inet_frag_kill(&qp->q); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); err: kfree_skb_reason(skb, reason); return err; } static bool ip_frag_coalesce_ok(const struct ipq *qp) { return qp->q.key.v4.user == IP_DEFRAG_LOCAL_DELIVER; } /* Build a new IP datagram from all its fragments. */ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, struct sk_buff *prev_tail, struct net_device *dev) { struct net *net = qp->q.fqdir->net; struct iphdr *iph; void *reasm_data; int len, err; u8 ecn; ipq_kill(qp); ecn = ip_frag_ecn_table[qp->ecn]; if (unlikely(ecn == 0xff)) { err = -EINVAL; goto out_fail; } /* Make the one we just received the head. */ reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail); if (!reasm_data) goto out_nomem; len = ip_hdrlen(skb) + qp->q.len; err = -E2BIG; if (len > 65535) goto out_oversize; inet_frag_reasm_finish(&qp->q, skb, reasm_data, ip_frag_coalesce_ok(qp)); skb->dev = dev; IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size); iph = ip_hdr(skb); iph->tot_len = htons(len); iph->tos |= ecn; /* When we set IP_DF on a refragmented skb we must also force a * call to ip_fragment to avoid forwarding a DF-skb of size s while * original sender only sent fragments of size f (where f < s). * * We only set DF/IPSKB_FRAG_PMTU if such DF fragment was the largest * frag seen to avoid sending tiny DF-fragments in case skb was built * from one very small df-fragment and one large non-df frag. 
*/ if (qp->max_df_size == qp->q.max_size) { IPCB(skb)->flags |= IPSKB_FRAG_PMTU; iph->frag_off = htons(IP_DF); } else { iph->frag_off = 0; } ip_send_check(iph); __IP_INC_STATS(net, IPSTATS_MIB_REASMOKS); qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; qp->q.last_run_head = NULL; return 0; out_nomem: net_dbg_ratelimited("queue_glue: no memory for gluing queue %p\n", qp); err = -ENOMEM; goto out_fail; out_oversize: net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr); out_fail: __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); return err; } /* Process an incoming IP datagram fragment. */ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) { struct net_device *dev = skb->dev ? : skb_dst(skb)->dev; int vif = l3mdev_master_ifindex_rcu(dev); struct ipq *qp; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); /* Lookup (or create) queue header */ qp = ip_find(net, ip_hdr(skb), user, vif); if (qp) { int ret; spin_lock(&qp->q.lock); ret = ip_frag_queue(qp, skb); spin_unlock(&qp->q.lock); ipq_put(qp); return ret; } __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -ENOMEM; } EXPORT_SYMBOL(ip_defrag); struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) { struct iphdr iph; int netoff; u32 len; if (skb->protocol != htons(ETH_P_IP)) return skb; netoff = skb_network_offset(skb); if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0) return skb; if (iph.ihl < 5 || iph.version != 4) return skb; len = ntohs(iph.tot_len); if (skb->len < netoff + len || len < (iph.ihl * 4)) return skb; if (ip_is_fragment(&iph)) { skb = skb_share_check(skb, GFP_ATOMIC); if (skb) { if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) { kfree_skb(skb); return NULL; } if (pskb_trim_rcsum(skb, netoff + len)) { kfree_skb(skb); return NULL; } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(net, skb, user)) return NULL; skb_clear_hash(skb); } } return skb; } EXPORT_SYMBOL(ip_check_defrag); #ifdef CONFIG_SYSCTL static int dist_min; static struct ctl_table ip4_frags_ns_ctl_table[] = { { .procname = "ipfrag_high_thresh", .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, { .procname = "ipfrag_low_thresh", .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, { .procname = "ipfrag_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ipfrag_max_dist", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &dist_min, }, }; /* secret interval has been deprecated */ static int ip4_frags_secret_interval_unused; static struct ctl_table ip4_frags_ctl_table[] = { { .procname = "ipfrag_secret_interval", .data = &ip4_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, }; static int __net_init ip4_frags_ns_ctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; table = ip4_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL); if (!table) goto err_alloc; } table[0].data = &net->ipv4.fqdir->high_thresh; table[0].extra1 = &net->ipv4.fqdir->low_thresh; table[1].data = &net->ipv4.fqdir->low_thresh; table[1].extra2 = &net->ipv4.fqdir->high_thresh; table[2].data = &net->ipv4.fqdir->timeout; table[3].data = &net->ipv4.fqdir->max_dist; hdr = register_net_sysctl_sz(net, "net/ipv4", table, ARRAY_SIZE(ip4_frags_ns_ctl_table)); if (!hdr) goto err_reg; net->ipv4.frags_hdr 
= hdr; return 0; err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net) { const struct ctl_table *table; table = net->ipv4.frags_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.frags_hdr); kfree(table); } static void __init ip4_frags_ctl_register(void) { register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table); } #else static int ip4_frags_ns_ctl_register(struct net *net) { return 0; } static void ip4_frags_ns_ctl_unregister(struct net *net) { } static void __init ip4_frags_ctl_register(void) { } #endif static int __net_init ipv4_frags_init_net(struct net *net) { int res; res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net); if (res < 0) return res; /* Fragment cache limits. * * The fragment memory accounting code, (tries to) account for * the real memory usage, by measuring both the size of frag * queue struct (inet_frag_queue (ipv4:ipq/ipv6:frag_queue)) * and the SKB's truesize. * * A 64K fragment consumes 129736 bytes (44*2944)+200 * (1500 truesize == 2944, sizeof(struct ipq) == 200) * * We will commit 4MB at one time. Should we cross that limit * we will prune down to 3MB, making room for approx 8 big 64K * fragments 8x128k. */ net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024; net->ipv4.fqdir->low_thresh = 3 * 1024 * 1024; /* * Important NOTE! Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival * by TTL. */ net->ipv4.fqdir->timeout = IP_FRAG_TIME; net->ipv4.fqdir->max_dist = 64; res = ip4_frags_ns_ctl_register(net); if (res < 0) fqdir_exit(net->ipv4.fqdir); return res; } static void __net_exit ipv4_frags_pre_exit_net(struct net *net) { fqdir_pre_exit(net->ipv4.fqdir); } static void __net_exit ipv4_frags_exit_net(struct net *net) { ip4_frags_ns_ctl_unregister(net); fqdir_exit(net->ipv4.fqdir); } static struct pernet_operations ip4_frags_ops = { .init = ipv4_frags_init_net, .pre_exit = ipv4_frags_pre_exit_net, .exit = ipv4_frags_exit_net, }; static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed) { return jhash2(data, sizeof(struct frag_v4_compare_key) / sizeof(u32), seed); } static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed) { const struct inet_frag_queue *fq = data; return jhash2((const u32 *)&fq->key.v4, sizeof(struct frag_v4_compare_key) / sizeof(u32), seed); } static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) { const struct frag_v4_compare_key *key = arg->key; const struct inet_frag_queue *fq = ptr; return !!memcmp(&fq->key, key, sizeof(*key)); } static const struct rhashtable_params ip4_rhash_params = { .head_offset = offsetof(struct inet_frag_queue, node), .key_offset = offsetof(struct inet_frag_queue, key), .key_len = sizeof(struct frag_v4_compare_key), .hashfn = ip4_key_hashfn, .obj_hashfn = ip4_obj_hashfn, .obj_cmpfn = ip4_obj_cmpfn, .automatic_shrinking = true, }; void __init ipfrag_init(void) { ip4_frags.constructor = ip4_frag_init; ip4_frags.destructor = ip4_frag_free; ip4_frags.qsize = sizeof(struct ipq); ip4_frags.frag_expire = ip_expire; ip4_frags.frags_cache_name = ip_frag_cache_name; ip4_frags.rhash_params = ip4_rhash_params; if (inet_frags_init(&ip4_frags)) panic("IP: failed to allocate ip4_frags cache\n"); ip4_frags_ctl_register(); register_pernet_subsys(&ip4_frags_ops); }
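/*
 * Illustrative userspace sketch, not part of the kernel sources above: it only
 * reproduces the arithmetic from the "Fragment cache limits" comment in
 * ipv4_frags_init_net().  The constants (2944 bytes of truesize per 1500 byte
 * fragment, 44 fragments and 200 bytes of struct ipq per 64K datagram, 4 MB /
 * 3 MB thresholds) are taken from that comment, not measured here.
 */
#include <stdio.h>

int main(void)
{
	const unsigned long frag_truesize = 2944;  /* truesize of one 1500 byte fragment */
	const unsigned long frags_per_64k = 44;    /* fragments needed for a 64K datagram */
	const unsigned long ipq_size = 200;        /* sizeof(struct ipq) per the comment */
	const unsigned long high_thresh = 4 * 1024 * 1024;
	const unsigned long low_thresh = 3 * 1024 * 1024;
	unsigned long per_64k = frags_per_64k * frag_truesize + ipq_size;

	printf("one 64K datagram accounts for %lu bytes\n", per_64k);         /* 129736 */
	printf("high_thresh holds about %lu such datagrams\n",
	       high_thresh / per_64k);                                        /* ~32 */
	printf("pruning to low_thresh makes room for about %lu more\n",
	       (high_thresh - low_thresh) / per_64k);                         /* ~8 */
	return 0;
}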
// SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_fib.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> static int get_ifindex(const struct net_device *dev) { return dev ? dev->ifindex : 0; } static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, const struct nft_pktinfo *pkt, const struct net_device *dev, struct ipv6hdr *iph) { int lookup_flags = 0; if (priv->flags & NFTA_FIB_F_DADDR) { fl6->daddr = iph->daddr; fl6->saddr = iph->saddr; } else { if (nft_hook(pkt) == NF_INET_FORWARD && priv->flags & NFTA_FIB_F_IIF) fl6->flowi6_iif = nft_out(pkt)->ifindex; fl6->daddr = iph->saddr; fl6->saddr = iph->daddr; } if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) { lookup_flags |= RT6_LOOKUP_F_IFACE; fl6->flowi6_oif = get_ifindex(dev ?
dev : pkt->skb->dev); } if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST) lookup_flags |= RT6_LOOKUP_F_HAS_SADDR; if (priv->flags & NFTA_FIB_F_MARK) fl6->flowi6_mark = pkt->skb->mark; fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK; return lookup_flags; } static u32 __nft_fib6_eval_type(const struct nft_fib *priv, const struct nft_pktinfo *pkt, struct ipv6hdr *iph) { const struct net_device *dev = NULL; int route_err, addrtype; struct rt6_info *rt; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), }; u32 ret = 0; if (priv->flags & NFTA_FIB_F_IIF) dev = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) dev = nft_out(pkt); fl6.flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev); nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt, flowi6_to_flowi(&fl6), false); if (route_err) goto err; if (rt->rt6i_flags & RTF_REJECT) { route_err = rt->dst.error; dst_release(&rt->dst); goto err; } if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr)) ret = RTN_ANYCAST; else if (!dev && rt->rt6i_flags & RTF_LOCAL) ret = RTN_LOCAL; dst_release(&rt->dst); if (ret) return ret; addrtype = ipv6_addr_type(&fl6.daddr); if (addrtype & IPV6_ADDR_MULTICAST) return RTN_MULTICAST; if (addrtype & IPV6_ADDR_UNICAST) return RTN_UNICAST; return RTN_UNSPEC; err: switch (route_err) { case -EINVAL: return RTN_BLACKHOLE; case -EACCES: return RTN_PROHIBIT; case -EAGAIN: return RTN_THROW; default: break; } return RTN_UNREACHABLE; } void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); int noff = skb_network_offset(pkt->skb); u32 *dest = &regs->data[priv->dreg]; struct ipv6hdr *iph, _iph; iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); if (!iph) { regs->verdict.code = NFT_BREAK; return; } *dest = __nft_fib6_eval_type(priv, pkt, iph); } EXPORT_SYMBOL_GPL(nft_fib6_eval_type); static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const struct ipv6hdr *iph) { if (likely(next != IPPROTO_ICMPV6)) return false; if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY) return false; return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL; } void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); int noff = skb_network_offset(pkt->skb); const struct net_device *oif = NULL; u32 *dest = &regs->data[priv->dreg]; struct ipv6hdr *iph, _iph; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), .flowi6_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)), }; struct rt6_info *rt; int lookup_flags; if (priv->flags & NFTA_FIB_F_IIF) oif = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) oif = nft_out(pkt); iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); if (!iph) { regs->verdict.code = NFT_BREAK; return; } lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); if (nft_hook(pkt) == NF_INET_PRE_ROUTING || nft_hook(pkt) == NF_INET_INGRESS) { if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) || nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) { nft_fib_store_result(dest, priv, nft_in(pkt)); return; } } *dest = 0; rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb, lookup_flags); if 
(rt->dst.error) goto put_rt_err; /* Should not see RTF_LOCAL here */ if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) goto put_rt_err; if (oif && oif != rt->rt6i_idev->dev && l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex) goto put_rt_err; nft_fib_store_result(dest, priv, rt->rt6i_idev->dev); put_rt_err: ip6_rt_put(rt); } EXPORT_SYMBOL_GPL(nft_fib6_eval); static struct nft_expr_type nft_fib6_type; static const struct nft_expr_ops nft_fib6_type_ops = { .type = &nft_fib6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), .eval = nft_fib6_eval_type, .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, .reduce = nft_fib_reduce, }; static const struct nft_expr_ops nft_fib6_ops = { .type = &nft_fib6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), .eval = nft_fib6_eval, .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, .reduce = nft_fib_reduce, }; static const struct nft_expr_ops * nft_fib6_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { enum nft_fib_result result; if (!tb[NFTA_FIB_RESULT]) return ERR_PTR(-EINVAL); result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); switch (result) { case NFT_FIB_RESULT_OIF: return &nft_fib6_ops; case NFT_FIB_RESULT_OIFNAME: return &nft_fib6_ops; case NFT_FIB_RESULT_ADDRTYPE: return &nft_fib6_type_ops; default: return ERR_PTR(-EOPNOTSUPP); } } static struct nft_expr_type nft_fib6_type __read_mostly = { .name = "fib", .select_ops = nft_fib6_select_ops, .policy = nft_fib_policy, .maxattr = NFTA_FIB_MAX, .family = NFPROTO_IPV6, .owner = THIS_MODULE, }; static int __init nft_fib6_module_init(void) { return nft_register_expr(&nft_fib6_type); } static void __exit nft_fib6_module_exit(void) { nft_unregister_expr(&nft_fib6_type); } module_init(nft_fib6_module_init); module_exit(nft_fib6_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); MODULE_ALIAS_NFT_AF_EXPR(10, "fib"); MODULE_DESCRIPTION("nftables fib / ipv6 route lookup support");
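/*
 * Illustrative userspace sketch, not part of the module above: it mirrors the
 * route_err -> RTN_* mapping used at the end of __nft_fib6_eval_type() so the
 * relation between the FIB lookup error and the reported address type can be
 * seen in isolation.  The RTN_* numbers below follow the usual rtnetlink
 * enumeration and are hard-coded here only for this demo.
 */
#include <stdio.h>
#include <errno.h>

enum { RTN_BLACKHOLE = 6, RTN_UNREACHABLE = 7, RTN_PROHIBIT = 8, RTN_THROW = 9 };

static int route_err_to_rtn(int route_err)
{
	switch (route_err) {
	case -EINVAL:	return RTN_BLACKHOLE;
	case -EACCES:	return RTN_PROHIBIT;
	case -EAGAIN:	return RTN_THROW;
	default:	return RTN_UNREACHABLE;
	}
}

int main(void)
{
	printf("-EINVAL      -> %d (blackhole)\n", route_err_to_rtn(-EINVAL));
	printf("-EACCES      -> %d (prohibit)\n", route_err_to_rtn(-EACCES));
	printf("-EAGAIN      -> %d (throw)\n", route_err_to_rtn(-EAGAIN));
	printf("-ENETUNREACH -> %d (unreachable)\n", route_err_to_rtn(-ENETUNREACH));
	return 0;
}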
// SPDX-License-Identifier: GPL-2.0 /* * Functions related to generic timeout handling of requests. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/blkdev.h> #include <linux/fault-inject.h> #include "blk.h" #include "blk-mq.h" #ifdef CONFIG_FAIL_IO_TIMEOUT static DECLARE_FAULT_ATTR(fail_io_timeout); static int __init setup_fail_io_timeout(char *str) { return setup_fault_attr(&fail_io_timeout, str); } __setup("fail_io_timeout=", setup_fail_io_timeout); bool __blk_should_fake_timeout(struct request_queue *q) { return should_fail(&fail_io_timeout, 1); } EXPORT_SYMBOL_GPL(__blk_should_fake_timeout); static int __init fail_io_timeout_debugfs(void) { struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout", NULL, &fail_io_timeout); return PTR_ERR_OR_ZERO(dir); } late_initcall(fail_io_timeout_debugfs); ssize_t part_timeout_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); int set = test_bit(QUEUE_FLAG_FAIL_IO, &disk->queue->queue_flags); return sprintf(buf, "%d\n", set != 0); } ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct gendisk *disk = dev_to_disk(dev); int val; if (count) { struct request_queue *q = disk->queue; char *p = (char *) buf; val = simple_strtoul(p, &p, 10); if (val) blk_queue_flag_set(QUEUE_FLAG_FAIL_IO, q); else blk_queue_flag_clear(QUEUE_FLAG_FAIL_IO, q); } return count; } #endif /* CONFIG_FAIL_IO_TIMEOUT */ /** * blk_abort_request - Request recovery for the specified command * @req: pointer to the request of interest * * This function requests that the block layer start recovery for the * request by deleting the timer and calling the q's timeout function. * LLDDs who implement their own error recovery MAY ignore the timeout * event if they generated blk_abort_request. */ void blk_abort_request(struct request *req) { /* * All we need to ensure is that timeout scan takes place * immediately and that scan sees the new timeout value. * No need for fancy synchronizations. */ WRITE_ONCE(req->deadline, jiffies); kblockd_schedule_work(&req->q->timeout_work); } EXPORT_SYMBOL_GPL(blk_abort_request); static unsigned long blk_timeout_mask __read_mostly; static int __init blk_timeout_init(void) { blk_timeout_mask = roundup_pow_of_two(HZ) - 1; return 0; } late_initcall(blk_timeout_init); /* * Just a rough estimate, we don't care about specific values for timeouts. */ static inline unsigned long blk_round_jiffies(unsigned long j) { return (j + blk_timeout_mask) + 1; } unsigned long blk_rq_timeout(unsigned long timeout) { unsigned long maxt; maxt = blk_round_jiffies(jiffies + BLK_MAX_TIMEOUT); if (time_after(timeout, maxt)) timeout = maxt; return timeout; } /** * blk_add_timer - Start timeout timer for a single request * @req: request that is about to start running. * * Notes: * Each request has its own timer, and as it is added to the queue, we * set up the timer.
When the request completes, we cancel the timer. */ void blk_add_timer(struct request *req) { struct request_queue *q = req->q; unsigned long expiry; /* * Some LLDs, like scsi, peek at the timeout to prevent a * command from being retried forever. */ if (!req->timeout) req->timeout = q->rq_timeout; req->rq_flags &= ~RQF_TIMED_OUT; expiry = jiffies + req->timeout; WRITE_ONCE(req->deadline, expiry); /* * If the timer isn't already pending or this timeout is earlier * than an existing one, modify the timer. Round up to next nearest * second. */ expiry = blk_rq_timeout(blk_round_jiffies(expiry)); if (!timer_pending(&q->timeout) || time_before(expiry, q->timeout.expires)) { unsigned long diff = q->timeout.expires - expiry; /* * Due to added timer slack to group timers, the timer * will often be a little in front of what we asked for. * So apply some tolerance here too, otherwise we keep * modifying the timer because expires for value X * will be X + something. */ if (!timer_pending(&q->timeout) || (diff >= HZ / 2)) mod_timer(&q->timeout, expiry); } }
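/*
 * Illustrative userspace sketch, not part of blk-timeout.c above: it shows
 * what blk_round_jiffies() and blk_rq_timeout() evaluate to for HZ == 1000.
 * BLK_MAX_TIMEOUT is assumed to be 5 * HZ (as defined in block/blk.h of
 * current kernels), the current jiffies value is passed in explicitly, and
 * the jiffies-wrap handling of time_after() is left out for brevity.
 */
#include <stdio.h>

#define HZ		1000UL
#define BLK_MAX_TIMEOUT	(5 * HZ)

static unsigned long blk_timeout_mask = 1024 - 1;	/* roundup_pow_of_two(HZ) - 1 */

static unsigned long blk_round_jiffies(unsigned long j)
{
	/* "just a rough estimate": push roughly one 2^n-jiffy interval ahead */
	return (j + blk_timeout_mask) + 1;
}

static unsigned long blk_rq_timeout(unsigned long now, unsigned long timeout)
{
	unsigned long maxt = blk_round_jiffies(now + BLK_MAX_TIMEOUT);

	return timeout > maxt ? maxt : timeout;	/* plain compare, no wrap handling */
}

int main(void)
{
	unsigned long now = 100000;
	unsigned long capped = blk_rq_timeout(now, now + 30 * HZ);

	printf("blk_round_jiffies(%lu) = %lu\n", now, blk_round_jiffies(now));
	printf("a 30s timeout is capped to %lu (now + %lu jiffies)\n",
	       capped, capped - now);
	return 0;
}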
// SPDX-License-Identifier: GPL-2.0-only #include <linux/ethtool_netlink.h> #include <linux/bitmap.h> #include "netlink.h" #include "bitset.h" /* Some bitmaps are internally represented as an array of unsigned long, some * as an array of u32 (some even as single u32 for now). To avoid the need of * wrappers on caller side, we provide two set of functions: those with "32" * suffix in their names expect u32 based bitmaps, those without it expect * unsigned long bitmaps. */ static u32 ethnl_lower_bits(unsigned int n) { return ~(u32)0 >> (32 - n % 32); } static u32 ethnl_upper_bits(unsigned int n) { return ~(u32)0 << (n % 32); } /** * ethnl_bitmap32_clear() - Clear u32 based bitmap * @dst: bitmap to clear * @start: beginning of the interval * @end: end of the interval * @mod: set if bitmap was modified * * Clear @nbits bits of a bitmap with indices @start <= i < @end */ static void ethnl_bitmap32_clear(u32 *dst, unsigned int start, unsigned int end, bool *mod) { unsigned int start_word = start / 32; unsigned int end_word = end / 32; unsigned int i; u32 mask; if (end <= start) return; if (start % 32) { mask = ethnl_upper_bits(start); if (end_word == start_word) { mask &= ethnl_lower_bits(end); if (dst[start_word] & mask) { dst[start_word] &= ~mask; *mod = true; } return; } if (dst[start_word] & mask) { dst[start_word] &= ~mask; *mod = true; } start_word++; } for (i = start_word; i < end_word; i++) { if (dst[i]) { dst[i] = 0; *mod = true; } } if (end % 32) { mask = ethnl_lower_bits(end); if (dst[end_word] & mask) { dst[end_word] &= ~mask; *mod = true; } } } /** * ethnl_bitmap32_not_zero() - Check if any bit is set in an interval * @map: bitmap to test * @start: beginning of the interval * @end: end of the interval * * Return: true if there is non-zero bit with index @start <= i < @end, * false if the whole interval is zero */ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start, unsigned int end) { unsigned int start_word = start / 32; unsigned int end_word = end / 32; u32 mask; if (end <= start) return true; if (start % 32) { mask = ethnl_upper_bits(start); if (end_word == start_word) { mask &= ethnl_lower_bits(end); return map[start_word] & mask; } if (map[start_word] & mask) return true; start_word++; } if (!memchr_inv(map + start_word, '\0', (end_word - start_word) * sizeof(u32))) return true; if (end % 32 == 0) return true; return map[end_word] & ethnl_lower_bits(end); } /** * ethnl_bitmap32_update() - Modify u32 based bitmap according to value/mask * pair * @dst: bitmap to update * @nbits: bit size of the bitmap * @value: values to set * @mask: mask of bits to set * @mod: set to true if bitmap is modified, preserve if not * * Set bits in @dst bitmap which are set in @mask to values from @value, leave * the rest untouched. If destination bitmap was modified, set @mod to true, * leave as it is if not. */ static void ethnl_bitmap32_update(u32 *dst, unsigned int nbits, const u32 *value, const u32 *mask, bool *mod) { while (nbits > 0) { u32 real_mask = mask ?
*mask : ~(u32)0; u32 new_value; if (nbits < 32) real_mask &= ethnl_lower_bits(nbits); new_value = (*dst & ~real_mask) | (*value & real_mask); if (new_value != *dst) { *dst = new_value; *mod = true; } if (nbits <= 32) break; dst++; nbits -= 32; value++; if (mask) mask++; } } static bool ethnl_bitmap32_test_bit(const u32 *map, unsigned int index) { return map[index / 32] & (1U << (index % 32)); } /** * ethnl_bitset32_size() - Calculate size of bitset nested attribute * @val: value bitmap (u32 based) * @mask: mask bitmap (u32 based, optional) * @nbits: bit length of the bitset * @names: array of bit names (optional) * @compact: assume compact format for output * * Estimate length of netlink attribute composed by a later call to * ethnl_put_bitset32() call with the same arguments. * * Return: negative error code or attribute length estimate */ int ethnl_bitset32_size(const u32 *val, const u32 *mask, unsigned int nbits, ethnl_string_array_t names, bool compact) { unsigned int len = 0; /* list flag */ if (!mask) len += nla_total_size(sizeof(u32)); /* size */ len += nla_total_size(sizeof(u32)); if (compact) { unsigned int nwords = DIV_ROUND_UP(nbits, 32); /* value, mask */ len += (mask ? 2 : 1) * nla_total_size(nwords * sizeof(u32)); } else { unsigned int bits_len = 0; unsigned int bit_len, i; for (i = 0; i < nbits; i++) { const char *name = names ? names[i] : NULL; if (!ethnl_bitmap32_test_bit(mask ?: val, i)) continue; /* index */ bit_len = nla_total_size(sizeof(u32)); /* name */ if (name) bit_len += ethnl_strz_size(name); /* value */ if (mask && ethnl_bitmap32_test_bit(val, i)) bit_len += nla_total_size(0); /* bit nest */ bits_len += nla_total_size(bit_len); } /* bits nest */ len += nla_total_size(bits_len); } /* outermost nest */ return nla_total_size(len); } /** * ethnl_put_bitset32() - Put a bitset nest into a message * @skb: skb with the message * @attrtype: attribute type for the bitset nest * @val: value bitmap (u32 based) * @mask: mask bitmap (u32 based, optional) * @nbits: bit length of the bitset * @names: array of bit names (optional) * @compact: use compact format for the output * * Compose a nested attribute representing a bitset. If @mask is null, simple * bitmap (bit list) is created, if @mask is provided, represent a value/mask * pair. Bit names are only used in verbose mode and when provided by calller. * * Return: 0 on success, negative error value on error */ int ethnl_put_bitset32(struct sk_buff *skb, int attrtype, const u32 *val, const u32 *mask, unsigned int nbits, ethnl_string_array_t names, bool compact) { struct nlattr *nest; struct nlattr *attr; nest = nla_nest_start(skb, attrtype); if (!nest) return -EMSGSIZE; if (!mask && nla_put_flag(skb, ETHTOOL_A_BITSET_NOMASK)) goto nla_put_failure; if (nla_put_u32(skb, ETHTOOL_A_BITSET_SIZE, nbits)) goto nla_put_failure; if (compact) { unsigned int nwords = DIV_ROUND_UP(nbits, 32); unsigned int nbytes = nwords * sizeof(u32); u32 *dst; attr = nla_reserve(skb, ETHTOOL_A_BITSET_VALUE, nbytes); if (!attr) goto nla_put_failure; dst = nla_data(attr); memcpy(dst, val, nbytes); if (nbits % 32) dst[nwords - 1] &= ethnl_lower_bits(nbits); if (mask) { attr = nla_reserve(skb, ETHTOOL_A_BITSET_MASK, nbytes); if (!attr) goto nla_put_failure; dst = nla_data(attr); memcpy(dst, mask, nbytes); if (nbits % 32) dst[nwords - 1] &= ethnl_lower_bits(nbits); } } else { struct nlattr *bits; unsigned int i; bits = nla_nest_start(skb, ETHTOOL_A_BITSET_BITS); if (!bits) goto nla_put_failure; for (i = 0; i < nbits; i++) { const char *name = names ? 
names[i] : NULL; if (!ethnl_bitmap32_test_bit(mask ?: val, i)) continue; attr = nla_nest_start(skb, ETHTOOL_A_BITSET_BITS_BIT); if (!attr) goto nla_put_failure; if (nla_put_u32(skb, ETHTOOL_A_BITSET_BIT_INDEX, i)) goto nla_put_failure; if (name && ethnl_put_strz(skb, ETHTOOL_A_BITSET_BIT_NAME, name)) goto nla_put_failure; if (mask && ethnl_bitmap32_test_bit(val, i) && nla_put_flag(skb, ETHTOOL_A_BITSET_BIT_VALUE)) goto nla_put_failure; nla_nest_end(skb, attr); } nla_nest_end(skb, bits); } nla_nest_end(skb, nest); return 0; nla_put_failure: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static const struct nla_policy bitset_policy[] = { [ETHTOOL_A_BITSET_NOMASK] = { .type = NLA_FLAG }, [ETHTOOL_A_BITSET_SIZE] = NLA_POLICY_MAX(NLA_U32, ETHNL_MAX_BITSET_SIZE), [ETHTOOL_A_BITSET_BITS] = { .type = NLA_NESTED }, [ETHTOOL_A_BITSET_VALUE] = { .type = NLA_BINARY }, [ETHTOOL_A_BITSET_MASK] = { .type = NLA_BINARY }, }; static const struct nla_policy bit_policy[] = { [ETHTOOL_A_BITSET_BIT_INDEX] = { .type = NLA_U32 }, [ETHTOOL_A_BITSET_BIT_NAME] = { .type = NLA_NUL_STRING }, [ETHTOOL_A_BITSET_BIT_VALUE] = { .type = NLA_FLAG }, }; /** * ethnl_bitset_is_compact() - check if bitset attribute represents a compact * bitset * @bitset: nested attribute representing a bitset * @compact: pointer for return value * * Return: 0 on success, negative error code on failure */ int ethnl_bitset_is_compact(const struct nlattr *bitset, bool *compact) { struct nlattr *tb[ARRAY_SIZE(bitset_policy)]; int ret; ret = nla_parse_nested(tb, ARRAY_SIZE(bitset_policy) - 1, bitset, bitset_policy, NULL); if (ret < 0) return ret; if (tb[ETHTOOL_A_BITSET_BITS]) { if (tb[ETHTOOL_A_BITSET_VALUE] || tb[ETHTOOL_A_BITSET_MASK]) return -EINVAL; *compact = false; return 0; } if (!tb[ETHTOOL_A_BITSET_SIZE] || !tb[ETHTOOL_A_BITSET_VALUE]) return -EINVAL; *compact = true; return 0; } /** * ethnl_name_to_idx() - look up string index for a name * @names: array of ETH_GSTRING_LEN sized strings * @n_names: number of strings in the array * @name: name to look up * * Return: index of the string if found, -ENOENT if not found */ static int ethnl_name_to_idx(ethnl_string_array_t names, unsigned int n_names, const char *name) { unsigned int i; if (!names) return -ENOENT; for (i = 0; i < n_names; i++) { /* names[i] may not be null terminated */ if (!strncmp(names[i], name, ETH_GSTRING_LEN) && strlen(name) <= ETH_GSTRING_LEN) return i; } return -ENOENT; } static int ethnl_parse_bit(unsigned int *index, bool *val, unsigned int nbits, const struct nlattr *bit_attr, bool no_mask, ethnl_string_array_t names, struct netlink_ext_ack *extack) { struct nlattr *tb[ARRAY_SIZE(bit_policy)]; int ret, idx; ret = nla_parse_nested(tb, ARRAY_SIZE(bit_policy) - 1, bit_attr, bit_policy, extack); if (ret < 0) return ret; if (tb[ETHTOOL_A_BITSET_BIT_INDEX]) { const char *name; idx = nla_get_u32(tb[ETHTOOL_A_BITSET_BIT_INDEX]); if (idx >= nbits) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_BIT_INDEX], "bit index too high"); return -EOPNOTSUPP; } name = names ? 
names[idx] : NULL; if (tb[ETHTOOL_A_BITSET_BIT_NAME] && name && strncmp(nla_data(tb[ETHTOOL_A_BITSET_BIT_NAME]), name, nla_len(tb[ETHTOOL_A_BITSET_BIT_NAME]))) { NL_SET_ERR_MSG_ATTR(extack, bit_attr, "bit index and name mismatch"); return -EINVAL; } } else if (tb[ETHTOOL_A_BITSET_BIT_NAME]) { idx = ethnl_name_to_idx(names, nbits, nla_data(tb[ETHTOOL_A_BITSET_BIT_NAME])); if (idx < 0) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_BIT_NAME], "bit name not found"); return -EOPNOTSUPP; } } else { NL_SET_ERR_MSG_ATTR(extack, bit_attr, "neither bit index nor name specified"); return -EINVAL; } *index = idx; *val = no_mask || tb[ETHTOOL_A_BITSET_BIT_VALUE]; return 0; } /** * ethnl_bitmap32_equal() - Compare two bitmaps * @map1: first bitmap * @map2: second bitmap * @nbits: bit size to compare * * Return: true if first @nbits are equal, false if not */ static bool ethnl_bitmap32_equal(const u32 *map1, const u32 *map2, unsigned int nbits) { if (memcmp(map1, map2, nbits / 32 * sizeof(u32))) return false; if (nbits % 32 == 0) return true; return !((map1[nbits / 32] ^ map2[nbits / 32]) & ethnl_lower_bits(nbits % 32)); } static int ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, const struct nlattr *attr, struct nlattr **tb, ethnl_string_array_t names, struct netlink_ext_ack *extack, bool *mod) { u32 *saved_bitmap = NULL; struct nlattr *bit_attr; bool no_mask; int rem; int ret; if (tb[ETHTOOL_A_BITSET_VALUE]) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_VALUE], "value only allowed in compact bitset"); return -EINVAL; } if (tb[ETHTOOL_A_BITSET_MASK]) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK], "mask only allowed in compact bitset"); return -EINVAL; } no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; if (no_mask) { unsigned int nwords = DIV_ROUND_UP(nbits, 32); unsigned int nbytes = nwords * sizeof(u32); bool dummy; /* The bitmap size is only the size of the map part without * its mask part. 
*/ saved_bitmap = kcalloc(nwords, sizeof(u32), GFP_KERNEL); if (!saved_bitmap) return -ENOMEM; memcpy(saved_bitmap, bitmap, nbytes); ethnl_bitmap32_clear(bitmap, 0, nbits, &dummy); } nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) { bool old_val, new_val; unsigned int idx; if (nla_type(bit_attr) != ETHTOOL_A_BITSET_BITS_BIT) { NL_SET_ERR_MSG_ATTR(extack, bit_attr, "only ETHTOOL_A_BITSET_BITS_BIT allowed in ETHTOOL_A_BITSET_BITS"); kfree(saved_bitmap); return -EINVAL; } ret = ethnl_parse_bit(&idx, &new_val, nbits, bit_attr, no_mask, names, extack); if (ret < 0) { kfree(saved_bitmap); return ret; } old_val = bitmap[idx / 32] & ((u32)1 << (idx % 32)); if (new_val != old_val) { if (new_val) bitmap[idx / 32] |= ((u32)1 << (idx % 32)); else bitmap[idx / 32] &= ~((u32)1 << (idx % 32)); if (!no_mask) *mod = true; } } if (no_mask && !ethnl_bitmap32_equal(saved_bitmap, bitmap, nbits)) *mod = true; kfree(saved_bitmap); return 0; } static int ethnl_compact_sanity_checks(unsigned int nbits, const struct nlattr *nest, struct nlattr **tb, struct netlink_ext_ack *extack) { bool no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; unsigned int attr_nbits, attr_nwords; const struct nlattr *test_attr; if (no_mask && tb[ETHTOOL_A_BITSET_MASK]) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK], "mask not allowed in list bitset"); return -EINVAL; } if (!tb[ETHTOOL_A_BITSET_SIZE]) { NL_SET_ERR_MSG_ATTR(extack, nest, "missing size in compact bitset"); return -EINVAL; } if (!tb[ETHTOOL_A_BITSET_VALUE]) { NL_SET_ERR_MSG_ATTR(extack, nest, "missing value in compact bitset"); return -EINVAL; } if (!no_mask && !tb[ETHTOOL_A_BITSET_MASK]) { NL_SET_ERR_MSG_ATTR(extack, nest, "missing mask in compact nonlist bitset"); return -EINVAL; } attr_nbits = nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]); attr_nwords = DIV_ROUND_UP(attr_nbits, 32); if (nla_len(tb[ETHTOOL_A_BITSET_VALUE]) != attr_nwords * sizeof(u32)) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_VALUE], "bitset value length does not match size"); return -EINVAL; } if (tb[ETHTOOL_A_BITSET_MASK] && nla_len(tb[ETHTOOL_A_BITSET_MASK]) != attr_nwords * sizeof(u32)) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK], "bitset mask length does not match size"); return -EINVAL; } if (attr_nbits <= nbits) return 0; test_attr = no_mask ? tb[ETHTOOL_A_BITSET_VALUE] : tb[ETHTOOL_A_BITSET_MASK]; if (ethnl_bitmap32_not_zero(nla_data(test_attr), nbits, attr_nbits)) { NL_SET_ERR_MSG_ATTR(extack, test_attr, "cannot modify bits past kernel bitset size"); return -EINVAL; } return 0; } /** * ethnl_update_bitset32() - Apply a bitset nest to a u32 based bitmap * @bitmap: bitmap to update * @nbits: size of the updated bitmap in bits * @attr: nest attribute to parse and apply * @names: array of bit names; may be null for compact format * @extack: extack for error reporting * @mod: set this to true if bitmap is modified, leave as it is if not * * Apply bitset netsted attribute to a bitmap. If the attribute represents * a bit list, @bitmap is set to its contents; otherwise, bits in mask are * set to values from value. Bitmaps in the attribute may be longer than * @nbits but the message must not request modifying any bits past @nbits. 
* * Return: negative error code on failure, 0 on success */ int ethnl_update_bitset32(u32 *bitmap, unsigned int nbits, const struct nlattr *attr, ethnl_string_array_t names, struct netlink_ext_ack *extack, bool *mod) { struct nlattr *tb[ARRAY_SIZE(bitset_policy)]; unsigned int change_bits; bool no_mask; int ret; if (!attr) return 0; ret = nla_parse_nested(tb, ARRAY_SIZE(bitset_policy) - 1, attr, bitset_policy, extack); if (ret < 0) return ret; if (tb[ETHTOOL_A_BITSET_BITS]) return ethnl_update_bitset32_verbose(bitmap, nbits, attr, tb, names, extack, mod); ret = ethnl_compact_sanity_checks(nbits, attr, tb, extack); if (ret < 0) return ret; no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; change_bits = min_t(unsigned int, nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]), nbits); ethnl_bitmap32_update(bitmap, change_bits, nla_data(tb[ETHTOOL_A_BITSET_VALUE]), no_mask ? NULL : nla_data(tb[ETHTOOL_A_BITSET_MASK]), mod); if (no_mask && change_bits < nbits) ethnl_bitmap32_clear(bitmap, change_bits, nbits, mod); return 0; } /** * ethnl_parse_bitset() - Compute effective value and mask from bitset nest * @val: unsigned long based bitmap to put value into * @mask: unsigned long based bitmap to put mask into * @nbits: size of @val and @mask bitmaps * @attr: nest attribute to parse and apply * @names: array of bit names; may be null for compact format * @extack: extack for error reporting * * Provide @nbits size long bitmaps for value and mask so that * x = (val & mask) | (x & ~mask) would modify any @nbits sized bitmap x * the same way ethnl_update_bitset() with the same bitset attribute would. * * Return: negative error code on failure, 0 on success */ int ethnl_parse_bitset(unsigned long *val, unsigned long *mask, unsigned int nbits, const struct nlattr *attr, ethnl_string_array_t names, struct netlink_ext_ack *extack) { struct nlattr *tb[ARRAY_SIZE(bitset_policy)]; const struct nlattr *bit_attr; bool no_mask; int rem; int ret; if (!attr) return 0; ret = nla_parse_nested(tb, ARRAY_SIZE(bitset_policy) - 1, attr, bitset_policy, extack); if (ret < 0) return ret; no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; if (!tb[ETHTOOL_A_BITSET_BITS]) { unsigned int change_bits; ret = ethnl_compact_sanity_checks(nbits, attr, tb, extack); if (ret < 0) return ret; change_bits = nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]); if (change_bits > nbits) change_bits = nbits; bitmap_from_arr32(val, nla_data(tb[ETHTOOL_A_BITSET_VALUE]), change_bits); if (change_bits < nbits) bitmap_clear(val, change_bits, nbits - change_bits); if (no_mask) { bitmap_fill(mask, nbits); } else { bitmap_from_arr32(mask, nla_data(tb[ETHTOOL_A_BITSET_MASK]), change_bits); if (change_bits < nbits) bitmap_clear(mask, change_bits, nbits - change_bits); } return 0; } if (tb[ETHTOOL_A_BITSET_VALUE]) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_VALUE], "value only allowed in compact bitset"); return -EINVAL; } if (tb[ETHTOOL_A_BITSET_MASK]) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK], "mask only allowed in compact bitset"); return -EINVAL; } bitmap_zero(val, nbits); if (no_mask) bitmap_fill(mask, nbits); else bitmap_zero(mask, nbits); nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) { unsigned int idx; bool bit_val; ret = ethnl_parse_bit(&idx, &bit_val, nbits, bit_attr, no_mask, names, extack); if (ret < 0) return ret; if (bit_val) __set_bit(idx, val); if (!no_mask) __set_bit(idx, mask); } return 0; } #if BITS_PER_LONG == 64 && defined(__BIG_ENDIAN) /* 64-bit big endian architectures are the only case when u32 based bitmaps * and unsigned long based 
bitmaps have different memory layout so that we * cannot simply cast the latter to the former and need actual wrappers * converting the latter to the former. * * To reduce the number of slab allocations, the wrappers use fixed size local * variables for bitmaps up to ETHNL_SMALL_BITMAP_BITS bits which is the * majority of bitmaps used by ethtool. */ #define ETHNL_SMALL_BITMAP_BITS 128 #define ETHNL_SMALL_BITMAP_WORDS DIV_ROUND_UP(ETHNL_SMALL_BITMAP_BITS, 32) int ethnl_bitset_size(const unsigned long *val, const unsigned long *mask, unsigned int nbits, ethnl_string_array_t names, bool compact) { u32 small_mask32[ETHNL_SMALL_BITMAP_WORDS]; u32 small_val32[ETHNL_SMALL_BITMAP_WORDS]; u32 *mask32; u32 *val32; int ret; if (nbits > ETHNL_SMALL_BITMAP_BITS) { unsigned int nwords = DIV_ROUND_UP(nbits, 32); val32 = kmalloc_array(2 * nwords, sizeof(u32), GFP_KERNEL); if (!val32) return -ENOMEM; mask32 = val32 + nwords; } else { val32 = small_val32; mask32 = small_mask32; } bitmap_to_arr32(val32, val, nbits); if (mask) bitmap_to_arr32(mask32, mask, nbits); else mask32 = NULL; ret = ethnl_bitset32_size(val32, mask32, nbits, names, compact); if (nbits > ETHNL_SMALL_BITMAP_BITS) kfree(val32); return ret; } int ethnl_put_bitset(struct sk_buff *skb, int attrtype, const unsigned long *val, const unsigned long *mask, unsigned int nbits, ethnl_string_array_t names, bool compact) { u32 small_mask32[ETHNL_SMALL_BITMAP_WORDS]; u32 small_val32[ETHNL_SMALL_BITMAP_WORDS]; u32 *mask32; u32 *val32; int ret; if (nbits > ETHNL_SMALL_BITMAP_BITS) { unsigned int nwords = DIV_ROUND_UP(nbits, 32); val32 = kmalloc_array(2 * nwords, sizeof(u32), GFP_KERNEL); if (!val32) return -ENOMEM; mask32 = val32 + nwords; } else { val32 = small_val32; mask32 = small_mask32; } bitmap_to_arr32(val32, val, nbits); if (mask) bitmap_to_arr32(mask32, mask, nbits); else mask32 = NULL; ret = ethnl_put_bitset32(skb, attrtype, val32, mask32, nbits, names, compact); if (nbits > ETHNL_SMALL_BITMAP_BITS) kfree(val32); return ret; } int ethnl_update_bitset(unsigned long *bitmap, unsigned int nbits, const struct nlattr *attr, ethnl_string_array_t names, struct netlink_ext_ack *extack, bool *mod) { u32 small_bitmap32[ETHNL_SMALL_BITMAP_WORDS]; u32 *bitmap32 = small_bitmap32; bool u32_mod = false; int ret; if (nbits > ETHNL_SMALL_BITMAP_BITS) { unsigned int dst_words = DIV_ROUND_UP(nbits, 32); bitmap32 = kmalloc_array(dst_words, sizeof(u32), GFP_KERNEL); if (!bitmap32) return -ENOMEM; } bitmap_to_arr32(bitmap32, bitmap, nbits); ret = ethnl_update_bitset32(bitmap32, nbits, attr, names, extack, &u32_mod); if (u32_mod) { bitmap_from_arr32(bitmap, bitmap32, nbits); *mod = true; } if (nbits > ETHNL_SMALL_BITMAP_BITS) kfree(bitmap32); return ret; } #else /* On little endian 64-bit and all 32-bit architectures, an unsigned long * based bitmap can be interpreted as u32 based one using a simple cast. 
*/ int ethnl_bitset_size(const unsigned long *val, const unsigned long *mask, unsigned int nbits, ethnl_string_array_t names, bool compact) { return ethnl_bitset32_size((const u32 *)val, (const u32 *)mask, nbits, names, compact); } int ethnl_put_bitset(struct sk_buff *skb, int attrtype, const unsigned long *val, const unsigned long *mask, unsigned int nbits, ethnl_string_array_t names, bool compact) { return ethnl_put_bitset32(skb, attrtype, (const u32 *)val, (const u32 *)mask, nbits, names, compact); } int ethnl_update_bitset(unsigned long *bitmap, unsigned int nbits, const struct nlattr *attr, ethnl_string_array_t names, struct netlink_ext_ack *extack, bool *mod) { return ethnl_update_bitset32((u32 *)bitmap, nbits, attr, names, extack, mod); } #endif /* BITS_PER_LONG == 64 && defined(__BIG_ENDIAN) */
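/*
 * Illustrative userspace sketch, not part of bitset.c above: it demonstrates
 * why the simple cast used in the #else branch is only safe on 32-bit and on
 * 64-bit little endian hosts.  Bit 32 of an unsigned long based bitmap must
 * show up as bit 0 of the second u32; on 64-bit big endian it lands in the
 * first u32 instead, which is why the wrapper path converts with
 * bitmap_to_arr32()/bitmap_from_arr32().
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <limits.h>

int main(void)
{
#if ULONG_MAX == 0xffffffffUL
	printf("32-bit long: unsigned long and u32 bitmaps share one layout\n");
#else
	unsigned long bitmap[1] = { 1UL << 32 };	/* set bit 32 */
	uint32_t w[2];

	memcpy(w, bitmap, sizeof(w));			/* reinterpret as u32 words */
	printf("u32[0] = %#x, u32[1] = %#x\n", w[0], w[1]);
	printf("%s\n", w[1] == 1
	       ? "little endian: the cast keeps bit 32 in word 1, layouts match"
	       : "big endian: bit 32 landed in word 0, the cast would be unsafe");
#endif
	return 0;
}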
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6/ip6t_NPT.h> #include <linux/netfilter/x_tables.h> static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) { struct ip6t_npt_tginfo *npt = par->targinfo; struct in6_addr pfx; __wsum src_sum, dst_sum; if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) return -EINVAL; /* Ensure that LSB of prefix is zero */ ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len); if (!ipv6_addr_equal(&pfx, &npt->src_pfx.in6)) return -EINVAL; ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len); if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6)) return -EINVAL; src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0); dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0); npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum)); return 0; } static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, struct in6_addr *addr) { unsigned int pfx_len; unsigned int i, idx; __be32 mask; __sum16 sum; pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len); for (i = 0; i < pfx_len; i += 32) { if (pfx_len - i >= 32) mask = 0; else mask = htonl((1 << (i - pfx_len + 32)) - 1); idx = i / 32; addr->s6_addr32[idx] &= mask; addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx]; } if (pfx_len <= 48) idx = 3; else { for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) { if ((__force __sum16)addr->s6_addr16[idx] != CSUM_MANGLED_0) break; } if (idx == ARRAY_SIZE(addr->s6_addr16)) return false; } sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]), csum_unfold(npt->adjustment))); if (sum == CSUM_MANGLED_0) sum = 0; *(__force __sum16 *)&addr->s6_addr16[idx] = sum; return true; } static struct ipv6hdr *icmpv6_bounced_ipv6hdr(struct sk_buff *skb, struct ipv6hdr *_bounced_hdr) { if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) return NULL; if (!icmpv6_is_err(icmp6_hdr(skb)->icmp6_type)) return NULL; return skb_header_pointer(skb, skb_transport_offset(skb) + sizeof(struct icmp6hdr), sizeof(struct ipv6hdr), _bounced_hdr); } static unsigned int ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_npt_tginfo *npt = par->targinfo; struct ipv6hdr _bounced_hdr; struct ipv6hdr *bounced_hdr; struct in6_addr bounced_pfx; if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) { icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, saddr)); return NF_DROP; } /* rewrite dst addr of bounced packet which was sent to dst range */ bounced_hdr = icmpv6_bounced_ipv6hdr(skb, &_bounced_hdr); if (bounced_hdr) { ipv6_addr_prefix(&bounced_pfx, &bounced_hdr->daddr,
npt->src_pfx_len); if (ipv6_addr_cmp(&bounced_pfx, &npt->src_pfx.in6) == 0) ip6t_npt_map_pfx(npt, &bounced_hdr->daddr); } return XT_CONTINUE; } static unsigned int ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_npt_tginfo *npt = par->targinfo; struct ipv6hdr _bounced_hdr; struct ipv6hdr *bounced_hdr; struct in6_addr bounced_pfx; if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) { icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, daddr)); return NF_DROP; } /* rewrite src addr of bounced packet which was sent from dst range */ bounced_hdr = icmpv6_bounced_ipv6hdr(skb, &_bounced_hdr); if (bounced_hdr) { ipv6_addr_prefix(&bounced_pfx, &bounced_hdr->saddr, npt->src_pfx_len); if (ipv6_addr_cmp(&bounced_pfx, &npt->src_pfx.in6) == 0) ip6t_npt_map_pfx(npt, &bounced_hdr->saddr); } return XT_CONTINUE; } static struct xt_target ip6t_npt_target_reg[] __read_mostly = { { .name = "SNPT", .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_POST_ROUTING), .me = THIS_MODULE, }, { .name = "DNPT", .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), .me = THIS_MODULE, }, }; static int __init ip6t_npt_init(void) { return xt_register_targets(ip6t_npt_target_reg, ARRAY_SIZE(ip6t_npt_target_reg)); } static void __exit ip6t_npt_exit(void) { xt_unregister_targets(ip6t_npt_target_reg, ARRAY_SIZE(ip6t_npt_target_reg)); } module_init(ip6t_npt_init); module_exit(ip6t_npt_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS("ip6t_SNPT"); MODULE_ALIAS("ip6t_DNPT");
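/*
 * Illustrative userspace sketch, not part of the module above: it redoes the
 * SNPT/DNPT checksum-neutral mapping with plain 16-bit one's complement
 * arithmetic instead of the kernel csum_*() helpers.  The /48 prefix pair and
 * the example address are documentation-style values chosen for this demo;
 * the kernel additionally special-cases a result of 0xffff (CSUM_MANGLED_0),
 * which is skipped here for brevity.
 */
#include <stdio.h>
#include <stdint.h>

static uint16_t csum16_add(uint16_t a, uint16_t b)
{
	uint32_t sum = (uint32_t)a + b;

	return (uint16_t)((sum & 0xffff) + (sum >> 16));	/* end-around carry */
}

static uint16_t csum16_sub(uint16_t a, uint16_t b)
{
	return csum16_add(a, (uint16_t)~b);	/* one's complement subtraction */
}

static uint16_t csum16_words(const uint16_t *w, int n)
{
	uint16_t sum = 0;

	while (n--)
		sum = csum16_add(sum, *w++);
	return sum;
}

int main(void)
{
	/* fd01:203:405:1::1234 translated from fd01:203:405::/48 to 2001:db8:1::/48 */
	uint16_t addr[8]    = { 0xfd01, 0x0203, 0x0405, 0x0001, 0, 0, 0, 0x1234 };
	uint16_t src_pfx[8] = { 0xfd01, 0x0203, 0x0405, 0, 0, 0, 0, 0 };
	uint16_t dst_pfx[8] = { 0x2001, 0x0db8, 0x0001, 0, 0, 0, 0, 0 };
	uint16_t before = csum16_words(addr, 8);
	uint16_t adjustment;
	int i;

	/* mirrors ip6t_npt_checkentry(): adjustment = src_pfx_sum - dst_pfx_sum */
	adjustment = csum16_sub(csum16_words(src_pfx, 8), csum16_words(dst_pfx, 8));

	/* mirrors ip6t_npt_map_pfx() for a /48: rewrite the prefix, fix up word 3 */
	for (i = 0; i < 3; i++)
		addr[i] = dst_pfx[i];
	addr[3] = csum16_add(addr[3], adjustment);

	printf("translated word 3: %#06x\n", addr[3]);		/* 0xd550 for this pair */
	printf("16-bit one's complement sum: %#06x before, %#06x after -> %s\n",
	       before, csum16_words(addr, 8),
	       before == csum16_words(addr, 8) ? "checksum neutral" : "NOT neutral");
	return 0;
}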
/* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* Bluetooth HCI sockets. */ #include <linux/compat.h> #include <linux/export.h> #include <linux/utsname.h> #include <linux/sched.h> #include <linux/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/hci_mon.h> #include <net/bluetooth/mgmt.h> #include "mgmt_util.h" static LIST_HEAD(mgmt_chan_list); static DEFINE_MUTEX(mgmt_chan_list_lock); static DEFINE_IDA(sock_cookie_ida); static atomic_t monitor_promisc = ATOMIC_INIT(0); /* ----- HCI socket interface ----- */ /* Socket info */ #define hci_pi(sk) ((struct hci_pinfo *) sk) struct hci_pinfo { struct bt_sock bt; struct hci_dev *hdev; struct hci_filter filter; __u8 cmsg_mask; unsigned short channel; unsigned long flags; __u32 cookie; char comm[TASK_COMM_LEN]; __u16 mtu; }; static struct hci_dev *hci_hdev_from_sock(struct sock *sk) { struct hci_dev *hdev = hci_pi(sk)->hdev; if (!hdev) return ERR_PTR(-EBADFD); if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) return ERR_PTR(-EPIPE); return hdev; } void hci_sock_set_flag(struct sock *sk, int nr) { set_bit(nr, &hci_pi(sk)->flags); } void hci_sock_clear_flag(struct sock *sk, int nr) { clear_bit(nr, &hci_pi(sk)->flags); } int hci_sock_test_flag(struct sock *sk, int nr) { return test_bit(nr, &hci_pi(sk)->flags); } unsigned short hci_sock_get_channel(struct sock *sk) { return hci_pi(sk)->channel; } u32 hci_sock_get_cookie(struct sock *sk) { return hci_pi(sk)->cookie; } static bool hci_sock_gen_cookie(struct sock *sk) { int id = hci_pi(sk)->cookie; if (!id) { id = ida_alloc_min(&sock_cookie_ida, 1, GFP_KERNEL); if (id < 0) id = 0xffffffff; hci_pi(sk)->cookie = id; get_task_comm(hci_pi(sk)->comm, current); return true; } return false; } static void hci_sock_free_cookie(struct sock *sk) { int id = hci_pi(sk)->cookie; if (id) { hci_pi(sk)->cookie = 0xffffffff; ida_free(&sock_cookie_ida, id); } } static inline int hci_test_bit(int nr, const void *addr) { return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); } /* Security filter */ #define HCI_SFLT_MAX_OGF 5 struct hci_sec_filter { __u32 type_mask; __u32 event_mask[2]; __u32 ocf_mask[HCI_SFLT_MAX_OGF + 1][4]; }; static const struct hci_sec_filter hci_sec_filter = { /* Packet types */ 0x10, /* 
Events */ { 0x1000d9fe, 0x0000b00c }, /* Commands */ { { 0x0 }, /* OGF_LINK_CTL */ { 0xbe000006, 0x00000001, 0x00000000, 0x00 }, /* OGF_LINK_POLICY */ { 0x00005200, 0x00000000, 0x00000000, 0x00 }, /* OGF_HOST_CTL */ { 0xaab00200, 0x2b402aaa, 0x05220154, 0x00 }, /* OGF_INFO_PARAM */ { 0x000002be, 0x00000000, 0x00000000, 0x00 }, /* OGF_STATUS_PARAM */ { 0x000000ea, 0x00000000, 0x00000000, 0x00 } } }; static struct bt_sock_list hci_sk_list = { .lock = __RW_LOCK_UNLOCKED(hci_sk_list.lock) }; static bool is_filtered_packet(struct sock *sk, struct sk_buff *skb) { struct hci_filter *flt; int flt_type, flt_event; /* Apply filter */ flt = &hci_pi(sk)->filter; flt_type = hci_skb_pkt_type(skb) & HCI_FLT_TYPE_BITS; if (!test_bit(flt_type, &flt->type_mask)) return true; /* Extra filter for event packets only */ if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT) return false; flt_event = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS); if (!hci_test_bit(flt_event, &flt->event_mask)) return true; /* Check filter only when opcode is set */ if (!flt->opcode) return false; if (flt_event == HCI_EV_CMD_COMPLETE && flt->opcode != get_unaligned((__le16 *)(skb->data + 3))) return true; if (flt_event == HCI_EV_CMD_STATUS && flt->opcode != get_unaligned((__le16 *)(skb->data + 4))) return true; return false; } /* Send frame to RAW socket */ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) { struct sock *sk; struct sk_buff *skb_copy = NULL; BT_DBG("hdev %p len %d", hdev, skb->len); read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; if (sk->sk_state != BT_BOUND || hci_pi(sk)->hdev != hdev) continue; /* Don't send frame to the socket it came from */ if (skb->sk == sk) continue; if (hci_pi(sk)->channel == HCI_CHANNEL_RAW) { if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT && hci_skb_pkt_type(skb) != HCI_EVENT_PKT && hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) continue; if (is_filtered_packet(sk, skb)) continue; } else if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { if (!bt_cb(skb)->incoming) continue; if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT && hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) continue; } else { /* Don't send frame to other channel types */ continue; } if (!skb_copy) { /* Create a private copy with headroom */ skb_copy = __pskb_copy_fclone(skb, 1, GFP_ATOMIC, true); if (!skb_copy) continue; /* Put type byte before the data */ memcpy(skb_push(skb_copy, 1), &hci_skb_pkt_type(skb), 1); } nskb = skb_clone(skb_copy, GFP_ATOMIC); if (!nskb) continue; if (sock_queue_rcv_skb(sk, nskb)) kfree_skb(nskb); } read_unlock(&hci_sk_list.lock); kfree_skb(skb_copy); } static void hci_sock_copy_creds(struct sock *sk, struct sk_buff *skb) { struct scm_creds *creds; if (!sk || WARN_ON(!skb)) return; creds = &bt_cb(skb)->creds; /* Check if peer credentials is set */ if (!sk->sk_peer_pid) { /* Check if parent peer credentials is set */ if (bt_sk(sk)->parent && bt_sk(sk)->parent->sk_peer_pid) sk = bt_sk(sk)->parent; else return; } /* Check if scm_creds already set */ if (creds->pid == pid_vnr(sk->sk_peer_pid)) return; memset(creds, 0, sizeof(*creds)); creds->pid = pid_vnr(sk->sk_peer_pid); if (sk->sk_peer_cred) { creds->uid = sk->sk_peer_cred->uid; creds->gid = sk->sk_peer_cred->gid; } } static struct sk_buff *hci_skb_clone(struct sk_buff *skb) { struct sk_buff *nskb; if (!skb) return NULL; nskb = skb_clone(skb, GFP_ATOMIC); 
if (!nskb) return NULL; hci_sock_copy_creds(skb->sk, nskb); return nskb; } /* Send frame to sockets with specific channel */ static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk) { struct sock *sk; BT_DBG("channel %u len %d", channel, skb->len); sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; /* Ignore socket without the flag set */ if (!hci_sock_test_flag(sk, flag)) continue; /* Skip the original socket */ if (sk == skip_sk) continue; if (sk->sk_state != BT_BOUND) continue; if (hci_pi(sk)->channel != channel) continue; nskb = hci_skb_clone(skb); if (!nskb) continue; if (sock_queue_rcv_skb(sk, nskb)) kfree_skb(nskb); } } void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk) { read_lock(&hci_sk_list.lock); __hci_send_to_channel(channel, skb, flag, skip_sk); read_unlock(&hci_sk_list.lock); } /* Send frame to monitor socket */ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) { struct sk_buff *skb_copy = NULL; struct hci_mon_hdr *hdr; __le16 opcode; if (!atomic_read(&monitor_promisc)) return; BT_DBG("hdev %p len %d", hdev, skb->len); switch (hci_skb_pkt_type(skb)) { case HCI_COMMAND_PKT: opcode = cpu_to_le16(HCI_MON_COMMAND_PKT); break; case HCI_EVENT_PKT: opcode = cpu_to_le16(HCI_MON_EVENT_PKT); break; case HCI_ACLDATA_PKT: if (bt_cb(skb)->incoming) opcode = cpu_to_le16(HCI_MON_ACL_RX_PKT); else opcode = cpu_to_le16(HCI_MON_ACL_TX_PKT); break; case HCI_SCODATA_PKT: if (bt_cb(skb)->incoming) opcode = cpu_to_le16(HCI_MON_SCO_RX_PKT); else opcode = cpu_to_le16(HCI_MON_SCO_TX_PKT); break; case HCI_ISODATA_PKT: if (bt_cb(skb)->incoming) opcode = cpu_to_le16(HCI_MON_ISO_RX_PKT); else opcode = cpu_to_le16(HCI_MON_ISO_TX_PKT); break; case HCI_DIAG_PKT: opcode = cpu_to_le16(HCI_MON_VENDOR_DIAG); break; default: return; } /* Create a private copy with headroom */ skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, GFP_ATOMIC, true); if (!skb_copy) return; hci_sock_copy_creds(skb->sk, skb_copy); /* Put header before the data */ hdr = skb_push(skb_copy, HCI_MON_HDR_SIZE); hdr->opcode = opcode; hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len); hci_send_to_channel(HCI_CHANNEL_MONITOR, skb_copy, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb_copy); } void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, void *data, u16 data_len, ktime_t tstamp, int flag, struct sock *skip_sk) { struct sock *sk; __le16 index; if (hdev) index = cpu_to_le16(hdev->id); else index = cpu_to_le16(MGMT_INDEX_NONE); read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { struct hci_mon_hdr *hdr; struct sk_buff *skb; if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) continue; /* Ignore socket without the flag set */ if (!hci_sock_test_flag(sk, flag)) continue; /* Skip the original socket */ if (sk == skip_sk) continue; skb = bt_skb_alloc(6 + data_len, GFP_ATOMIC); if (!skb) continue; put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); put_unaligned_le16(event, skb_put(skb, 2)); if (data) skb_put_data(skb, data, data_len); skb->tstamp = tstamp; hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); hdr->index = index; hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); __hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } read_unlock(&hci_sk_list.lock); } static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) { struct hci_mon_hdr *hdr; struct hci_mon_new_index *ni; struct 
hci_mon_index_info *ii; struct sk_buff *skb; __le16 opcode; switch (event) { case HCI_DEV_REG: skb = bt_skb_alloc(HCI_MON_NEW_INDEX_SIZE, GFP_ATOMIC); if (!skb) return NULL; ni = skb_put(skb, HCI_MON_NEW_INDEX_SIZE); ni->type = 0x00; /* Old hdev->dev_type */ ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name, strnlen(hdev->name, sizeof(ni->name)), '\0'); opcode = cpu_to_le16(HCI_MON_NEW_INDEX); break; case HCI_DEV_UNREG: skb = bt_skb_alloc(0, GFP_ATOMIC); if (!skb) return NULL; opcode = cpu_to_le16(HCI_MON_DEL_INDEX); break; case HCI_DEV_SETUP: if (hdev->manufacturer == 0xffff) return NULL; fallthrough; case HCI_DEV_UP: skb = bt_skb_alloc(HCI_MON_INDEX_INFO_SIZE, GFP_ATOMIC); if (!skb) return NULL; ii = skb_put(skb, HCI_MON_INDEX_INFO_SIZE); bacpy(&ii->bdaddr, &hdev->bdaddr); ii->manufacturer = cpu_to_le16(hdev->manufacturer); opcode = cpu_to_le16(HCI_MON_INDEX_INFO); break; case HCI_DEV_OPEN: skb = bt_skb_alloc(0, GFP_ATOMIC); if (!skb) return NULL; opcode = cpu_to_le16(HCI_MON_OPEN_INDEX); break; case HCI_DEV_CLOSE: skb = bt_skb_alloc(0, GFP_ATOMIC); if (!skb) return NULL; opcode = cpu_to_le16(HCI_MON_CLOSE_INDEX); break; default: return NULL; } __net_timestamp(skb); hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = opcode; hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; } static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) { struct hci_mon_hdr *hdr; struct sk_buff *skb; u16 format; u8 ver[3]; u32 flags; /* No message needed when cookie is not present */ if (!hci_pi(sk)->cookie) return NULL; switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: format = 0x0000; ver[0] = BT_SUBSYS_VERSION; put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); break; case HCI_CHANNEL_USER: format = 0x0001; ver[0] = BT_SUBSYS_VERSION; put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); break; case HCI_CHANNEL_CONTROL: format = 0x0002; mgmt_fill_version_info(ver); break; default: /* No message for unsupported format */ return NULL; } skb = bt_skb_alloc(14 + TASK_COMM_LEN, GFP_ATOMIC); if (!skb) return NULL; hci_sock_copy_creds(sk, skb); flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 
0x1 : 0x0; put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); put_unaligned_le16(format, skb_put(skb, 2)); skb_put_data(skb, ver, sizeof(ver)); put_unaligned_le32(flags, skb_put(skb, 4)); skb_put_u8(skb, TASK_COMM_LEN); skb_put_data(skb, hci_pi(sk)->comm, TASK_COMM_LEN); __net_timestamp(skb); hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN); if (hci_pi(sk)->hdev) hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); else hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; } static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) { struct hci_mon_hdr *hdr; struct sk_buff *skb; /* No message needed when cookie is not present */ if (!hci_pi(sk)->cookie) return NULL; switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: case HCI_CHANNEL_USER: case HCI_CHANNEL_CONTROL: break; default: /* No message for unsupported format */ return NULL; } skb = bt_skb_alloc(4, GFP_ATOMIC); if (!skb) return NULL; hci_sock_copy_creds(sk, skb); put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); __net_timestamp(skb); hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE); if (hci_pi(sk)->hdev) hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); else hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; } static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index, u16 opcode, u16 len, const void *buf) { struct hci_mon_hdr *hdr; struct sk_buff *skb; skb = bt_skb_alloc(6 + len, GFP_ATOMIC); if (!skb) return NULL; hci_sock_copy_creds(sk, skb); put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); put_unaligned_le16(opcode, skb_put(skb, 2)); if (buf) skb_put_data(skb, buf, len); __net_timestamp(skb); hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_COMMAND); hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; } static void __printf(2, 3) send_monitor_note(struct sock *sk, const char *fmt, ...) 
{ size_t len; struct hci_mon_hdr *hdr; struct sk_buff *skb; va_list args; va_start(args, fmt); len = vsnprintf(NULL, 0, fmt, args); va_end(args); skb = bt_skb_alloc(len + 1, GFP_ATOMIC); if (!skb) return; hci_sock_copy_creds(sk, skb); va_start(args, fmt); vsprintf(skb_put(skb, len), fmt, args); *(u8 *)skb_put(skb, 1) = 0; va_end(args); __net_timestamp(skb); hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_SYSTEM_NOTE); hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); } static void send_monitor_replay(struct sock *sk) { struct hci_dev *hdev; read_lock(&hci_dev_list_lock); list_for_each_entry(hdev, &hci_dev_list, list) { struct sk_buff *skb; skb = create_monitor_event(hdev, HCI_DEV_REG); if (!skb) continue; if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); if (!test_bit(HCI_RUNNING, &hdev->flags)) continue; skb = create_monitor_event(hdev, HCI_DEV_OPEN); if (!skb) continue; if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); if (test_bit(HCI_UP, &hdev->flags)) skb = create_monitor_event(hdev, HCI_DEV_UP); else if (hci_dev_test_flag(hdev, HCI_SETUP)) skb = create_monitor_event(hdev, HCI_DEV_SETUP); else skb = NULL; if (skb) { if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); } } read_unlock(&hci_dev_list_lock); } static void send_monitor_control_replay(struct sock *mon_sk) { struct sock *sk; read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *skb; skb = create_monitor_ctrl_open(sk); if (!skb) continue; if (sock_queue_rcv_skb(mon_sk, skb)) kfree_skb(skb); } read_unlock(&hci_sk_list.lock); } /* Generate internal stack event */ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) { struct hci_event_hdr *hdr; struct hci_ev_stack_internal *ev; struct sk_buff *skb; skb = bt_skb_alloc(HCI_EVENT_HDR_SIZE + sizeof(*ev) + dlen, GFP_ATOMIC); if (!skb) return; hdr = skb_put(skb, HCI_EVENT_HDR_SIZE); hdr->evt = HCI_EV_STACK_INTERNAL; hdr->plen = sizeof(*ev) + dlen; ev = skb_put(skb, sizeof(*ev) + dlen); ev->type = type; memcpy(ev->data, data, dlen); bt_cb(skb)->incoming = 1; __net_timestamp(skb); hci_skb_pkt_type(skb) = HCI_EVENT_PKT; hci_send_to_sock(hdev, skb); kfree_skb(skb); } void hci_sock_dev_event(struct hci_dev *hdev, int event) { BT_DBG("hdev %s event %d", hdev->name, event); if (atomic_read(&monitor_promisc)) { struct sk_buff *skb; /* Send event to monitor */ skb = create_monitor_event(hdev, event); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } if (event <= HCI_DEV_DOWN) { struct hci_ev_si_device ev; /* Send event to sockets */ ev.event = event; ev.dev_id = hdev->id; hci_si_event(NULL, HCI_EV_SI_DEVICE, sizeof(ev), &ev); } if (event == HCI_DEV_UNREG) { struct sock *sk; /* Wake up sockets using this dead device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { if (hci_pi(sk)->hdev == hdev) { sk->sk_err = EPIPE; sk->sk_state_change(sk); } } read_unlock(&hci_sk_list.lock); } } static struct hci_mgmt_chan *__hci_mgmt_chan_find(unsigned short channel) { struct hci_mgmt_chan *c; list_for_each_entry(c, &mgmt_chan_list, list) { if (c->channel == channel) return c; } return NULL; } static struct hci_mgmt_chan *hci_mgmt_chan_find(unsigned short channel) { struct hci_mgmt_chan *c; mutex_lock(&mgmt_chan_list_lock); c = __hci_mgmt_chan_find(channel); mutex_unlock(&mgmt_chan_list_lock); return c; } int hci_mgmt_chan_register(struct hci_mgmt_chan *c) { if 
(c->channel < HCI_CHANNEL_CONTROL) return -EINVAL; mutex_lock(&mgmt_chan_list_lock); if (__hci_mgmt_chan_find(c->channel)) { mutex_unlock(&mgmt_chan_list_lock); return -EALREADY; } list_add_tail(&c->list, &mgmt_chan_list); mutex_unlock(&mgmt_chan_list_lock); return 0; } EXPORT_SYMBOL(hci_mgmt_chan_register); void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c) { mutex_lock(&mgmt_chan_list_lock); list_del(&c->list); mutex_unlock(&mgmt_chan_list_lock); } EXPORT_SYMBOL(hci_mgmt_chan_unregister); static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; struct hci_dev *hdev; struct sk_buff *skb; BT_DBG("sock %p sk %p", sock, sk); if (!sk) return 0; lock_sock(sk); switch (hci_pi(sk)->channel) { case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); break; case HCI_CHANNEL_RAW: case HCI_CHANNEL_USER: case HCI_CHANNEL_CONTROL: /* Send event to monitor */ skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } hci_sock_free_cookie(sk); break; } bt_sock_unlink(&hci_sk_list, sk); hdev = hci_pi(sk)->hdev; if (hdev) { if (hci_pi(sk)->channel == HCI_CHANNEL_USER && !hci_dev_test_flag(hdev, HCI_UNREGISTER)) { /* When releasing a user channel exclusive access, * call hci_dev_do_close directly instead of calling * hci_dev_close to ensure the exclusive access will * be released and the controller brought back down. * * The checking of HCI_AUTO_OFF is not needed in this * case since it will have been cleared already when * opening the user channel. * * Make sure to also check that we haven't already * unregistered since all the cleanup will have already * been complete and hdev will get released when we put * below. */ hci_dev_do_close(hdev); hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); mgmt_index_added(hdev); } atomic_dec(&hdev->promisc); hci_dev_put(hdev); } sock_orphan(sk); release_sock(sk); sock_put(sk); return 0; } static int hci_sock_reject_list_add(struct hci_dev *hdev, void __user *arg) { bdaddr_t bdaddr; int err; if (copy_from_user(&bdaddr, arg, sizeof(bdaddr))) return -EFAULT; hci_dev_lock(hdev); err = hci_bdaddr_list_add(&hdev->reject_list, &bdaddr, BDADDR_BREDR); hci_dev_unlock(hdev); return err; } static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg) { bdaddr_t bdaddr; int err; if (copy_from_user(&bdaddr, arg, sizeof(bdaddr))) return -EFAULT; hci_dev_lock(hdev); err = hci_bdaddr_list_del(&hdev->reject_list, &bdaddr, BDADDR_BREDR); hci_dev_unlock(hdev); return err; } /* Ioctls that require bound socket */ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { struct hci_dev *hdev = hci_hdev_from_sock(sk); if (IS_ERR(hdev)) return PTR_ERR(hdev); if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) return -EOPNOTSUPP; switch (cmd) { case HCISETRAW: if (!capable(CAP_NET_ADMIN)) return -EPERM; return -EOPNOTSUPP; case HCIGETCONNINFO: return hci_get_conn_info(hdev, (void __user *)arg); case HCIGETAUTHINFO: return hci_get_auth_info(hdev, (void __user *)arg); case HCIBLOCKADDR: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_sock_reject_list_add(hdev, (void __user *)arg); case HCIUNBLOCKADDR: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_sock_reject_list_del(hdev, (void __user *)arg); } return -ENOIOCTLCMD; } static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; int err; 
BT_DBG("cmd %x arg %lx", cmd, arg); /* Make sure the cmd is valid before doing anything */ switch (cmd) { case HCIGETDEVLIST: case HCIGETDEVINFO: case HCIGETCONNLIST: case HCIDEVUP: case HCIDEVDOWN: case HCIDEVRESET: case HCIDEVRESTAT: case HCISETSCAN: case HCISETAUTH: case HCISETENCRYPT: case HCISETPTYPE: case HCISETLINKPOL: case HCISETLINKMODE: case HCISETACLMTU: case HCISETSCOMTU: case HCIINQUIRY: case HCISETRAW: case HCIGETCONNINFO: case HCIGETAUTHINFO: case HCIBLOCKADDR: case HCIUNBLOCKADDR: break; default: return -ENOIOCTLCMD; } lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { err = -EBADFD; goto done; } /* When calling an ioctl on an unbound raw socket, then ensure * that the monitor gets informed. Ensure that the resulting event * is only send once by checking if the cookie exists or not. The * socket cookie will be only ever generated once for the lifetime * of a given socket. */ if (hci_sock_gen_cookie(sk)) { struct sk_buff *skb; /* Perform careful checks before setting the HCI_SOCK_TRUSTED * flag. Make sure that not only the current task but also * the socket opener has the required capability, since * privileged programs can be tricked into making ioctl calls * on HCI sockets, and the socket should not be marked as * trusted simply because the ioctl caller is privileged. */ if (sk_capable(sk, CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } release_sock(sk); switch (cmd) { case HCIGETDEVLIST: return hci_get_dev_list(argp); case HCIGETDEVINFO: return hci_get_dev_info(argp); case HCIGETCONNLIST: return hci_get_conn_list(argp); case HCIDEVUP: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_open(arg); case HCIDEVDOWN: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_close(arg); case HCIDEVRESET: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_reset(arg); case HCIDEVRESTAT: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_reset_stat(arg); case HCISETSCAN: case HCISETAUTH: case HCISETENCRYPT: case HCISETPTYPE: case HCISETLINKPOL: case HCISETLINKMODE: case HCISETACLMTU: case HCISETSCOMTU: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_cmd(cmd, argp); case HCIINQUIRY: return hci_inquiry(argp); } lock_sock(sk); err = hci_sock_bound_ioctl(sk, cmd, arg); done: release_sock(sk); return err; } #ifdef CONFIG_COMPAT static int hci_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { switch (cmd) { case HCIDEVUP: case HCIDEVDOWN: case HCIDEVRESET: case HCIDEVRESTAT: return hci_sock_ioctl(sock, cmd, arg); } return hci_sock_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); } #endif static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sockaddr_hci haddr; struct sock *sk = sock->sk; struct hci_dev *hdev = NULL; struct sk_buff *skb; int len, err = 0; BT_DBG("sock %p sk %p", sock, sk); if (!addr) return -EINVAL; memset(&haddr, 0, sizeof(haddr)); len = min_t(unsigned int, sizeof(haddr), addr_len); memcpy(&haddr, addr, len); if (haddr.hci_family != AF_BLUETOOTH) return -EINVAL; lock_sock(sk); /* Allow detaching from dead device and attaching to alive device, if * the caller wants to re-bind (instead of close) this socket in * response to hci_sock_dev_event(HCI_DEV_UNREG) notification. 
*/ hdev = hci_pi(sk)->hdev; if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) { hci_pi(sk)->hdev = NULL; sk->sk_state = BT_OPEN; hci_dev_put(hdev); } hdev = NULL; if (sk->sk_state == BT_BOUND) { err = -EALREADY; goto done; } switch (haddr.hci_channel) { case HCI_CHANNEL_RAW: if (hci_pi(sk)->hdev) { err = -EALREADY; goto done; } if (haddr.hci_dev != HCI_DEV_NONE) { hdev = hci_dev_get(haddr.hci_dev); if (!hdev) { err = -ENODEV; goto done; } atomic_inc(&hdev->promisc); } hci_pi(sk)->channel = haddr.hci_channel; if (!hci_sock_gen_cookie(sk)) { /* In the case when a cookie has already been assigned, * then there has been already an ioctl issued against * an unbound socket and with that triggered an open * notification. Send a close notification first to * allow the state transition to bounded. */ skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } if (capable(CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); hci_pi(sk)->hdev = hdev; /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } break; case HCI_CHANNEL_USER: if (hci_pi(sk)->hdev) { err = -EALREADY; goto done; } if (haddr.hci_dev == HCI_DEV_NONE) { err = -EINVAL; goto done; } if (!capable(CAP_NET_ADMIN)) { err = -EPERM; goto done; } hdev = hci_dev_get(haddr.hci_dev); if (!hdev) { err = -ENODEV; goto done; } if (test_bit(HCI_INIT, &hdev->flags) || hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG) || (!hci_dev_test_flag(hdev, HCI_AUTO_OFF) && test_bit(HCI_UP, &hdev->flags))) { err = -EBUSY; hci_dev_put(hdev); goto done; } if (hci_dev_test_and_set_flag(hdev, HCI_USER_CHANNEL)) { err = -EUSERS; hci_dev_put(hdev); goto done; } mgmt_index_removed(hdev); err = hci_dev_open(hdev->id); if (err) { if (err == -EALREADY) { /* In case the transport is already up and * running, clear the error here. * * This can happen when opening a user * channel and HCI_AUTO_OFF grace period * is still active. */ err = 0; } else { hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); mgmt_index_added(hdev); hci_dev_put(hdev); goto done; } } hci_pi(sk)->channel = haddr.hci_channel; if (!hci_sock_gen_cookie(sk)) { /* In the case when a cookie has already been assigned, * this socket will transition from a raw socket into * a user channel socket. For a clean transition, send * the close notification first. */ skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } /* The user channel is restricted to CAP_NET_ADMIN * capabilities and with that implicitly trusted. */ hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); hci_pi(sk)->hdev = hdev; /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } atomic_inc(&hdev->promisc); break; case HCI_CHANNEL_MONITOR: if (haddr.hci_dev != HCI_DEV_NONE) { err = -EINVAL; goto done; } if (!capable(CAP_NET_RAW)) { err = -EPERM; goto done; } hci_pi(sk)->channel = haddr.hci_channel; /* The monitor interface is restricted to CAP_NET_RAW * capabilities and with that implicitly trusted. 
*/ hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); send_monitor_note(sk, "Linux version %s (%s)", init_utsname()->release, init_utsname()->machine); send_monitor_note(sk, "Bluetooth subsystem version %u.%u", BT_SUBSYS_VERSION, BT_SUBSYS_REVISION); send_monitor_replay(sk); send_monitor_control_replay(sk); atomic_inc(&monitor_promisc); break; case HCI_CHANNEL_LOGGING: if (haddr.hci_dev != HCI_DEV_NONE) { err = -EINVAL; goto done; } if (!capable(CAP_NET_ADMIN)) { err = -EPERM; goto done; } hci_pi(sk)->channel = haddr.hci_channel; break; default: if (!hci_mgmt_chan_find(haddr.hci_channel)) { err = -EINVAL; goto done; } if (haddr.hci_dev != HCI_DEV_NONE) { err = -EINVAL; goto done; } /* Users with CAP_NET_ADMIN capabilities are allowed * access to all management commands and events. For * untrusted users the interface is restricted and * also only untrusted events are sent. */ if (capable(CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); hci_pi(sk)->channel = haddr.hci_channel; /* At the moment the index and unconfigured index events * are enabled unconditionally. Setting them on each * socket when binding keeps this functionality. They * however might be cleared later and then sending of these * events will be disabled, but that is then intentional. * * This also enables generic events that are safe to be * received by untrusted users. Example for such events * are changes to settings, class of device, name etc. */ if (hci_pi(sk)->channel == HCI_CHANNEL_CONTROL) { if (!hci_sock_gen_cookie(sk)) { /* In the case when a cookie has already been * assigned, this socket will transition from * a raw socket into a control socket. To * allow for a clean transition, send the * close notification first. */ skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_OPTION_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_SETTING_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); } break; } /* Default MTU to HCI_MAX_FRAME_SIZE if not set */ if (!hci_pi(sk)->mtu) hci_pi(sk)->mtu = HCI_MAX_FRAME_SIZE; sk->sk_state = BT_BOUND; done: release_sock(sk); return err; } static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, int peer) { struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr; struct sock *sk = sock->sk; struct hci_dev *hdev; int err = 0; BT_DBG("sock %p sk %p", sock, sk); if (peer) return -EOPNOTSUPP; lock_sock(sk); hdev = hci_hdev_from_sock(sk); if (IS_ERR(hdev)) { err = PTR_ERR(hdev); goto done; } haddr->hci_family = AF_BLUETOOTH; haddr->hci_dev = hdev->id; haddr->hci_channel= hci_pi(sk)->channel; err = sizeof(*haddr); done: release_sock(sk); return err; } static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) { __u8 mask = hci_pi(sk)->cmsg_mask; if (mask & HCI_CMSG_DIR) { int incoming = bt_cb(skb)->incoming; put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(incoming), &incoming); } if (mask & HCI_CMSG_TSTAMP) { #ifdef CONFIG_COMPAT struct old_timeval32 ctv; #endif struct __kernel_old_timeval tv; void *data; int len; skb_get_timestamp(skb, &tv); data = &tv; len = sizeof(tv); #ifdef CONFIG_COMPAT if (!COMPAT_USE_64BIT_TIME && (msg->msg_flags & 
MSG_CMSG_COMPAT)) { ctv.tv_sec = tv.tv_sec; ctv.tv_usec = tv.tv_usec; data = &ctv; len = sizeof(ctv); } #endif put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, len, data); } } static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct scm_cookie scm; struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; unsigned int skblen; BT_DBG("sock %p, sk %p", sock, sk); if (flags & MSG_OOB) return -EOPNOTSUPP; if (hci_pi(sk)->channel == HCI_CHANNEL_LOGGING) return -EOPNOTSUPP; if (sk->sk_state == BT_CLOSED) return 0; skb = skb_recv_datagram(sk, flags, &err); if (!skb) return err; skblen = skb->len; copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; copied = len; } skb_reset_transport_header(skb); err = skb_copy_datagram_msg(skb, 0, msg, copied); switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: hci_sock_cmsg(sk, msg, skb); break; case HCI_CHANNEL_USER: case HCI_CHANNEL_MONITOR: sock_recv_timestamp(msg, sk, skb); break; default: if (hci_mgmt_chan_find(hci_pi(sk)->channel)) sock_recv_timestamp(msg, sk, skb); break; } memset(&scm, 0, sizeof(scm)); scm.creds = bt_cb(skb)->creds; skb_free_datagram(sk, skb); if (flags & MSG_TRUNC) copied = skblen; scm_recv(sock, msg, &scm, flags); return err ? : copied; } static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, struct sk_buff *skb) { u8 *cp; struct mgmt_hdr *hdr; u16 opcode, index, len; struct hci_dev *hdev = NULL; const struct hci_mgmt_handler *handler; bool var_len, no_hdev; int err; BT_DBG("got %d bytes", skb->len); if (skb->len < sizeof(*hdr)) return -EINVAL; hdr = (void *)skb->data; opcode = __le16_to_cpu(hdr->opcode); index = __le16_to_cpu(hdr->index); len = __le16_to_cpu(hdr->len); if (len != skb->len - sizeof(*hdr)) { err = -EINVAL; goto done; } if (chan->channel == HCI_CHANNEL_CONTROL) { struct sk_buff *cmd; /* Send event to monitor */ cmd = create_monitor_ctrl_command(sk, index, opcode, len, skb->data + sizeof(*hdr)); if (cmd) { hci_send_to_channel(HCI_CHANNEL_MONITOR, cmd, HCI_SOCK_TRUSTED, NULL); kfree_skb(cmd); } } if (opcode >= chan->handler_count || chan->handlers[opcode].func == NULL) { BT_DBG("Unknown op %u", opcode); err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_UNKNOWN_COMMAND); goto done; } handler = &chan->handlers[opcode]; if (!hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) && !(handler->flags & HCI_MGMT_UNTRUSTED)) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_PERMISSION_DENIED); goto done; } if (index != MGMT_INDEX_NONE) { hdev = hci_dev_get(index); if (!hdev) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; } if (hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG) || hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; } if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !(handler->flags & HCI_MGMT_UNCONFIGURED)) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; } } if (!(handler->flags & HCI_MGMT_HDEV_OPTIONAL)) { no_hdev = (handler->flags & HCI_MGMT_NO_HDEV); if (no_hdev != !hdev) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; } } var_len = (handler->flags & HCI_MGMT_VAR_LEN); if ((var_len && len < handler->data_len) || (!var_len && len != handler->data_len)) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_PARAMS); goto done; } if (hdev && chan->hdev_init) chan->hdev_init(sk, hdev); cp = skb->data + sizeof(*hdr); err = handler->func(sk, 
hdev, cp, len); if (err < 0) goto done; err = skb->len; done: if (hdev) hci_dev_put(hdev); return err; } static int hci_logging_frame(struct sock *sk, struct sk_buff *skb, unsigned int flags) { struct hci_mon_hdr *hdr; struct hci_dev *hdev; u16 index; int err; /* The logging frame consists at minimum of the standard header, * the priority byte, the ident length byte and at least one string * terminator NUL byte. Anything shorter are invalid packets. */ if (skb->len < sizeof(*hdr) + 3) return -EINVAL; hdr = (void *)skb->data; if (__le16_to_cpu(hdr->len) != skb->len - sizeof(*hdr)) return -EINVAL; if (__le16_to_cpu(hdr->opcode) == 0x0000) { __u8 priority = skb->data[sizeof(*hdr)]; __u8 ident_len = skb->data[sizeof(*hdr) + 1]; /* Only the priorities 0-7 are valid and with that any other * value results in an invalid packet. * * The priority byte is followed by an ident length byte and * the NUL terminated ident string. Check that the ident * length is not overflowing the packet and also that the * ident string itself is NUL terminated. In case the ident * length is zero, the length value actually doubles as NUL * terminator identifier. * * The message follows the ident string (if present) and * must be NUL terminated. Otherwise it is not a valid packet. */ if (priority > 7 || skb->data[skb->len - 1] != 0x00 || ident_len > skb->len - sizeof(*hdr) - 3 || skb->data[sizeof(*hdr) + ident_len + 1] != 0x00) return -EINVAL; } else { return -EINVAL; } index = __le16_to_cpu(hdr->index); if (index != MGMT_INDEX_NONE) { hdev = hci_dev_get(index); if (!hdev) return -ENODEV; } else { hdev = NULL; } hdr->opcode = cpu_to_le16(HCI_MON_USER_LOGGING); hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); err = skb->len; if (hdev) hci_dev_put(hdev); return err; } static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct hci_mgmt_chan *chan; struct hci_dev *hdev; struct sk_buff *skb; int err; const unsigned int flags = msg->msg_flags; BT_DBG("sock %p sk %p", sock, sk); if (flags & MSG_OOB) return -EOPNOTSUPP; if (flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL | MSG_ERRQUEUE | MSG_CMSG_COMPAT)) return -EINVAL; if (len < 4 || len > hci_pi(sk)->mtu) return -EINVAL; skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0); if (IS_ERR(skb)) return PTR_ERR(skb); lock_sock(sk); switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: case HCI_CHANNEL_USER: break; case HCI_CHANNEL_MONITOR: err = -EOPNOTSUPP; goto drop; case HCI_CHANNEL_LOGGING: err = hci_logging_frame(sk, skb, flags); goto drop; default: mutex_lock(&mgmt_chan_list_lock); chan = __hci_mgmt_chan_find(hci_pi(sk)->channel); if (chan) err = hci_mgmt_cmd(chan, sk, skb); else err = -EINVAL; mutex_unlock(&mgmt_chan_list_lock); goto drop; } hdev = hci_hdev_from_sock(sk); if (IS_ERR(hdev)) { err = PTR_ERR(hdev); goto drop; } if (!test_bit(HCI_UP, &hdev->flags)) { err = -ENETDOWN; goto drop; } hci_skb_pkt_type(skb) = skb->data[0]; skb_pull(skb, 1); if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { /* No permission check is needed for user channel * since that gets enforced when binding the socket. * * However check that the packet type is valid. 
*/ if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT && hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) { err = -EINVAL; goto drop; } skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } else if (hci_skb_pkt_type(skb) == HCI_COMMAND_PKT) { u16 opcode = get_unaligned_le16(skb->data); u16 ogf = hci_opcode_ogf(opcode); u16 ocf = hci_opcode_ocf(opcode); if (((ogf > HCI_SFLT_MAX_OGF) || !hci_test_bit(ocf & HCI_FLT_OCF_BITS, &hci_sec_filter.ocf_mask[ogf])) && !capable(CAP_NET_RAW)) { err = -EPERM; goto drop; } /* Since the opcode has already been extracted here, store * a copy of the value for later use by the drivers. */ hci_skb_opcode(skb) = opcode; if (ogf == 0x3f) { skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } else { /* Stand-alone HCI commands must be flagged as * single-command requests. */ bt_cb(skb)->hci.req_flags |= HCI_REQ_START; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); } } else { if (!capable(CAP_NET_RAW)) { err = -EPERM; goto drop; } if (hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) { err = -EINVAL; goto drop; } skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } err = len; done: release_sock(sk); return err; drop: kfree_skb(skb); goto done; } static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct hci_ufilter uf = { .opcode = 0 }; struct sock *sk = sock->sk; int err = 0, opt = 0; BT_DBG("sk %p, opt %d", sk, optname); lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { err = -EBADFD; goto done; } switch (optname) { case HCI_DATA_DIR: err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen); if (err) break; if (opt) hci_pi(sk)->cmsg_mask |= HCI_CMSG_DIR; else hci_pi(sk)->cmsg_mask &= ~HCI_CMSG_DIR; break; case HCI_TIME_STAMP: err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen); if (err) break; if (opt) hci_pi(sk)->cmsg_mask |= HCI_CMSG_TSTAMP; else hci_pi(sk)->cmsg_mask &= ~HCI_CMSG_TSTAMP; break; case HCI_FILTER: { struct hci_filter *f = &hci_pi(sk)->filter; uf.type_mask = f->type_mask; uf.opcode = f->opcode; uf.event_mask[0] = *((u32 *) f->event_mask + 0); uf.event_mask[1] = *((u32 *) f->event_mask + 1); } err = copy_safe_from_sockptr(&uf, sizeof(uf), optval, optlen); if (err) break; if (!capable(CAP_NET_RAW)) { uf.type_mask &= hci_sec_filter.type_mask; uf.event_mask[0] &= *((u32 *) hci_sec_filter.event_mask + 0); uf.event_mask[1] &= *((u32 *) hci_sec_filter.event_mask + 1); } { struct hci_filter *f = &hci_pi(sk)->filter; f->type_mask = uf.type_mask; f->opcode = uf.opcode; *((u32 *) f->event_mask + 0) = uf.event_mask[0]; *((u32 *) f->event_mask + 1) = uf.event_mask[1]; } break; default: err = -ENOPROTOOPT; break; } done: release_sock(sk); return err; } static int hci_sock_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; int err = 0; u16 opt; BT_DBG("sk %p, opt %d", sk, optname); if (level == SOL_HCI) return hci_sock_setsockopt_old(sock, level, optname, optval, optlen); if (level != SOL_BLUETOOTH) return -ENOPROTOOPT; lock_sock(sk); switch (optname) { case BT_SNDMTU: case BT_RCVMTU: switch (hci_pi(sk)->channel) { /* Don't allow changing MTU for channels that are meant for HCI * traffic only. 
*/ case HCI_CHANNEL_RAW: case HCI_CHANNEL_USER: err = -ENOPROTOOPT; goto done; } err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen); if (err) break; hci_pi(sk)->mtu = opt; break; default: err = -ENOPROTOOPT; break; } done: release_sock(sk); return err; } static int hci_sock_getsockopt_old(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct hci_ufilter uf; struct sock *sk = sock->sk; int len, opt, err = 0; BT_DBG("sk %p, opt %d", sk, optname); if (get_user(len, optlen)) return -EFAULT; lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { err = -EBADFD; goto done; } switch (optname) { case HCI_DATA_DIR: if (hci_pi(sk)->cmsg_mask & HCI_CMSG_DIR) opt = 1; else opt = 0; if (put_user(opt, optval)) err = -EFAULT; break; case HCI_TIME_STAMP: if (hci_pi(sk)->cmsg_mask & HCI_CMSG_TSTAMP) opt = 1; else opt = 0; if (put_user(opt, optval)) err = -EFAULT; break; case HCI_FILTER: { struct hci_filter *f = &hci_pi(sk)->filter; memset(&uf, 0, sizeof(uf)); uf.type_mask = f->type_mask; uf.opcode = f->opcode; uf.event_mask[0] = *((u32 *) f->event_mask + 0); uf.event_mask[1] = *((u32 *) f->event_mask + 1); } len = min_t(unsigned int, len, sizeof(uf)); if (copy_to_user(optval, &uf, len)) err = -EFAULT; break; default: err = -ENOPROTOOPT; break; } done: release_sock(sk); return err; } static int hci_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; int err = 0; BT_DBG("sk %p, opt %d", sk, optname); if (level == SOL_HCI) return hci_sock_getsockopt_old(sock, level, optname, optval, optlen); if (level != SOL_BLUETOOTH) return -ENOPROTOOPT; lock_sock(sk); switch (optname) { case BT_SNDMTU: case BT_RCVMTU: if (put_user(hci_pi(sk)->mtu, (u16 __user *)optval)) err = -EFAULT; break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; } static void hci_sock_destruct(struct sock *sk) { mgmt_cleanup(sk); skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); } static const struct proto_ops hci_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = hci_sock_release, .bind = hci_sock_bind, .getname = hci_sock_getname, .sendmsg = hci_sock_sendmsg, .recvmsg = hci_sock_recvmsg, .ioctl = hci_sock_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = hci_sock_compat_ioctl, #endif .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = hci_sock_setsockopt, .getsockopt = hci_sock_getsockopt, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .mmap = sock_no_mmap }; static struct proto hci_sk_proto = { .name = "HCI", .owner = THIS_MODULE, .obj_size = sizeof(struct hci_pinfo) }; static int hci_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; BT_DBG("sock %p", sock); if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; sock->ops = &hci_sock_ops; sk = bt_sock_alloc(net, sock, &hci_sk_proto, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; sock->state = SS_UNCONNECTED; sk->sk_destruct = hci_sock_destruct; bt_sock_link(&hci_sk_list, sk); return 0; } static const struct net_proto_family hci_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = hci_sock_create, }; int __init hci_sock_init(void) { int err; BUILD_BUG_ON(sizeof(struct sockaddr_hci) > sizeof(struct sockaddr)); err = proto_register(&hci_sk_proto, 0); if (err < 0) return err; err = bt_sock_register(BTPROTO_HCI, &hci_sock_family_ops); if (err < 0) { 
BT_ERR("HCI socket registration failed"); goto error; } err = bt_procfs_init(&init_net, "hci", &hci_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create HCI proc file"); bt_sock_unregister(BTPROTO_HCI); goto error; } BT_INFO("HCI socket layer initialized"); return 0; error: proto_unregister(&hci_sk_proto); return err; } void hci_sock_cleanup(void) { bt_procfs_cleanup(&init_net, "hci"); bt_sock_unregister(BTPROTO_HCI); proto_unregister(&hci_sk_proto); }
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ #ifndef __XFS_QM_H__ #define __XFS_QM_H__ #include "xfs_dquot_item.h" #include "xfs_dquot.h" struct xfs_inode; extern struct kmem_cache *xfs_dqtrx_cache; /* * Number of bmaps that we ask from bmapi when doing a quotacheck. * We make this restriction to keep the memory usage to a minimum. */ #define XFS_DQITER_MAP_SIZE 10 #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ !dqp->q_blk.hardlimit && \ !dqp->q_blk.softlimit && \ !dqp->q_rtb.hardlimit && \ !dqp->q_rtb.softlimit && \ !dqp->q_ino.hardlimit && \ !dqp->q_ino.softlimit && \ !dqp->q_blk.count && \ !dqp->q_rtb.count && \ !dqp->q_ino.count) struct xfs_quota_limits { xfs_qcnt_t hard; /* default hard limit */ xfs_qcnt_t soft; /* default soft limit */ time64_t time; /* limit for timers */ }; /* Defaults for each quota type: time limits, warn limits, usage limits */ struct xfs_def_quota { struct xfs_quota_limits blk; struct xfs_quota_limits ino; struct xfs_quota_limits rtb; }; /* * Various quota information for individual filesystems. * The mount structure keeps a pointer to this. */ struct xfs_quotainfo { struct radix_tree_root qi_uquota_tree; struct radix_tree_root qi_gquota_tree; struct radix_tree_root qi_pquota_tree; struct mutex qi_tree_lock; struct xfs_inode *qi_uquotaip; /* user quota inode */ struct xfs_inode *qi_gquotaip; /* group quota inode */ struct xfs_inode *qi_pquotaip; /* project quota inode */ struct xfs_inode *qi_dirip; /* quota metadir */ struct list_lru qi_lru; int qi_dquots; struct mutex qi_quotaofflock;/* to serialize quotaoff */ xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ uint qi_dqperchunk; /* # ondisk dq in above chunk */ struct xfs_def_quota qi_usr_default; struct xfs_def_quota qi_grp_default; struct xfs_def_quota qi_prj_default; struct shrinker *qi_shrinker; /* Minimum and maximum quota expiration timestamp values. */ time64_t qi_expiry_min; time64_t qi_expiry_max; /* Hook to feed quota counter updates to an active online repair. */ struct xfs_hooks qi_mod_ino_dqtrx_hooks; struct xfs_hooks qi_apply_dqtrx_hooks; }; static inline struct radix_tree_root * xfs_dquot_tree( struct xfs_quotainfo *qi, xfs_dqtype_t type) { switch (type) { case XFS_DQTYPE_USER: return &qi->qi_uquota_tree; case XFS_DQTYPE_GROUP: return &qi->qi_gquota_tree; case XFS_DQTYPE_PROJ: return &qi->qi_pquota_tree; default: ASSERT(0); } return NULL; } static inline struct xfs_inode * xfs_quota_inode(struct xfs_mount *mp, xfs_dqtype_t type) { switch (type) { case XFS_DQTYPE_USER: return mp->m_quotainfo->qi_uquotaip; case XFS_DQTYPE_GROUP: return mp->m_quotainfo->qi_gquotaip; case XFS_DQTYPE_PROJ: return mp->m_quotainfo->qi_pquotaip; default: ASSERT(0); } return NULL; } /* * Parameters for tracking dqtrx changes on behalf of an inode. 
The hook * function arg parameter is the field being updated. */ struct xfs_mod_ino_dqtrx_params { uintptr_t tx_id; xfs_ino_t ino; xfs_dqtype_t q_type; xfs_dqid_t q_id; int64_t delta; }; extern void xfs_trans_mod_dquot(struct xfs_trans *tp, struct xfs_dquot *dqp, uint field, int64_t delta); extern void xfs_trans_dqjoin(struct xfs_trans *, struct xfs_dquot *); extern void xfs_trans_log_dquot(struct xfs_trans *, struct xfs_dquot *); /* * We keep the usr, grp, and prj dquots separately so that locking will be * easier to do at commit time. All transactions that we know of at this point * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. */ enum { XFS_QM_TRANS_USR = 0, XFS_QM_TRANS_GRP, XFS_QM_TRANS_PRJ, XFS_QM_TRANS_DQTYPES }; #define XFS_QM_TRANS_MAXDQS 5 struct xfs_dquot_acct { struct xfs_dqtrx dqs[XFS_QM_TRANS_DQTYPES][XFS_QM_TRANS_MAXDQS]; }; /* * Users are allowed to have a usage exceeding their softlimit for * a period this long. */ #define XFS_QM_BTIMELIMIT (7 * 24*60*60) /* 1 week */ #define XFS_QM_RTBTIMELIMIT (7 * 24*60*60) /* 1 week */ #define XFS_QM_ITIMELIMIT (7 * 24*60*60) /* 1 week */ extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); /* quota ops */ extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint); extern int xfs_qm_scall_getquota(struct xfs_mount *mp, xfs_dqid_t id, xfs_dqtype_t type, struct qc_dqblk *dst); extern int xfs_qm_scall_getquota_next(struct xfs_mount *mp, xfs_dqid_t *id, xfs_dqtype_t type, struct qc_dqblk *dst); extern int xfs_qm_scall_setqlim(struct xfs_mount *mp, xfs_dqid_t id, xfs_dqtype_t type, struct qc_dqblk *newlim); extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint); extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint); static inline struct xfs_def_quota * xfs_get_defquota(struct xfs_quotainfo *qi, xfs_dqtype_t type) { switch (type) { case XFS_DQTYPE_USER: return &qi->qi_usr_default; case XFS_DQTYPE_GROUP: return &qi->qi_grp_default; case XFS_DQTYPE_PROJ: return &qi->qi_prj_default; default: ASSERT(0); return NULL; } } int xfs_qm_qino_load(struct xfs_mount *mp, xfs_dqtype_t type, struct xfs_inode **ipp); #endif /* __XFS_QM_H__ */
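As a small illustration of how the per-type helpers above compose, here is a hypothetical inline (xfs_default_btimelimit() is an invented name and not part of this header) that resolves the block-count grace period for a quota type from the defaults tracked in struct xfs_def_quota, falling back to the one-week XFS_QM_BTIMELIMIT.

/*
 * Hypothetical helper, for illustration only: pick the administrator-set
 * block grace period for a quota type, or the compile-time default.
 */
static inline time64_t
xfs_default_btimelimit(
        struct xfs_quotainfo    *qi,
        xfs_dqtype_t            type)
{
        struct xfs_def_quota    *defq = xfs_get_defquota(qi, type);

        /* A zero timer means no limit was ever set; use the default. */
        return defq->blk.time ? defq->blk.time : XFS_QM_BTIMELIMIT;
}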
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PERCPU_RWSEM_H #define _LINUX_PERCPU_RWSEM_H #include <linux/atomic.h> #include <linux/percpu.h> #include <linux/rcuwait.h> #include <linux/wait.h> #include <linux/rcu_sync.h> #include <linux/lockdep.h> struct percpu_rw_semaphore { struct rcu_sync rss; unsigned int __percpu *read_count; struct rcuwait writer; wait_queue_head_t waiters; atomic_t block; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif }; #ifdef CONFIG_DEBUG_LOCK_ALLOC #define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }, #else #define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) #endif #define __DEFINE_PERCPU_RWSEM(name, is_static) \ static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \ is_static struct percpu_rw_semaphore name = { \ .rss = __RCU_SYNC_INITIALIZER(name.rss), \ .read_count = &__percpu_rwsem_rc_##name, \ .writer = __RCUWAIT_INITIALIZER(name.writer), \ .waiters = __WAIT_QUEUE_HEAD_INITIALIZER(name.waiters), \ .block = ATOMIC_INIT(0), \ __PERCPU_RWSEM_DEP_MAP_INIT(name) \ } #define DEFINE_PERCPU_RWSEM(name) \ __DEFINE_PERCPU_RWSEM(name, /* not static */) #define DEFINE_STATIC_PERCPU_RWSEM(name) \ __DEFINE_PERCPU_RWSEM(name, static) extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool); static inline void percpu_down_read(struct percpu_rw_semaphore *sem) { might_sleep(); rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); preempt_disable(); /* * We are in an RCU-sched read-side critical section, so the writer * cannot both change sem->state from readers_fast and start checking * counters while we are here. So if we see !sem->state, we know that * the writer won't be checking until we're past the preempt_enable() * and that once the synchronize_rcu() is done, the writer will see * anything we did within this RCU-sched read-side critical section. */ if (likely(rcu_sync_is_idle(&sem->rss))) this_cpu_inc(*sem->read_count); else __percpu_down_read(sem, false); /* Unconditional memory barrier */ /* * The preempt_enable() prevents the compiler from * bleeding the critical section out. */ preempt_enable(); } static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { bool ret = true; preempt_disable(); /* * Same as in percpu_down_read(). */ if (likely(rcu_sync_is_idle(&sem->rss))) this_cpu_inc(*sem->read_count); else ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */ preempt_enable(); /* * The barrier() from preempt_enable() prevents the compiler from * bleeding the critical section out. */ if (ret) rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); return ret; } static inline void percpu_up_read(struct percpu_rw_semaphore *sem) { rwsem_release(&sem->dep_map, _RET_IP_); preempt_disable(); /* * Same as in percpu_down_read(). */ if (likely(rcu_sync_is_idle(&sem->rss))) { this_cpu_dec(*sem->read_count); } else { /* * slowpath; reader will only ever wake a single blocked * writer. 
*/ smp_mb(); /* B matches C */ /* * In other words, if they see our decrement (presumably to * aggregate zero, as that is the only time it matters) they * will also see our critical section. */ this_cpu_dec(*sem->read_count); rcuwait_wake_up(&sem->writer); } preempt_enable(); } extern bool percpu_is_read_locked(struct percpu_rw_semaphore *); extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *); static inline bool percpu_is_write_locked(struct percpu_rw_semaphore *sem) { return atomic_read(&sem->block); } extern int __percpu_init_rwsem(struct percpu_rw_semaphore *, const char *, struct lock_class_key *); extern void percpu_free_rwsem(struct percpu_rw_semaphore *); #define percpu_init_rwsem(sem) \ ({ \ static struct lock_class_key rwsem_key; \ __percpu_init_rwsem(sem, #sem, &rwsem_key); \ }) #define percpu_rwsem_is_held(sem) lockdep_is_held(sem) #define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem) static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, unsigned long ip) { lock_release(&sem->dep_map, ip); } static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip); } #endif
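The header above is all a caller needs for the common pattern: readers pay only a per-CPU increment on the fast path (no shared cacheline, no atomics), while a writer forces everyone through the slow path and waits for the readers to drain. A minimal usage sketch follows; the example_generation counter and the function names are invented for illustration, but the lock API is exactly the one declared above.

/* Usage sketch; the protected counter and function names are invented. */
DEFINE_STATIC_PERCPU_RWSEM(example_rwsem);
static unsigned long example_generation;

static unsigned long example_read_generation(void)
{
	unsigned long gen;

	percpu_down_read(&example_rwsem);	/* fast path: this_cpu_inc() */
	gen = example_generation;
	percpu_up_read(&example_rwsem);

	return gen;
}

static void example_bump_generation(void)
{
	percpu_down_write(&example_rwsem);	/* blocks until readers drain */
	example_generation++;
	percpu_up_write(&example_rwsem);
}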
// SPDX-License-Identifier: GPL-2.0 /* * DMABUF System heap exporter * * Copyright (C) 2011 Google, Inc. * Copyright (C) 2019, 2020 Linaro Ltd. * * Portions based off of Andrew Davis' SRAM heap: * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/ * Andrew F. Davis <afd@ti.com> */ #include <linux/dma-buf.h> #include <linux/dma-mapping.h> #include <linux/dma-heap.h> #include <linux/err.h> #include <linux/highmem.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/vmalloc.h> static struct dma_heap *sys_heap; struct system_heap_buffer { struct dma_heap *heap; struct list_head attachments; struct mutex lock; unsigned long len; struct sg_table sg_table; int vmap_cnt; void *vaddr; }; struct dma_heap_attachment { struct device *dev; struct sg_table *table; struct list_head list; bool mapped; }; #define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO) #define HIGH_ORDER_GFP (((GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN \ | __GFP_NORETRY) & ~__GFP_RECLAIM) \ | __GFP_COMP) static gfp_t order_flags[] = {HIGH_ORDER_GFP, HIGH_ORDER_GFP, LOW_ORDER_GFP}; /* * The selection of the orders used for allocation (1MB, 64K, 4K) is designed * to match with the sizes often found in IOMMUs. Using order 4 pages instead * of order 0 pages can significantly improve the performance of many IOMMUs * by reducing TLB pressure and time spent updating page tables. 
*/ static const unsigned int orders[] = {8, 4, 0}; #define NUM_ORDERS ARRAY_SIZE(orders) static struct sg_table *dup_sg_table(struct sg_table *table) { struct sg_table *new_table; int ret, i; struct scatterlist *sg, *new_sg; new_table = kzalloc(sizeof(*new_table), GFP_KERNEL); if (!new_table) return ERR_PTR(-ENOMEM); ret = sg_alloc_table(new_table, table->orig_nents, GFP_KERNEL); if (ret) { kfree(new_table); return ERR_PTR(-ENOMEM); } new_sg = new_table->sgl; for_each_sgtable_sg(table, sg, i) { sg_set_page(new_sg, sg_page(sg), sg->length, sg->offset); new_sg = sg_next(new_sg); } return new_table; } static int system_heap_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attachment) { struct system_heap_buffer *buffer = dmabuf->priv; struct dma_heap_attachment *a; struct sg_table *table; a = kzalloc(sizeof(*a), GFP_KERNEL); if (!a) return -ENOMEM; table = dup_sg_table(&buffer->sg_table); if (IS_ERR(table)) { kfree(a); return -ENOMEM; } a->table = table; a->dev = attachment->dev; INIT_LIST_HEAD(&a->list); a->mapped = false; attachment->priv = a; mutex_lock(&buffer->lock); list_add(&a->list, &buffer->attachments); mutex_unlock(&buffer->lock); return 0; } static void system_heap_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attachment) { struct system_heap_buffer *buffer = dmabuf->priv; struct dma_heap_attachment *a = attachment->priv; mutex_lock(&buffer->lock); list_del(&a->list); mutex_unlock(&buffer->lock); sg_free_table(a->table); kfree(a->table); kfree(a); } static struct sg_table *system_heap_map_dma_buf(struct dma_buf_attachment *attachment, enum dma_data_direction direction) { struct dma_heap_attachment *a = attachment->priv; struct sg_table *table = a->table; int ret; ret = dma_map_sgtable(attachment->dev, table, direction, 0); if (ret) return ERR_PTR(ret); a->mapped = true; return table; } static void system_heap_unmap_dma_buf(struct dma_buf_attachment *attachment, struct sg_table *table, enum dma_data_direction direction) { struct dma_heap_attachment *a = attachment->priv; a->mapped = false; dma_unmap_sgtable(attachment->dev, table, direction, 0); } static int system_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, enum dma_data_direction direction) { struct system_heap_buffer *buffer = dmabuf->priv; struct dma_heap_attachment *a; mutex_lock(&buffer->lock); if (buffer->vmap_cnt) invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); list_for_each_entry(a, &buffer->attachments, list) { if (!a->mapped) continue; dma_sync_sgtable_for_cpu(a->dev, a->table, direction); } mutex_unlock(&buffer->lock); return 0; } static int system_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf, enum dma_data_direction direction) { struct system_heap_buffer *buffer = dmabuf->priv; struct dma_heap_attachment *a; mutex_lock(&buffer->lock); if (buffer->vmap_cnt) flush_kernel_vmap_range(buffer->vaddr, buffer->len); list_for_each_entry(a, &buffer->attachments, list) { if (!a->mapped) continue; dma_sync_sgtable_for_device(a->dev, a->table, direction); } mutex_unlock(&buffer->lock); return 0; } static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) { struct system_heap_buffer *buffer = dmabuf->priv; struct sg_table *table = &buffer->sg_table; unsigned long addr = vma->vm_start; struct sg_page_iter piter; int ret; for_each_sgtable_page(table, &piter, vma->vm_pgoff) { struct page *page = sg_page_iter_page(&piter); ret = remap_pfn_range(vma, addr, page_to_pfn(page), PAGE_SIZE, vma->vm_page_prot); if (ret) return ret; addr += PAGE_SIZE; if (addr >= vma->vm_end) 
return 0; } return 0; } static void *system_heap_do_vmap(struct system_heap_buffer *buffer) { struct sg_table *table = &buffer->sg_table; int npages = PAGE_ALIGN(buffer->len) / PAGE_SIZE; struct page **pages = vmalloc(sizeof(struct page *) * npages); struct page **tmp = pages; struct sg_page_iter piter; void *vaddr; if (!pages) return ERR_PTR(-ENOMEM); for_each_sgtable_page(table, &piter, 0) { WARN_ON(tmp - pages >= npages); *tmp++ = sg_page_iter_page(&piter); } vaddr = vmap(pages, npages, VM_MAP, PAGE_KERNEL); vfree(pages); if (!vaddr) return ERR_PTR(-ENOMEM); return vaddr; } static int system_heap_vmap(struct dma_buf *dmabuf, struct iosys_map *map) { struct system_heap_buffer *buffer = dmabuf->priv; void *vaddr; int ret = 0; mutex_lock(&buffer->lock); if (buffer->vmap_cnt) { buffer->vmap_cnt++; iosys_map_set_vaddr(map, buffer->vaddr); goto out; } vaddr = system_heap_do_vmap(buffer); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); goto out; } buffer->vaddr = vaddr; buffer->vmap_cnt++; iosys_map_set_vaddr(map, buffer->vaddr); out: mutex_unlock(&buffer->lock); return ret; } static void system_heap_vunmap(struct dma_buf *dmabuf, struct iosys_map *map) { struct system_heap_buffer *buffer = dmabuf->priv; mutex_lock(&buffer->lock); if (!--buffer->vmap_cnt) { vunmap(buffer->vaddr); buffer->vaddr = NULL; } mutex_unlock(&buffer->lock); iosys_map_clear(map); } static void system_heap_dma_buf_release(struct dma_buf *dmabuf) { struct system_heap_buffer *buffer = dmabuf->priv; struct sg_table *table; struct scatterlist *sg; int i; table = &buffer->sg_table; for_each_sgtable_sg(table, sg, i) { struct page *page = sg_page(sg); __free_pages(page, compound_order(page)); } sg_free_table(table); kfree(buffer); } static const struct dma_buf_ops system_heap_buf_ops = { .attach = system_heap_attach, .detach = system_heap_detach, .map_dma_buf = system_heap_map_dma_buf, .unmap_dma_buf = system_heap_unmap_dma_buf, .begin_cpu_access = system_heap_dma_buf_begin_cpu_access, .end_cpu_access = system_heap_dma_buf_end_cpu_access, .mmap = system_heap_mmap, .vmap = system_heap_vmap, .vunmap = system_heap_vunmap, .release = system_heap_dma_buf_release, }; static struct page *alloc_largest_available(unsigned long size, unsigned int max_order) { struct page *page; int i; for (i = 0; i < NUM_ORDERS; i++) { if (size < (PAGE_SIZE << orders[i])) continue; if (max_order < orders[i]) continue; page = alloc_pages(order_flags[i], orders[i]); if (!page) continue; return page; } return NULL; } static struct dma_buf *system_heap_allocate(struct dma_heap *heap, unsigned long len, u32 fd_flags, u64 heap_flags) { struct system_heap_buffer *buffer; DEFINE_DMA_BUF_EXPORT_INFO(exp_info); unsigned long size_remaining = len; unsigned int max_order = orders[0]; struct dma_buf *dmabuf; struct sg_table *table; struct scatterlist *sg; struct list_head pages; struct page *page, *tmp_page; int i, ret = -ENOMEM; buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); if (!buffer) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&buffer->attachments); mutex_init(&buffer->lock); buffer->heap = heap; buffer->len = len; INIT_LIST_HEAD(&pages); i = 0; while (size_remaining > 0) { /* * Avoid trying to allocate memory if the process * has been killed by SIGKILL */ if (fatal_signal_pending(current)) { ret = -EINTR; goto free_buffer; } page = alloc_largest_available(size_remaining, max_order); if (!page) goto free_buffer; list_add_tail(&page->lru, &pages); size_remaining -= page_size(page); max_order = compound_order(page); i++; } table = &buffer->sg_table; if 
(sg_alloc_table(table, i, GFP_KERNEL)) goto free_buffer; sg = table->sgl; list_for_each_entry_safe(page, tmp_page, &pages, lru) { sg_set_page(sg, page, page_size(page), 0); sg = sg_next(sg); list_del(&page->lru); } /* create the dmabuf */ exp_info.exp_name = dma_heap_get_name(heap); exp_info.ops = &system_heap_buf_ops; exp_info.size = buffer->len; exp_info.flags = fd_flags; exp_info.priv = buffer; dmabuf = dma_buf_export(&exp_info); if (IS_ERR(dmabuf)) { ret = PTR_ERR(dmabuf); goto free_pages; } return dmabuf; free_pages: for_each_sgtable_sg(table, sg, i) { struct page *p = sg_page(sg); __free_pages(p, compound_order(p)); } sg_free_table(table); free_buffer: list_for_each_entry_safe(page, tmp_page, &pages, lru) __free_pages(page, compound_order(page)); kfree(buffer); return ERR_PTR(ret); } static const struct dma_heap_ops system_heap_ops = { .allocate = system_heap_allocate, }; static int __init system_heap_create(void) { struct dma_heap_export_info exp_info; exp_info.name = "system"; exp_info.ops = &system_heap_ops; exp_info.priv = NULL; sys_heap = dma_heap_add(&exp_info); if (IS_ERR(sys_heap)) return PTR_ERR(sys_heap); return 0; } module_init(system_heap_create);
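From userspace the exporter above is reached through the DMA-BUF heaps character device rather than any driver-private interface. The sketch below assumes the standard UAPI in <linux/dma-heap.h> (struct dma_heap_allocation_data and DMA_HEAP_IOCTL_ALLOC) and the conventional /dev/dma_heap/system node created for this heap; treat those names as assumptions rather than guarantees for a given kernel build.

/* Userspace sketch: allocate a 1 MiB dma-buf from the "system" heap. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/dma-heap.h>

int main(void)
{
	struct dma_heap_allocation_data alloc = {
		.len = 1024 * 1024,
		.fd_flags = O_RDWR | O_CLOEXEC,
	};
	int heap = open("/dev/dma_heap/system", O_RDONLY | O_CLOEXEC);

	if (heap < 0 || ioctl(heap, DMA_HEAP_IOCTL_ALLOC, &alloc) < 0) {
		perror("dma-heap allocation");
		return 1;
	}

	/* alloc.fd is now an ordinary dma-buf fd: mmap it, or pass it to a
	 * device driver for attach/map as in the ops table above. */
	printf("dma-buf fd: %u, size: %llu\n", alloc.fd,
	       (unsigned long long)alloc.len);

	close(alloc.fd);
	close(heap);
	return 0;
}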
// SPDX-License-Identifier: GPL-2.0-or-later /* * CDC Ethernet based networking peripherals * Copyright (C) 2003-2005 by David Brownell * Copyright (C) 2006 by Ole Andre Vadla Ravnas (ActiveSync) */ // #define DEBUG // error path messages, extra info // #define VERBOSE // more; success messages #include <linux/module.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/workqueue.h> #include <linux/mii.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <linux/usb/usbnet.h> #if IS_ENABLED(CONFIG_USB_NET_RNDIS_HOST) static int is_rndis(struct usb_interface_descriptor *desc) { return (desc->bInterfaceClass == USB_CLASS_COMM && desc->bInterfaceSubClass == 2 && desc->bInterfaceProtocol == 0xff); } static int is_activesync(struct usb_interface_descriptor *desc) { return (desc->bInterfaceClass == USB_CLASS_MISC && desc->bInterfaceSubClass == 1 && desc->bInterfaceProtocol == 1); } static int is_wireless_rndis(struct usb_interface_descriptor *desc) { return (desc->bInterfaceClass == USB_CLASS_WIRELESS_CONTROLLER && desc->bInterfaceSubClass == 1 && desc->bInterfaceProtocol == 3); } static int is_novatel_rndis(struct usb_interface_descriptor *desc) { return (desc->bInterfaceClass == USB_CLASS_MISC && desc->bInterfaceSubClass == 4 && desc->bInterfaceProtocol == 1); } #else #define is_rndis(desc) 0 #define is_activesync(desc) 0 #define is_wireless_rndis(desc) 0 #define is_novatel_rndis(desc) 0 #endif static const u8 mbm_guid[16] = { 0xa3, 0x17, 0xa8, 0x8b, 0x04, 0x5e, 0x4f, 0x01, 0xa6, 0x07, 0xc0, 0xff, 0xcb, 0x7e, 0x39, 0x2a, }; void usbnet_cdc_update_filter(struct usbnet *dev) { struct net_device *net = dev->net; u16 cdc_filter = USB_CDC_PACKET_TYPE_DIRECTED | USB_CDC_PACKET_TYPE_BROADCAST; /* filtering on the device is an optional feature and not worth * the hassle so we just roughly care about snooping and if any * multicast is requested, we take every multicast */ if (net->flags & IFF_PROMISC) cdc_filter |= USB_CDC_PACKET_TYPE_PROMISCUOUS; if (!netdev_mc_empty(net) || (net->flags & IFF_ALLMULTI)) cdc_filter |= USB_CDC_PACKET_TYPE_ALL_MULTICAST; usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), USB_CDC_SET_ETHERNET_PACKET_FILTER, USB_TYPE_CLASS | USB_RECIP_INTERFACE, cdc_filter, dev->intf->cur_altsetting->desc.bInterfaceNumber, NULL, 0, USB_CTRL_SET_TIMEOUT ); } EXPORT_SYMBOL_GPL(usbnet_cdc_update_filter); /* We need to override usbnet_*_link_ksettings in bind() */ static const struct ethtool_ops cdc_ether_ethtool_ops = { .get_link = usbnet_get_link, .nway_reset = usbnet_nway_reset, .get_drvinfo = usbnet_get_drvinfo, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, .get_ts_info = ethtool_op_get_ts_info, .get_link_ksettings = usbnet_get_link_ksettings_internal, .set_link_ksettings = NULL, }; /* probes control interface, claims data interface, collects the bulk * endpoints, activates data interface (if needed), maybe sets MTU. * all pure cdc, except for certain firmware workarounds, and knowing * that rndis uses one different rule. 
*/ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) { u8 *buf = intf->cur_altsetting->extra; int len = intf->cur_altsetting->extralen; struct usb_interface_descriptor *d; struct cdc_state *info = (void *) &dev->data; int status; int rndis; bool android_rndis_quirk = false; struct usb_driver *driver = driver_of(intf); struct usb_cdc_parsed_header header; if (sizeof(dev->data) < sizeof(*info)) return -EDOM; /* expect strict spec conformance for the descriptors, but * cope with firmware which stores them in the wrong place */ if (len == 0 && dev->udev->actconfig->extralen) { /* Motorola SB4100 (and others: Brad Hards says it's * from a Broadcom design) put CDC descriptors here */ buf = dev->udev->actconfig->extra; len = dev->udev->actconfig->extralen; dev_dbg(&intf->dev, "CDC descriptors on config\n"); } /* Maybe CDC descriptors are after the endpoint? This bug has * been seen on some 2Wire Inc RNDIS-ish products. */ if (len == 0) { struct usb_host_endpoint *hep; hep = intf->cur_altsetting->endpoint; if (hep) { buf = hep->extra; len = hep->extralen; } if (len) dev_dbg(&intf->dev, "CDC descriptors on endpoint\n"); } /* this assumes that if there's a non-RNDIS vendor variant * of cdc-acm, it'll fail RNDIS requests cleanly. */ rndis = (is_rndis(&intf->cur_altsetting->desc) || is_activesync(&intf->cur_altsetting->desc) || is_wireless_rndis(&intf->cur_altsetting->desc) || is_novatel_rndis(&intf->cur_altsetting->desc)); memset(info, 0, sizeof(*info)); info->control = intf; cdc_parse_cdc_header(&header, intf, buf, len); info->u = header.usb_cdc_union_desc; info->header = header.usb_cdc_header_desc; info->ether = header.usb_cdc_ether_desc; if (!info->u) { if (rndis) goto skip; else /* in that case a quirk is mandatory */ goto bad_desc; } /* we need a master/control interface (what we're * probed with) and a slave/data interface; union * descriptors sort this all out. */ info->control = usb_ifnum_to_if(dev->udev, info->u->bMasterInterface0); info->data = usb_ifnum_to_if(dev->udev, info->u->bSlaveInterface0); if (!info->control || !info->data) { dev_dbg(&intf->dev, "master #%u/%p slave #%u/%p\n", info->u->bMasterInterface0, info->control, info->u->bSlaveInterface0, info->data); /* fall back to hard-wiring for RNDIS */ if (rndis) { android_rndis_quirk = true; goto skip; } goto bad_desc; } if (info->control != intf) { dev_dbg(&intf->dev, "bogus CDC Union\n"); /* Ambit USB Cable Modem (and maybe others) * interchanges master and slave interface. */ if (info->data == intf) { info->data = info->control; info->control = intf; } else goto bad_desc; } /* some devices merge these - skip class check */ if (info->control == info->data) goto skip; /* a data interface altsetting does the real i/o */ d = &info->data->cur_altsetting->desc; if (d->bInterfaceClass != USB_CLASS_CDC_DATA) { dev_dbg(&intf->dev, "slave class %u\n", d->bInterfaceClass); goto bad_desc; } skip: /* Communication class functions with bmCapabilities are not * RNDIS. But some Wireless class RNDIS functions use * bmCapabilities for their own purpose. The failsafe is * therefore applied only to Communication class RNDIS * functions. The rndis test is redundant, but a cheap * optimization. 
*/ if (rndis && is_rndis(&intf->cur_altsetting->desc) && header.usb_cdc_acm_descriptor && header.usb_cdc_acm_descriptor->bmCapabilities) { dev_dbg(&intf->dev, "ACM capabilities %02x, not really RNDIS?\n", header.usb_cdc_acm_descriptor->bmCapabilities); goto bad_desc; } if (header.usb_cdc_ether_desc && info->ether->wMaxSegmentSize) { dev->hard_mtu = le16_to_cpu(info->ether->wMaxSegmentSize); /* because of Zaurus, we may be ignoring the host * side link address we were given. */ } if (header.usb_cdc_mdlm_desc && memcmp(header.usb_cdc_mdlm_desc->bGUID, mbm_guid, 16)) { dev_dbg(&intf->dev, "GUID doesn't match\n"); goto bad_desc; } if (header.usb_cdc_mdlm_detail_desc && header.usb_cdc_mdlm_detail_desc->bLength < (sizeof(struct usb_cdc_mdlm_detail_desc) + 1)) { dev_dbg(&intf->dev, "Descriptor too short\n"); goto bad_desc; } /* Microsoft ActiveSync based and some regular RNDIS devices lack the * CDC descriptors, so we'll hard-wire the interfaces and not check * for descriptors. * * Some Android RNDIS devices have a CDC Union descriptor pointing * to non-existing interfaces. Ignore that and attempt the same * hard-wired 0 and 1 interfaces. */ if (rndis && (!info->u || android_rndis_quirk)) { info->control = usb_ifnum_to_if(dev->udev, 0); info->data = usb_ifnum_to_if(dev->udev, 1); if (!info->control || !info->data || info->control != intf) { dev_dbg(&intf->dev, "rndis: master #0/%p slave #1/%p\n", info->control, info->data); goto bad_desc; } } else if (!info->header || (!rndis && !info->ether)) { dev_dbg(&intf->dev, "missing cdc %s%s%sdescriptor\n", info->header ? "" : "header ", info->u ? "" : "union ", info->ether ? "" : "ether "); goto bad_desc; } /* claim data interface and set it up ... with side effects. * network traffic can't flow until an altsetting is enabled. */ if (info->data != info->control) { status = usb_driver_claim_interface(driver, info->data, dev); if (status < 0) return status; } status = usbnet_get_endpoints(dev, info->data); if (status < 0) { /* ensure immediate exit from usbnet_disconnect */ usb_set_intfdata(info->data, NULL); if (info->data != info->control) usb_driver_release_interface(driver, info->data); return status; } /* status endpoint: optional for CDC Ethernet, not RNDIS (or ACM) */ if (info->data != info->control) dev->status = NULL; if (info->control->cur_altsetting->desc.bNumEndpoints == 1) { struct usb_endpoint_descriptor *desc; dev->status = &info->control->cur_altsetting->endpoint[0]; desc = &dev->status->desc; if (!usb_endpoint_is_int_in(desc) || (le16_to_cpu(desc->wMaxPacketSize) < sizeof(struct usb_cdc_notification)) || !desc->bInterval) { dev_dbg(&intf->dev, "bad notification endpoint\n"); dev->status = NULL; } } if (rndis && !dev->status) { dev_dbg(&intf->dev, "missing RNDIS status endpoint\n"); usb_set_intfdata(info->data, NULL); usb_driver_release_interface(driver, info->data); return -ENODEV; } /* override ethtool_ops */ dev->net->ethtool_ops = &cdc_ether_ethtool_ops; return 0; bad_desc: dev_info(&dev->udev->dev, "bad CDC descriptors\n"); return -ENODEV; } EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind); /* like usbnet_generic_cdc_bind() but handles filter initialization * correctly */ int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf) { int rv; rv = usbnet_generic_cdc_bind(dev, intf); if (rv < 0) goto bail_out; /* Some devices don't initialise properly. In particular * the packet filter is not reset. There are devices that * don't do reset all the way. So the packet filter should * be set to a sane initial value. 
*/ usbnet_cdc_update_filter(dev); bail_out: return rv; } EXPORT_SYMBOL_GPL(usbnet_ether_cdc_bind); void usbnet_cdc_unbind(struct usbnet *dev, struct usb_interface *intf) { struct cdc_state *info = (void *) &dev->data; struct usb_driver *driver = driver_of(intf); /* combined interface - nothing to do */ if (info->data == info->control) return; /* disconnect master --> disconnect slave */ if (intf == info->control && info->data) { /* ensure immediate exit from usbnet_disconnect */ usb_set_intfdata(info->data, NULL); usb_driver_release_interface(driver, info->data); info->data = NULL; } /* and vice versa (just in case) */ else if (intf == info->data && info->control) { /* ensure immediate exit from usbnet_disconnect */ usb_set_intfdata(info->control, NULL); usb_driver_release_interface(driver, info->control); info->control = NULL; } } EXPORT_SYMBOL_GPL(usbnet_cdc_unbind); /* Communications Device Class, Ethernet Control model * * Takes two interfaces. The DATA interface is inactive till an altsetting * is selected. Configuration data includes class descriptors. There's * an optional status endpoint on the control interface. * * This should interop with whatever the 2.4 "CDCEther.c" driver * (by Brad Hards) talked with, with more functionality. */ static void speed_change(struct usbnet *dev, __le32 *speeds) { dev->tx_speed = __le32_to_cpu(speeds[0]); dev->rx_speed = __le32_to_cpu(speeds[1]); } void usbnet_cdc_status(struct usbnet *dev, struct urb *urb) { struct usb_cdc_notification *event; if (urb->actual_length < sizeof(*event)) return; /* SPEED_CHANGE can get split into two 8-byte packets */ if (test_and_clear_bit(EVENT_STS_SPLIT, &dev->flags)) { speed_change(dev, (__le32 *) urb->transfer_buffer); return; } event = urb->transfer_buffer; switch (event->bNotificationType) { case USB_CDC_NOTIFY_NETWORK_CONNECTION: netif_dbg(dev, timer, dev->net, "CDC: carrier %s\n", event->wValue ? "on" : "off"); if (netif_carrier_ok(dev->net) != !!event->wValue) usbnet_link_change(dev, !!event->wValue, 0); break; case USB_CDC_NOTIFY_SPEED_CHANGE: /* tx/rx rates */ netif_dbg(dev, timer, dev->net, "CDC: speed change (len %d)\n", urb->actual_length); if (urb->actual_length != (sizeof(*event) + 8)) set_bit(EVENT_STS_SPLIT, &dev->flags); else speed_change(dev, (__le32 *) &event[1]); break; /* USB_CDC_NOTIFY_RESPONSE_AVAILABLE can happen too (e.g. RNDIS), * but there are no standard formats for the response data. 
*/ default: netdev_err(dev->net, "CDC: unexpected notification %02x!\n", event->bNotificationType); break; } } EXPORT_SYMBOL_GPL(usbnet_cdc_status); int usbnet_cdc_bind(struct usbnet *dev, struct usb_interface *intf) { int status; struct cdc_state *info = (void *) &dev->data; BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data) < sizeof(struct cdc_state))); status = usbnet_ether_cdc_bind(dev, intf); if (status < 0) return status; status = usbnet_get_ethernet_addr(dev, info->ether->iMACAddress); if (status < 0) { usb_set_intfdata(info->data, NULL); usb_driver_release_interface(driver_of(intf), info->data); return status; } return 0; } EXPORT_SYMBOL_GPL(usbnet_cdc_bind); static int usbnet_cdc_zte_bind(struct usbnet *dev, struct usb_interface *intf) { int status = usbnet_cdc_bind(dev, intf); if (!status && (dev->net->dev_addr[0] & 0x02)) eth_hw_addr_random(dev->net); return status; } /* Make sure packets have correct destination MAC address * * A firmware bug observed on some devices (ZTE MF823/831/910) is that the * device sends packets with a static, bogus, random MAC address (event if * device MAC address has been updated). Always set MAC address to that of the * device. */ int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { if (skb->len < ETH_HLEN || !(skb->data[0] & 0x02)) return 1; skb_reset_mac_header(skb); ether_addr_copy(eth_hdr(skb)->h_dest, dev->net->dev_addr); return 1; } EXPORT_SYMBOL_GPL(usbnet_cdc_zte_rx_fixup); /* Ensure correct link state * * Some devices (ZTE MF823/831/910) export two carrier on notifications when * connected. This causes the link state to be incorrect. Work around this by * always setting the state to off, then on. */ static void usbnet_cdc_zte_status(struct usbnet *dev, struct urb *urb) { struct usb_cdc_notification *event; if (urb->actual_length < sizeof(*event)) return; event = urb->transfer_buffer; if (event->bNotificationType != USB_CDC_NOTIFY_NETWORK_CONNECTION) { usbnet_cdc_status(dev, urb); return; } netif_dbg(dev, timer, dev->net, "CDC: carrier %s\n", event->wValue ? 
"on" : "off"); if (event->wValue && netif_carrier_ok(dev->net)) netif_carrier_off(dev->net); usbnet_link_change(dev, !!event->wValue, 0); } static const struct driver_info cdc_info = { .description = "CDC Ethernet Device", .flags = FLAG_ETHER | FLAG_POINTTOPOINT, .bind = usbnet_cdc_bind, .unbind = usbnet_cdc_unbind, .status = usbnet_cdc_status, .set_rx_mode = usbnet_cdc_update_filter, .manage_power = usbnet_manage_power, }; static const struct driver_info zte_cdc_info = { .description = "ZTE CDC Ethernet Device", .flags = FLAG_ETHER | FLAG_POINTTOPOINT, .bind = usbnet_cdc_zte_bind, .unbind = usbnet_cdc_unbind, .status = usbnet_cdc_zte_status, .set_rx_mode = usbnet_cdc_update_filter, .manage_power = usbnet_manage_power, .rx_fixup = usbnet_cdc_zte_rx_fixup, }; static const struct driver_info wwan_info = { .description = "Mobile Broadband Network Device", .flags = FLAG_WWAN, .bind = usbnet_cdc_bind, .unbind = usbnet_cdc_unbind, .status = usbnet_cdc_status, .set_rx_mode = usbnet_cdc_update_filter, .manage_power = usbnet_manage_power, }; /*-------------------------------------------------------------------------*/ #define HUAWEI_VENDOR_ID 0x12D1 #define NOVATEL_VENDOR_ID 0x1410 #define ZTE_VENDOR_ID 0x19D2 #define DELL_VENDOR_ID 0x413C #define REALTEK_VENDOR_ID 0x0bda #define SAMSUNG_VENDOR_ID 0x04e8 #define LENOVO_VENDOR_ID 0x17ef #define LINKSYS_VENDOR_ID 0x13b1 #define NVIDIA_VENDOR_ID 0x0955 #define HP_VENDOR_ID 0x03f0 #define MICROSOFT_VENDOR_ID 0x045e #define UBLOX_VENDOR_ID 0x1546 #define TPLINK_VENDOR_ID 0x2357 #define AQUANTIA_VENDOR_ID 0x2eca #define ASIX_VENDOR_ID 0x0b95 static const struct usb_device_id products[] = { /* BLACKLIST !! * * First blacklist any products that are egregiously nonconformant * with the CDC Ethernet specs. Minor braindamage we cope with; when * they're not even trying, needing a separate driver is only the first * of the differences to show up. */ #define ZAURUS_MASTER_INTERFACE \ .bInterfaceClass = USB_CLASS_COMM, \ .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \ .bInterfaceProtocol = USB_CDC_PROTO_NONE #define ZAURUS_FAKE_INTERFACE \ .bInterfaceClass = USB_CLASS_COMM, \ .bInterfaceSubClass = USB_CDC_SUBCLASS_MDLM, \ .bInterfaceProtocol = USB_CDC_PROTO_NONE /* SA-1100 based Sharp Zaurus ("collie"), or compatible; * wire-incompatible with true CDC Ethernet implementations. * (And, it seems, needlessly so...) */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8004, ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, /* PXA-25x based Sharp Zaurii. Note that it seems some of these * (later models especially) may have shipped only with firmware * advertising false "CDC MDLM" compatibility ... but we're not * clear which models did that, so for now let's assume the worst. 
*/ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8005, /* A-300 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8005, /* A-300 */ ZAURUS_FAKE_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8006, /* B-500/SL-5600 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8006, /* B-500/SL-5600 */ ZAURUS_FAKE_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8007, /* C-700 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8007, /* C-700 */ ZAURUS_FAKE_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x9031, /* C-750 C-760 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x9032, /* SL-6000 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x9032, /* SL-6000 */ ZAURUS_FAKE_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, /* reported with some C860 units */ .idProduct = 0x9050, /* C-860 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, /* Olympus has some models with a Zaurus-compatible option. 
* R-1000 uses a FreeScale i.MXL cpu (ARMv4T) */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x07B4, .idProduct = 0x0F02, /* R-1000 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, /* LG Electronics VL600 wants additional headers on every frame */ { USB_DEVICE_AND_INTERFACE_INFO(0x1004, 0x61aa, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Logitech Harmony 900 - uses the pseudo-MDLM (BLAN) driver */ { USB_DEVICE_AND_INTERFACE_INFO(0x046d, 0xc11f, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Novatel USB551L and MC551 - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(NOVATEL_VENDOR_ID, 0xB001, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Novatel E362 - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(NOVATEL_VENDOR_ID, 0x9010, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Dell Wireless 5800 (Novatel E362) - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x8195, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Dell Wireless 5800 (Novatel E362) - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x8196, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Dell Wireless 5804 (Novatel E371) - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x819b, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Novatel Expedite E371 - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(NOVATEL_VENDOR_ID, 0x9011, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* HP lt2523 (Novatel E371) - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(HP_VENDOR_ID, 0x421d, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* AnyDATA ADU960S - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(0x16d5, 0x650a, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Huawei E1820 - handled by qmi_wwan */ { USB_DEVICE_INTERFACE_NUMBER(HUAWEI_VENDOR_ID, 0x14ac, 1), .driver_info = 0, }, /* Realtek RTL8153 Based USB 3.0 Ethernet Adapters */ { USB_DEVICE_AND_INTERFACE_INFO(REALTEK_VENDOR_ID, 0x8153, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Lenovo Powered USB-C Travel Hub (4X90S92381, based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x721e, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Aquantia AQtion USB to 5GbE Controller (based on AQC111U) */ { USB_DEVICE_AND_INTERFACE_INFO(AQUANTIA_VENDOR_ID, 0xc101, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* ASIX USB 3.1 Gen1 to 5G Multi-Gigabit Ethernet Adapter(based on AQC111U) */ { USB_DEVICE_AND_INTERFACE_INFO(ASIX_VENDOR_ID, 0x2790, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* ASIX USB 3.1 Gen1 to 2.5G Multi-Gigabit Ethernet Adapter(based on AQC112U) */ { USB_DEVICE_AND_INTERFACE_INFO(ASIX_VENDOR_ID, 0x2791, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* USB-C 3.1 to 5GBASE-T Ethernet Adapter (based on AQC111U) */ { USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0xe05a, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 
0, }, /* QNAP QNA-UC5G1T USB to 5GbE Adapter (based on AQC111U) */ { USB_DEVICE_AND_INTERFACE_INFO(0x1c04, 0x0015, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. * We match the main interface, ignoring the optional device * class so we could handle devices that aren't exclusively * CDC ether. * * NOTE: this match must come AFTER entries blacklisting devices * because of bugs/quirks in a given product (like Zaurus, above). */ { /* ZTE (Vodafone) K3805-Z */ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1003, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* ZTE (Vodafone) K3806-Z */ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1015, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* ZTE (Vodafone) K4510-Z */ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1173, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* ZTE (Vodafone) K3770-Z */ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1177, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* ZTE (Vodafone) K3772-Z */ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1181, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* Telit modules */ USB_VENDOR_AND_INTERFACE_INFO(0x1bc7, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (kernel_ulong_t) &wwan_info, }, { /* Dell DW5580 modules */ USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x81ba, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (kernel_ulong_t)&wwan_info, }, { /* Huawei ME906 and ME909 */ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x15c1, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* ZTE modules */ USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&zte_cdc_info, }, { /* U-blox TOBY-L2 */ USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1143, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* U-blox SARA-U2 */ USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1104, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* U-blox LARA-R6 01B */ USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1313, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* U-blox LARA-L6 */ USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1343, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* Cinterion PLS8 modem by GEMALTO */ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0061, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* Cinterion AHS3 modem by GEMALTO */ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0055, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* Cinterion PLS62-W modem by GEMALTO/THALES */ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x005b, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), 
.driver_info = (unsigned long)&wwan_info, }, { /* Cinterion PLS83/PLS63 modem by GEMALTO/THALES */ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0069, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &cdc_info, }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* Various Huawei modems with a network port like the UMG1831 */ USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, 255), .driver_info = (unsigned long)&wwan_info, }, { }, /* END */ }; MODULE_DEVICE_TABLE(usb, products); static struct usb_driver cdc_driver = { .name = "cdc_ether", .id_table = products, .probe = usbnet_probe, .disconnect = usbnet_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, .reset_resume = usbnet_resume, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, }; module_usb_driver(cdc_driver); MODULE_AUTHOR("David Brownell"); MODULE_DESCRIPTION("USB CDC Ethernet devices"); MODULE_LICENSE("GPL");
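The device table above is also the place where new quirks land: a nonconforming product gets an early match with .driver_info = 0 so this driver ignores it (leaving it for rndis_host, qmi_wwan, or a vendor driver), while well-behaved hardware falls through to the generic class matches at the end. A hypothetical blacklist entry would look like the sketch below; the 0x1234/0x5678 IDs are placeholders, not a real device.

/* Hypothetical example entry -- vendor/product IDs are placeholders. */
{	/* Example Corp LTE stick - handled by another driver */
	USB_DEVICE_AND_INTERFACE_INFO(0x1234, 0x5678, USB_CLASS_COMM,
			USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
	.driver_info = 0,
},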
// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2003-2008 Takahiro Hirofuchi * Copyright (C) 2015-2016 Nobuo Iwata */ #include <linux/kthread.h> #include <linux/file.h> #include <linux/net.h> #include <linux/platform_device.h> #include <linux/slab.h> /* Hardening for Spectre-v1 */ #include <linux/nospec.h> #include "usbip_common.h" #include "vhci.h" /* TODO: refine locking ?*/ /* * output example: * hub port sta spd dev sockfd local_busid * hs 0000 004 000 00000000 000003 1-2.3 * ................................................ * ss 0008 004 000 00000000 000004 2-3.4 * ................................................ * * Output includes socket fd instead of socket pointer address to avoid * leaking kernel memory address in: * /sys/devices/platform/vhci_hcd.0/status and in debug output. * The socket pointer address is not used at the moment and it was made * visible as a convenient way to find IP address from socket pointer * address by looking up /proc/net/{tcp,tcp6}. As this opens a security * hole, the change is made to use sockfd instead. 
* */ static void port_show_vhci(char **out, int hub, int port, struct vhci_device *vdev) { if (hub == HUB_SPEED_HIGH) *out += sprintf(*out, "hs %04u %03u ", port, vdev->ud.status); else /* hub == HUB_SPEED_SUPER */ *out += sprintf(*out, "ss %04u %03u ", port, vdev->ud.status); if (vdev->ud.status == VDEV_ST_USED) { *out += sprintf(*out, "%03u %08x ", vdev->speed, vdev->devid); *out += sprintf(*out, "%06u %s", vdev->ud.sockfd, dev_name(&vdev->udev->dev)); } else { *out += sprintf(*out, "000 00000000 "); *out += sprintf(*out, "000000 0-0"); } *out += sprintf(*out, "\n"); } /* Sysfs entry to show port status */ static ssize_t status_show_vhci(int pdev_nr, char *out) { struct platform_device *pdev = vhcis[pdev_nr].pdev; struct vhci *vhci; struct usb_hcd *hcd; struct vhci_hcd *vhci_hcd; char *s = out; int i; unsigned long flags; if (!pdev || !out) { usbip_dbg_vhci_sysfs("show status error\n"); return 0; } hcd = platform_get_drvdata(pdev); vhci_hcd = hcd_to_vhci_hcd(hcd); vhci = vhci_hcd->vhci; spin_lock_irqsave(&vhci->lock, flags); for (i = 0; i < VHCI_HC_PORTS; i++) { struct vhci_device *vdev = &vhci->vhci_hcd_hs->vdev[i]; spin_lock(&vdev->ud.lock); port_show_vhci(&out, HUB_SPEED_HIGH, pdev_nr * VHCI_PORTS + i, vdev); spin_unlock(&vdev->ud.lock); } for (i = 0; i < VHCI_HC_PORTS; i++) { struct vhci_device *vdev = &vhci->vhci_hcd_ss->vdev[i]; spin_lock(&vdev->ud.lock); port_show_vhci(&out, HUB_SPEED_SUPER, pdev_nr * VHCI_PORTS + VHCI_HC_PORTS + i, vdev); spin_unlock(&vdev->ud.lock); } spin_unlock_irqrestore(&vhci->lock, flags); return out - s; } static ssize_t status_show_not_ready(int pdev_nr, char *out) { char *s = out; int i = 0; for (i = 0; i < VHCI_HC_PORTS; i++) { out += sprintf(out, "hs %04u %03u ", (pdev_nr * VHCI_PORTS) + i, VDEV_ST_NOTASSIGNED); out += sprintf(out, "000 00000000 0000000000000000 0-0"); out += sprintf(out, "\n"); } for (i = 0; i < VHCI_HC_PORTS; i++) { out += sprintf(out, "ss %04u %03u ", (pdev_nr * VHCI_PORTS) + VHCI_HC_PORTS + i, VDEV_ST_NOTASSIGNED); out += sprintf(out, "000 00000000 0000000000000000 0-0"); out += sprintf(out, "\n"); } return out - s; } static int status_name_to_id(const char *name) { char *c; long val; int ret; c = strchr(name, '.'); if (c == NULL) return 0; ret = kstrtol(c+1, 10, &val); if (ret < 0) return ret; return val; } static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *out) { char *s = out; int pdev_nr; out += sprintf(out, "hub port sta spd dev sockfd local_busid\n"); pdev_nr = status_name_to_id(attr->attr.name); if (pdev_nr < 0) out += status_show_not_ready(pdev_nr, out); else out += status_show_vhci(pdev_nr, out); return out - s; } static ssize_t nports_show(struct device *dev, struct device_attribute *attr, char *out) { char *s = out; /* * Half the ports are for SPEED_HIGH and half for SPEED_SUPER, * thus the * 2. 
*/ out += sprintf(out, "%d\n", VHCI_PORTS * vhci_num_controllers); return out - s; } static DEVICE_ATTR_RO(nports); /* Sysfs entry to shutdown a virtual connection */ static int vhci_port_disconnect(struct vhci_hcd *vhci_hcd, __u32 rhport) { struct vhci_device *vdev = &vhci_hcd->vdev[rhport]; struct vhci *vhci = vhci_hcd->vhci; unsigned long flags; usbip_dbg_vhci_sysfs("enter\n"); mutex_lock(&vdev->ud.sysfs_lock); /* lock */ spin_lock_irqsave(&vhci->lock, flags); spin_lock(&vdev->ud.lock); if (vdev->ud.status == VDEV_ST_NULL) { pr_err("not connected %d\n", vdev->ud.status); /* unlock */ spin_unlock(&vdev->ud.lock); spin_unlock_irqrestore(&vhci->lock, flags); mutex_unlock(&vdev->ud.sysfs_lock); return -EINVAL; } /* unlock */ spin_unlock(&vdev->ud.lock); spin_unlock_irqrestore(&vhci->lock, flags); usbip_event_add(&vdev->ud, VDEV_EVENT_DOWN); mutex_unlock(&vdev->ud.sysfs_lock); return 0; } static int valid_port(__u32 *pdev_nr, __u32 *rhport) { if (*pdev_nr >= vhci_num_controllers) { pr_err("pdev %u\n", *pdev_nr); return 0; } *pdev_nr = array_index_nospec(*pdev_nr, vhci_num_controllers); if (*rhport >= VHCI_HC_PORTS) { pr_err("rhport %u\n", *rhport); return 0; } *rhport = array_index_nospec(*rhport, VHCI_HC_PORTS); return 1; } static ssize_t detach_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { __u32 port = 0, pdev_nr = 0, rhport = 0; struct usb_hcd *hcd; struct vhci_hcd *vhci_hcd; int ret; if (kstrtoint(buf, 10, &port) < 0) return -EINVAL; pdev_nr = port_to_pdev_nr(port); rhport = port_to_rhport(port); if (!valid_port(&pdev_nr, &rhport)) return -EINVAL; hcd = platform_get_drvdata(vhcis[pdev_nr].pdev); if (hcd == NULL) { dev_err(dev, "port is not ready %u\n", port); return -EAGAIN; } usbip_dbg_vhci_sysfs("rhport %d\n", rhport); if ((port / VHCI_HC_PORTS) % 2) vhci_hcd = hcd_to_vhci_hcd(hcd)->vhci->vhci_hcd_ss; else vhci_hcd = hcd_to_vhci_hcd(hcd)->vhci->vhci_hcd_hs; ret = vhci_port_disconnect(vhci_hcd, rhport); if (ret < 0) return -EINVAL; usbip_dbg_vhci_sysfs("Leave\n"); return count; } static DEVICE_ATTR_WO(detach); static int valid_args(__u32 *pdev_nr, __u32 *rhport, enum usb_device_speed speed) { if (!valid_port(pdev_nr, rhport)) { return 0; } switch (speed) { case USB_SPEED_LOW: case USB_SPEED_FULL: case USB_SPEED_HIGH: case USB_SPEED_WIRELESS: case USB_SPEED_SUPER: case USB_SPEED_SUPER_PLUS: break; default: pr_err("Failed attach request for unsupported USB speed: %s\n", usb_speed_string(speed)); return 0; } return 1; } /* Sysfs entry to establish a virtual connection */ /* * To start a new USB/IP attachment, a userland program needs to setup a TCP * connection and then write its socket descriptor with remote device * information into this sysfs file. * * A remote device is virtually attached to the root-hub port of @rhport with * @speed. @devid is embedded into a request to specify the remote device in a * server host. * * write() returns 0 on success, else negative errno. 
*/ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct socket *socket; int sockfd = 0; __u32 port = 0, pdev_nr = 0, rhport = 0, devid = 0, speed = 0; struct usb_hcd *hcd; struct vhci_hcd *vhci_hcd; struct vhci_device *vdev; struct vhci *vhci; int err; unsigned long flags; struct task_struct *tcp_rx = NULL; struct task_struct *tcp_tx = NULL; /* * @rhport: port number of vhci_hcd * @sockfd: socket descriptor of an established TCP connection * @devid: unique device identifier in a remote host * @speed: usb device speed in a remote host */ if (sscanf(buf, "%u %u %u %u", &port, &sockfd, &devid, &speed) != 4) return -EINVAL; pdev_nr = port_to_pdev_nr(port); rhport = port_to_rhport(port); usbip_dbg_vhci_sysfs("port(%u) pdev(%d) rhport(%u)\n", port, pdev_nr, rhport); usbip_dbg_vhci_sysfs("sockfd(%u) devid(%u) speed(%u)\n", sockfd, devid, speed); /* check received parameters */ if (!valid_args(&pdev_nr, &rhport, speed)) return -EINVAL; hcd = platform_get_drvdata(vhcis[pdev_nr].pdev); if (hcd == NULL) { dev_err(dev, "port %d is not ready\n", port); return -EAGAIN; } vhci_hcd = hcd_to_vhci_hcd(hcd); vhci = vhci_hcd->vhci; if (speed >= USB_SPEED_SUPER) vdev = &vhci->vhci_hcd_ss->vdev[rhport]; else vdev = &vhci->vhci_hcd_hs->vdev[rhport]; mutex_lock(&vdev->ud.sysfs_lock); /* Extract socket from fd. */ socket = sockfd_lookup(sockfd, &err); if (!socket) { dev_err(dev, "failed to lookup sock"); err = -EINVAL; goto unlock_mutex; } if (socket->type != SOCK_STREAM) { dev_err(dev, "Expecting SOCK_STREAM - found %d", socket->type); sockfd_put(socket); err = -EINVAL; goto unlock_mutex; } /* create threads before locking */ tcp_rx = kthread_create(vhci_rx_loop, &vdev->ud, "vhci_rx"); if (IS_ERR(tcp_rx)) { sockfd_put(socket); err = -EINVAL; goto unlock_mutex; } tcp_tx = kthread_create(vhci_tx_loop, &vdev->ud, "vhci_tx"); if (IS_ERR(tcp_tx)) { kthread_stop(tcp_rx); sockfd_put(socket); err = -EINVAL; goto unlock_mutex; } /* get task structs now */ get_task_struct(tcp_rx); get_task_struct(tcp_tx); /* now begin lock until setting vdev status set */ spin_lock_irqsave(&vhci->lock, flags); spin_lock(&vdev->ud.lock); if (vdev->ud.status != VDEV_ST_NULL) { /* end of the lock */ spin_unlock(&vdev->ud.lock); spin_unlock_irqrestore(&vhci->lock, flags); sockfd_put(socket); kthread_stop_put(tcp_rx); kthread_stop_put(tcp_tx); dev_err(dev, "port %d already used\n", rhport); /* * Will be retried from userspace * if there's another free port. 
*/ err = -EBUSY; goto unlock_mutex; } dev_info(dev, "pdev(%u) rhport(%u) sockfd(%d)\n", pdev_nr, rhport, sockfd); dev_info(dev, "devid(%u) speed(%u) speed_str(%s)\n", devid, speed, usb_speed_string(speed)); vdev->devid = devid; vdev->speed = speed; vdev->ud.sockfd = sockfd; vdev->ud.tcp_socket = socket; vdev->ud.tcp_rx = tcp_rx; vdev->ud.tcp_tx = tcp_tx; vdev->ud.status = VDEV_ST_NOTASSIGNED; usbip_kcov_handle_init(&vdev->ud); spin_unlock(&vdev->ud.lock); spin_unlock_irqrestore(&vhci->lock, flags); /* end the lock */ wake_up_process(vdev->ud.tcp_rx); wake_up_process(vdev->ud.tcp_tx); rh_port_connect(vdev, speed); dev_info(dev, "Device attached\n"); mutex_unlock(&vdev->ud.sysfs_lock); return count; unlock_mutex: mutex_unlock(&vdev->ud.sysfs_lock); return err; } static DEVICE_ATTR_WO(attach); #define MAX_STATUS_NAME 16 struct status_attr { struct device_attribute attr; char name[MAX_STATUS_NAME+1]; }; static struct status_attr *status_attrs; static void set_status_attr(int id) { struct status_attr *status; status = status_attrs + id; if (id == 0) strcpy(status->name, "status"); else snprintf(status->name, MAX_STATUS_NAME+1, "status.%d", id); status->attr.attr.name = status->name; status->attr.attr.mode = S_IRUGO; status->attr.show = status_show; sysfs_attr_init(&status->attr.attr); } static int init_status_attrs(void) { int id; status_attrs = kcalloc(vhci_num_controllers, sizeof(struct status_attr), GFP_KERNEL); if (status_attrs == NULL) return -ENOMEM; for (id = 0; id < vhci_num_controllers; id++) set_status_attr(id); return 0; } static void finish_status_attrs(void) { kfree(status_attrs); } struct attribute_group vhci_attr_group = { .attrs = NULL, }; int vhci_init_attr_group(void) { struct attribute **attrs; int ret, i; attrs = kcalloc((vhci_num_controllers + 5), sizeof(struct attribute *), GFP_KERNEL); if (attrs == NULL) return -ENOMEM; ret = init_status_attrs(); if (ret) { kfree(attrs); return ret; } *attrs = &dev_attr_nports.attr; *(attrs + 1) = &dev_attr_detach.attr; *(attrs + 2) = &dev_attr_attach.attr; *(attrs + 3) = &dev_attr_usbip_debug.attr; for (i = 0; i < vhci_num_controllers; i++) *(attrs + i + 4) = &((status_attrs + i)->attr.attr); vhci_attr_group.attrs = attrs; return 0; } void vhci_finish_attr_group(void) { finish_status_attrs(); kfree(vhci_attr_group.attrs); }
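attach_store() above completes an attachment that userspace has already negotiated: the usbip tool connects a TCP socket to the remote usbipd, imports the device, and only then writes "port sockfd devid speed" into the attach attribute. A minimal sketch of that final write is below; the /sys/devices/platform/vhci_hcd.0/attach path and the caller having picked a free port are assumptions for illustration, and the field order simply matches the sscanf() in attach_store().

/* Userspace sketch: hand an established SOCK_STREAM fd to vhci_hcd.
 * port/devid/speed are chosen by the caller (speed uses the
 * enum usb_device_speed numbering checked in valid_args()). */
#include <stdio.h>

static int vhci_attach(int sockfd, unsigned int port,
		       unsigned int devid, unsigned int speed)
{
	/* Path assumed for controller 0; status/nports live next to it. */
	FILE *f = fopen("/sys/devices/platform/vhci_hcd.0/attach", "w");

	if (!f)
		return -1;

	/* Same field order as the sscanf() in attach_store() above. */
	fprintf(f, "%u %d %u %u", port, sockfd, devid, speed);

	return fclose(f);	/* 0 on success; a failed write surfaces here */
}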
30 10 20 20 20 20 20 20 4 4 1 2 1 15 15 14 1 132 132 132 132 132 2 1 1 13 13 12 8 4 12 2 1 1 2 8 3 7 15 15 15 132 132 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 // SPDX-License-Identifier: GPL-2.0-only /* * vxcan.c - Virtual CAN Tunnel for cross namespace communication * * This code is derived from drivers/net/can/vcan.c for the virtual CAN * specific parts and from drivers/net/veth.c to implement the netlink API * for network interface pairs in a common and established way. * * Copyright (c) 2017 Oliver Hartkopp <socketcan@hartkopp.net> */ #include <linux/ethtool.h> #include <linux/module.h> #include <linux/init.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/can.h> #include <linux/can/dev.h> #include <linux/can/skb.h> #include <linux/can/vxcan.h> #include <linux/can/can-ml.h> #include <linux/slab.h> #include <net/rtnetlink.h> #define DRV_NAME "vxcan" MODULE_DESCRIPTION("Virtual CAN Tunnel"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Oliver Hartkopp <socketcan@hartkopp.net>"); MODULE_ALIAS_RTNL_LINK(DRV_NAME); struct vxcan_priv { struct net_device __rcu *peer; }; static netdev_tx_t vxcan_xmit(struct sk_buff *oskb, struct net_device *dev) { struct vxcan_priv *priv = netdev_priv(dev); struct net_device *peer; struct net_device_stats *peerstats, *srcstats = &dev->stats; struct sk_buff *skb; unsigned int len; if (can_dropped_invalid_skb(dev, oskb)) return NETDEV_TX_OK; rcu_read_lock(); peer = rcu_dereference(priv->peer); if (unlikely(!peer)) { kfree_skb(oskb); dev->stats.tx_dropped++; goto out_unlock; } skb_tx_timestamp(oskb); skb = skb_clone(oskb, GFP_ATOMIC); if (skb) { consume_skb(oskb); } else { kfree_skb(oskb); goto out_unlock; } /* reset CAN GW hop counter */ skb->csum_start = 0; skb->pkt_type = PACKET_BROADCAST; skb->dev = peer; skb->ip_summed = CHECKSUM_UNNECESSARY; len = can_skb_get_data_len(skb); if (netif_rx(skb) == NET_RX_SUCCESS) { srcstats->tx_packets++; srcstats->tx_bytes += len; peerstats = &peer->stats; peerstats->rx_packets++; peerstats->rx_bytes += len; } out_unlock: rcu_read_unlock(); return NETDEV_TX_OK; } static int vxcan_open(struct net_device *dev) { struct vxcan_priv *priv = netdev_priv(dev); struct net_device *peer = rtnl_dereference(priv->peer); if (!peer) return -ENOTCONN; if (peer->flags & IFF_UP) { netif_carrier_on(dev); netif_carrier_on(peer); } return 0; } static int vxcan_close(struct net_device *dev) { struct vxcan_priv *priv = 
netdev_priv(dev); struct net_device *peer = rtnl_dereference(priv->peer); netif_carrier_off(dev); if (peer) netif_carrier_off(peer); return 0; } static int vxcan_get_iflink(const struct net_device *dev) { struct vxcan_priv *priv = netdev_priv(dev); struct net_device *peer; int iflink; rcu_read_lock(); peer = rcu_dereference(priv->peer); iflink = peer ? READ_ONCE(peer->ifindex) : 0; rcu_read_unlock(); return iflink; } static int vxcan_change_mtu(struct net_device *dev, int new_mtu) { /* Do not allow changing the MTU while running */ if (dev->flags & IFF_UP) return -EBUSY; if (new_mtu != CAN_MTU && new_mtu != CANFD_MTU && !can_is_canxl_dev_mtu(new_mtu)) return -EINVAL; WRITE_ONCE(dev->mtu, new_mtu); return 0; } static const struct net_device_ops vxcan_netdev_ops = { .ndo_open = vxcan_open, .ndo_stop = vxcan_close, .ndo_start_xmit = vxcan_xmit, .ndo_get_iflink = vxcan_get_iflink, .ndo_change_mtu = vxcan_change_mtu, }; static const struct ethtool_ops vxcan_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, }; static void vxcan_setup(struct net_device *dev) { struct can_ml_priv *can_ml; dev->type = ARPHRD_CAN; dev->mtu = CANFD_MTU; dev->hard_header_len = 0; dev->addr_len = 0; dev->tx_queue_len = 0; dev->flags = IFF_NOARP; dev->netdev_ops = &vxcan_netdev_ops; dev->ethtool_ops = &vxcan_ethtool_ops; dev->needs_free_netdev = true; can_ml = netdev_priv(dev) + ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN); can_set_ml_priv(dev, can_ml); } /* forward declaration for rtnl_create_link() */ static struct rtnl_link_ops vxcan_link_ops; static int vxcan_newlink(struct net *peer_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct vxcan_priv *priv; struct net_device *peer; struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb; char ifname[IFNAMSIZ]; unsigned char name_assign_type; struct ifinfomsg *ifmp = NULL; int err; /* register peer device */ if (data && data[VXCAN_INFO_PEER]) { struct nlattr *nla_peer = data[VXCAN_INFO_PEER]; ifmp = nla_data(nla_peer); rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack); tbp = peer_tb; } if (ifmp && tbp[IFLA_IFNAME]) { nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); name_assign_type = NET_NAME_USER; } else { snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); name_assign_type = NET_NAME_ENUM; } peer = rtnl_create_link(peer_net, ifname, name_assign_type, &vxcan_link_ops, tbp, extack); if (IS_ERR(peer)) return PTR_ERR(peer); if (ifmp && dev->ifindex) peer->ifindex = ifmp->ifi_index; err = register_netdevice(peer); if (err < 0) { free_netdev(peer); return err; } netif_carrier_off(peer); err = rtnl_configure_link(peer, ifmp, 0, NULL); if (err < 0) goto unregister_network_device; /* register first device */ if (tb[IFLA_IFNAME]) nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); else snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); err = register_netdevice(dev); if (err < 0) goto unregister_network_device; netif_carrier_off(dev); /* cross link the device pair */ priv = netdev_priv(dev); rcu_assign_pointer(priv->peer, peer); priv = netdev_priv(peer); rcu_assign_pointer(priv->peer, dev); return 0; unregister_network_device: unregister_netdevice(peer); return err; } static void vxcan_dellink(struct net_device *dev, struct list_head *head) { struct vxcan_priv *priv; struct net_device *peer; priv = netdev_priv(dev); peer = rtnl_dereference(priv->peer); /* Note : dellink() is called from default_device_exit_batch(), * before a rcu_synchronize() point. The devices are guaranteed * not being freed before one RCU grace period. 
*/ RCU_INIT_POINTER(priv->peer, NULL); unregister_netdevice_queue(dev, head); if (peer) { priv = netdev_priv(peer); RCU_INIT_POINTER(priv->peer, NULL); unregister_netdevice_queue(peer, head); } } static const struct nla_policy vxcan_policy[VXCAN_INFO_MAX + 1] = { [VXCAN_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, }; static struct net *vxcan_get_link_net(const struct net_device *dev) { struct vxcan_priv *priv = netdev_priv(dev); struct net_device *peer = rtnl_dereference(priv->peer); return peer ? dev_net(peer) : dev_net(dev); } static struct rtnl_link_ops vxcan_link_ops = { .kind = DRV_NAME, .priv_size = ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN) + sizeof(struct can_ml_priv), .setup = vxcan_setup, .newlink = vxcan_newlink, .dellink = vxcan_dellink, .policy = vxcan_policy, .peer_type = VXCAN_INFO_PEER, .maxtype = VXCAN_INFO_MAX, .get_link_net = vxcan_get_link_net, }; static __init int vxcan_init(void) { pr_info("vxcan: Virtual CAN Tunnel driver\n"); return rtnl_link_register(&vxcan_link_ops); } static __exit void vxcan_exit(void) { rtnl_link_unregister(&vxcan_link_ops); } module_init(vxcan_init); module_exit(vxcan_exit);
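vxcan only forwards frames between the two peer netdevs, so exercising it from userspace is plain SocketCAN. A minimal sketch, assuming a pair such as vxcan0/vxcan1 has already been created through the netlink API above (for example with iproute2) and brought up; send_on_vxcan() and the frame contents are made up for illustration.

#include <linux/can.h>
#include <linux/can/raw.h>
#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

/* send one classic CAN frame on one end of a vxcan pair */
static int send_on_vxcan(const char *ifname)
{
	struct sockaddr_can addr = { .can_family = AF_CAN };
	struct can_frame cf = {
		.can_id = 0x123,
		.len = 2,		/* can_dlc in older UAPI headers */
		.data = { 0xde, 0xad },
	};
	struct ifreq ifr = { 0 };
	int s = socket(PF_CAN, SOCK_RAW, CAN_RAW);

	if (s < 0)
		return -1;
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	if (ioctl(s, SIOCGIFINDEX, &ifr) < 0)
		goto err;
	addr.can_ifindex = ifr.ifr_ifindex;
	if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		goto err;
	/* vxcan_xmit() clones this skb onto the peer interface */
	if (write(s, &cf, sizeof(cf)) != sizeof(cf))
		goto err;
	close(s);
	return 0;
err:
	close(s);
	return -1;
}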

821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 // SPDX-License-Identifier: GPL-2.0 /* * Code related to the io_uring_register() syscall * * Copyright (C) 2023 Jens Axboe */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/syscalls.h> #include <linux/refcount.h> #include <linux/bits.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/nospec.h> #include <linux/compat.h> #include <linux/io_uring.h> #include <linux/io_uring_types.h> #include "io_uring.h" #include "opdef.h" #include "tctx.h" #include "rsrc.h" #include "sqpoll.h" #include "register.h" #include "cancel.h" #include "kbuf.h" #include "napi.h" #include "eventfd.h" #include "msg_ring.h" #include "memmap.h" #define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \ IORING_REGISTER_LAST + IORING_OP_LAST) static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) { struct io_uring_probe *p; size_t size; int i, ret; if (nr_args > IORING_OP_LAST) nr_args = IORING_OP_LAST; size = struct_size(p, ops, nr_args); p = kzalloc(size, GFP_KERNEL); if (!p) return -ENOMEM; ret = -EFAULT; if (copy_from_user(p, arg, size)) goto out; ret = -EINVAL; if (memchr_inv(p, 0, size)) goto out; p->last_op = IORING_OP_LAST - 1; for (i = 0; i < nr_args; i++) { p->ops[i].op = i; if (io_uring_op_supported(i)) p->ops[i].flags = IO_URING_OP_SUPPORTED; } p->ops_len = i; ret = 0; if (copy_to_user(arg, p, size)) ret = -EFAULT; out: kfree(p); return ret; } int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) { const struct cred *creds; creds = xa_erase(&ctx->personalities, id); if (creds) { put_cred(creds); return 0; } return -EINVAL; } static int io_register_personality(struct io_ring_ctx *ctx) { const struct cred *creds; u32 id; int ret; creds = get_current_cred(); ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds, XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL); if (ret < 0) { put_cred(creds); return ret; } return id; } static __cold int io_parse_restrictions(void __user *arg, unsigned int nr_args, struct io_restriction *restrictions) { struct io_uring_restriction *res; size_t size; int i, ret; if (!arg || nr_args > IORING_MAX_RESTRICTIONS) return -EINVAL; size = array_size(nr_args, sizeof(*res)); if (size == SIZE_MAX) return -EOVERFLOW; res = memdup_user(arg, size); if (IS_ERR(res)) return PTR_ERR(res); ret = -EINVAL; for (i = 0; i < nr_args; i++) { switch (res[i].opcode) { case IORING_RESTRICTION_REGISTER_OP: if (res[i].register_op >= IORING_REGISTER_LAST) goto err; __set_bit(res[i].register_op, restrictions->register_op); break; case IORING_RESTRICTION_SQE_OP: if (res[i].sqe_op >= IORING_OP_LAST) goto err; __set_bit(res[i].sqe_op, restrictions->sqe_op); break; case IORING_RESTRICTION_SQE_FLAGS_ALLOWED: restrictions->sqe_flags_allowed = res[i].sqe_flags; break; case IORING_RESTRICTION_SQE_FLAGS_REQUIRED: restrictions->sqe_flags_required = res[i].sqe_flags; break; default: goto err; } } ret = 0; err: kfree(res); return ret; } static __cold int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg, unsigned int nr_args) { int ret; /* 
Restrictions allowed only if rings started disabled */ if (!(ctx->flags & IORING_SETUP_R_DISABLED)) return -EBADFD; /* We allow only a single restrictions registration */ if (ctx->restrictions.registered) return -EBUSY; ret = io_parse_restrictions(arg, nr_args, &ctx->restrictions); /* Reset all restrictions if an error happened */ if (ret != 0) memset(&ctx->restrictions, 0, sizeof(ctx->restrictions)); else ctx->restrictions.registered = true; return ret; } static int io_register_enable_rings(struct io_ring_ctx *ctx) { if (!(ctx->flags & IORING_SETUP_R_DISABLED)) return -EBADFD; if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) { WRITE_ONCE(ctx->submitter_task, get_task_struct(current)); /* * Lazy activation attempts would fail if it was polled before * submitter_task is set. */ if (wq_has_sleeper(&ctx->poll_wq)) io_activate_pollwq(ctx); } if (ctx->restrictions.registered) ctx->restricted = 1; ctx->flags &= ~IORING_SETUP_R_DISABLED; if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait)) wake_up(&ctx->sq_data->wait); return 0; } static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx, cpumask_var_t new_mask) { int ret; if (!(ctx->flags & IORING_SETUP_SQPOLL)) { ret = io_wq_cpu_affinity(current->io_uring, new_mask); } else { mutex_unlock(&ctx->uring_lock); ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask); mutex_lock(&ctx->uring_lock); } return ret; } static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg, unsigned len) { cpumask_var_t new_mask; int ret; if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) return -ENOMEM; cpumask_clear(new_mask); if (len > cpumask_size()) len = cpumask_size(); #ifdef CONFIG_COMPAT if (in_compat_syscall()) ret = compat_get_bitmap(cpumask_bits(new_mask), (const compat_ulong_t __user *)arg, len * 8 /* CHAR_BIT */); else #endif ret = copy_from_user(new_mask, arg, len); if (ret) { free_cpumask_var(new_mask); return -EFAULT; } ret = __io_register_iowq_aff(ctx, new_mask); free_cpumask_var(new_mask); return ret; } static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx) { return __io_register_iowq_aff(ctx, NULL); } static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, void __user *arg) __must_hold(&ctx->uring_lock) { struct io_tctx_node *node; struct io_uring_task *tctx = NULL; struct io_sq_data *sqd = NULL; __u32 new_count[2]; int i, ret; if (copy_from_user(new_count, arg, sizeof(new_count))) return -EFAULT; for (i = 0; i < ARRAY_SIZE(new_count); i++) if (new_count[i] > INT_MAX) return -EINVAL; if (ctx->flags & IORING_SETUP_SQPOLL) { sqd = ctx->sq_data; if (sqd) { /* * Observe the correct sqd->lock -> ctx->uring_lock * ordering. Fine to drop uring_lock here, we hold * a ref to the ctx. 
*/ refcount_inc(&sqd->refs); mutex_unlock(&ctx->uring_lock); mutex_lock(&sqd->lock); mutex_lock(&ctx->uring_lock); if (sqd->thread) tctx = sqd->thread->io_uring; } } else { tctx = current->io_uring; } BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); for (i = 0; i < ARRAY_SIZE(new_count); i++) if (new_count[i]) ctx->iowq_limits[i] = new_count[i]; ctx->iowq_limits_set = true; if (tctx && tctx->io_wq) { ret = io_wq_max_workers(tctx->io_wq, new_count); if (ret) goto err; } else { memset(new_count, 0, sizeof(new_count)); } if (sqd) { mutex_unlock(&ctx->uring_lock); mutex_unlock(&sqd->lock); io_put_sq_data(sqd); mutex_lock(&ctx->uring_lock); } if (copy_to_user(arg, new_count, sizeof(new_count))) return -EFAULT; /* that's it for SQPOLL, only the SQPOLL task creates requests */ if (sqd) return 0; /* now propagate the restriction to all registered users */ list_for_each_entry(node, &ctx->tctx_list, ctx_node) { tctx = node->task->io_uring; if (WARN_ON_ONCE(!tctx->io_wq)) continue; for (i = 0; i < ARRAY_SIZE(new_count); i++) new_count[i] = ctx->iowq_limits[i]; /* ignore errors, it always returns zero anyway */ (void)io_wq_max_workers(tctx->io_wq, new_count); } return 0; err: if (sqd) { mutex_unlock(&ctx->uring_lock); mutex_unlock(&sqd->lock); io_put_sq_data(sqd); mutex_lock(&ctx->uring_lock); } return ret; } static int io_register_clock(struct io_ring_ctx *ctx, struct io_uring_clock_register __user *arg) { struct io_uring_clock_register reg; if (copy_from_user(&reg, arg, sizeof(reg))) return -EFAULT; if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv))) return -EINVAL; switch (reg.clockid) { case CLOCK_MONOTONIC: ctx->clock_offset = 0; break; case CLOCK_BOOTTIME: ctx->clock_offset = TK_OFFS_BOOT; break; default: return -EINVAL; } ctx->clockid = reg.clockid; return 0; } /* * State to maintain until we can swap. Both new and old state, used for * either mapping or freeing. 
*/ struct io_ring_ctx_rings { struct io_rings *rings; struct io_uring_sqe *sq_sqes; struct io_mapped_region sq_region; struct io_mapped_region ring_region; }; static void io_register_free_rings(struct io_ring_ctx *ctx, struct io_uring_params *p, struct io_ring_ctx_rings *r) { io_free_region(ctx, &r->sq_region); io_free_region(ctx, &r->ring_region); } #define swap_old(ctx, o, n, field) \ do { \ (o).field = (ctx)->field; \ (ctx)->field = (n).field; \ } while (0) #define RESIZE_FLAGS (IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP) #define COPY_FLAGS (IORING_SETUP_NO_SQARRAY | IORING_SETUP_SQE128 | \ IORING_SETUP_CQE32 | IORING_SETUP_NO_MMAP) static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) { struct io_uring_region_desc rd; struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL; size_t size, sq_array_offset; unsigned i, tail, old_head; struct io_uring_params p; int ret; /* for single issuer, must be owner resizing */ if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && current != ctx->submitter_task) return -EEXIST; /* limited to DEFER_TASKRUN for now */ if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)) return -EINVAL; if (copy_from_user(&p, arg, sizeof(p))) return -EFAULT; if (p.flags & ~RESIZE_FLAGS) return -EINVAL; /* properties that are always inherited */ p.flags |= (ctx->flags & COPY_FLAGS); ret = io_uring_fill_params(p.sq_entries, &p); if (unlikely(ret)) return ret; /* nothing to do, but copy params back */ if (p.sq_entries == ctx->sq_entries && p.cq_entries == ctx->cq_entries) { if (copy_to_user(arg, &p, sizeof(p))) return -EFAULT; return 0; } size = rings_size(p.flags, p.sq_entries, p.cq_entries, &sq_array_offset); if (size == SIZE_MAX) return -EOVERFLOW; memset(&rd, 0, sizeof(rd)); rd.size = PAGE_ALIGN(size); if (p.flags & IORING_SETUP_NO_MMAP) { rd.user_addr = p.cq_off.user_addr; rd.flags |= IORING_MEM_REGION_TYPE_USER; } ret = io_create_region_mmap_safe(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING); if (ret) { io_register_free_rings(ctx, &p, &n); return ret; } n.rings = io_region_get_ptr(&n.ring_region); /* * At this point n.rings is shared with userspace, just like o.rings * is as well. While we don't expect userspace to modify it while * a resize is in progress, and it's most likely that userspace will * shoot itself in the foot if it does, we can't always assume good * intent... Use read/write once helpers from here on to indicate the * shared nature of it. */ WRITE_ONCE(n.rings->sq_ring_mask, p.sq_entries - 1); WRITE_ONCE(n.rings->cq_ring_mask, p.cq_entries - 1); WRITE_ONCE(n.rings->sq_ring_entries, p.sq_entries); WRITE_ONCE(n.rings->cq_ring_entries, p.cq_entries); if (copy_to_user(arg, &p, sizeof(p))) { io_register_free_rings(ctx, &p, &n); return -EFAULT; } if (p.flags & IORING_SETUP_SQE128) size = array_size(2 * sizeof(struct io_uring_sqe), p.sq_entries); else size = array_size(sizeof(struct io_uring_sqe), p.sq_entries); if (size == SIZE_MAX) { io_register_free_rings(ctx, &p, &n); return -EOVERFLOW; } memset(&rd, 0, sizeof(rd)); rd.size = PAGE_ALIGN(size); if (p.flags & IORING_SETUP_NO_MMAP) { rd.user_addr = p.sq_off.user_addr; rd.flags |= IORING_MEM_REGION_TYPE_USER; } ret = io_create_region_mmap_safe(ctx, &n.sq_region, &rd, IORING_OFF_SQES); if (ret) { io_register_free_rings(ctx, &p, &n); return ret; } n.sq_sqes = io_region_get_ptr(&n.sq_region); /* * If using SQPOLL, park the thread */ if (ctx->sq_data) { mutex_unlock(&ctx->uring_lock); io_sq_thread_park(ctx->sq_data); mutex_lock(&ctx->uring_lock); } /* * We'll do the swap. 
Grab the ctx->mmap_lock, which will exclude * any new mmap's on the ring fd. Clear out existing mappings to prevent * mmap from seeing them, as we'll unmap them. Any attempt to mmap * existing rings beyond this point will fail. Not that it could proceed * at this point anyway, as the io_uring mmap side needs go grab the * ctx->mmap_lock as well. Likewise, hold the completion lock over the * duration of the actual swap. */ mutex_lock(&ctx->mmap_lock); spin_lock(&ctx->completion_lock); o.rings = ctx->rings; ctx->rings = NULL; o.sq_sqes = ctx->sq_sqes; ctx->sq_sqes = NULL; /* * Now copy SQ and CQ entries, if any. If either of the destination * rings can't hold what is already there, then fail the operation. */ tail = READ_ONCE(o.rings->sq.tail); old_head = READ_ONCE(o.rings->sq.head); if (tail - old_head > p.sq_entries) goto overflow; for (i = old_head; i < tail; i++) { unsigned src_head = i & (ctx->sq_entries - 1); unsigned dst_head = i & (p.sq_entries - 1); n.sq_sqes[dst_head] = o.sq_sqes[src_head]; } WRITE_ONCE(n.rings->sq.head, old_head); WRITE_ONCE(n.rings->sq.tail, tail); tail = READ_ONCE(o.rings->cq.tail); old_head = READ_ONCE(o.rings->cq.head); if (tail - old_head > p.cq_entries) { overflow: /* restore old rings, and return -EOVERFLOW via cleanup path */ ctx->rings = o.rings; ctx->sq_sqes = o.sq_sqes; to_free = &n; ret = -EOVERFLOW; goto out; } for (i = old_head; i < tail; i++) { unsigned src_head = i & (ctx->cq_entries - 1); unsigned dst_head = i & (p.cq_entries - 1); n.rings->cqes[dst_head] = o.rings->cqes[src_head]; } WRITE_ONCE(n.rings->cq.head, old_head); WRITE_ONCE(n.rings->cq.tail, tail); /* invalidate cached cqe refill */ ctx->cqe_cached = ctx->cqe_sentinel = NULL; WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped)); atomic_set(&n.rings->sq_flags, atomic_read(&o.rings->sq_flags)); WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags)); WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow)); /* all done, store old pointers and assign new ones */ if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) ctx->sq_array = (u32 *)((char *)n.rings + sq_array_offset); ctx->sq_entries = p.sq_entries; ctx->cq_entries = p.cq_entries; ctx->rings = n.rings; ctx->sq_sqes = n.sq_sqes; swap_old(ctx, o, n, ring_region); swap_old(ctx, o, n, sq_region); to_free = &o; ret = 0; out: spin_unlock(&ctx->completion_lock); mutex_unlock(&ctx->mmap_lock); io_register_free_rings(ctx, &p, to_free); if (ctx->sq_data) io_sq_thread_unpark(ctx->sq_data); return ret; } static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg) { struct io_uring_mem_region_reg __user *reg_uptr = uarg; struct io_uring_mem_region_reg reg; struct io_uring_region_desc __user *rd_uptr; struct io_uring_region_desc rd; int ret; if (io_region_is_set(&ctx->param_region)) return -EBUSY; if (copy_from_user(&reg, reg_uptr, sizeof(reg))) return -EFAULT; rd_uptr = u64_to_user_ptr(reg.region_uptr); if (copy_from_user(&rd, rd_uptr, sizeof(rd))) return -EFAULT; if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv))) return -EINVAL; if (reg.flags & ~IORING_MEM_REGION_REG_WAIT_ARG) return -EINVAL; /* * This ensures there are no waiters. Waiters are unlocked and it's * hard to synchronise with them, especially if we need to initialise * the region. 
*/ if ((reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) && !(ctx->flags & IORING_SETUP_R_DISABLED)) return -EINVAL; ret = io_create_region_mmap_safe(ctx, &ctx->param_region, &rd, IORING_MAP_OFF_PARAM_REGION); if (ret) return ret; if (copy_to_user(rd_uptr, &rd, sizeof(rd))) { io_free_region(ctx, &ctx->param_region); return -EFAULT; } if (reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) { ctx->cq_wait_arg = io_region_get_ptr(&ctx->param_region); ctx->cq_wait_size = rd.size; } return 0; } static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, void __user *arg, unsigned nr_args) __releases(ctx->uring_lock) __acquires(ctx->uring_lock) { int ret; /* * We don't quiesce the refs for register anymore and so it can't be * dying as we're holding a file ref here. */ if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs))) return -ENXIO; if (ctx->submitter_task && ctx->submitter_task != current) return -EEXIST; if (ctx->restricted) { opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); if (!test_bit(opcode, ctx->restrictions.register_op)) return -EACCES; } switch (opcode) { case IORING_REGISTER_BUFFERS: ret = -EFAULT; if (!arg) break; ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL); break; case IORING_UNREGISTER_BUFFERS: ret = -EINVAL; if (arg || nr_args) break; ret = io_sqe_buffers_unregister(ctx); break; case IORING_REGISTER_FILES: ret = -EFAULT; if (!arg) break; ret = io_sqe_files_register(ctx, arg, nr_args, NULL); break; case IORING_UNREGISTER_FILES: ret = -EINVAL; if (arg || nr_args) break; ret = io_sqe_files_unregister(ctx); break; case IORING_REGISTER_FILES_UPDATE: ret = io_register_files_update(ctx, arg, nr_args); break; case IORING_REGISTER_EVENTFD: ret = -EINVAL; if (nr_args != 1) break; ret = io_eventfd_register(ctx, arg, 0); break; case IORING_REGISTER_EVENTFD_ASYNC: ret = -EINVAL; if (nr_args != 1) break; ret = io_eventfd_register(ctx, arg, 1); break; case IORING_UNREGISTER_EVENTFD: ret = -EINVAL; if (arg || nr_args) break; ret = io_eventfd_unregister(ctx); break; case IORING_REGISTER_PROBE: ret = -EINVAL; if (!arg || nr_args > 256) break; ret = io_probe(ctx, arg, nr_args); break; case IORING_REGISTER_PERSONALITY: ret = -EINVAL; if (arg || nr_args) break; ret = io_register_personality(ctx); break; case IORING_UNREGISTER_PERSONALITY: ret = -EINVAL; if (arg) break; ret = io_unregister_personality(ctx, nr_args); break; case IORING_REGISTER_ENABLE_RINGS: ret = -EINVAL; if (arg || nr_args) break; ret = io_register_enable_rings(ctx); break; case IORING_REGISTER_RESTRICTIONS: ret = io_register_restrictions(ctx, arg, nr_args); break; case IORING_REGISTER_FILES2: ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE); break; case IORING_REGISTER_FILES_UPDATE2: ret = io_register_rsrc_update(ctx, arg, nr_args, IORING_RSRC_FILE); break; case IORING_REGISTER_BUFFERS2: ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER); break; case IORING_REGISTER_BUFFERS_UPDATE: ret = io_register_rsrc_update(ctx, arg, nr_args, IORING_RSRC_BUFFER); break; case IORING_REGISTER_IOWQ_AFF: ret = -EINVAL; if (!arg || !nr_args) break; ret = io_register_iowq_aff(ctx, arg, nr_args); break; case IORING_UNREGISTER_IOWQ_AFF: ret = -EINVAL; if (arg || nr_args) break; ret = io_unregister_iowq_aff(ctx); break; case IORING_REGISTER_IOWQ_MAX_WORKERS: ret = -EINVAL; if (!arg || nr_args != 2) break; ret = io_register_iowq_max_workers(ctx, arg); break; case IORING_REGISTER_RING_FDS: ret = io_ringfd_register(ctx, arg, nr_args); break; case IORING_UNREGISTER_RING_FDS: ret = io_ringfd_unregister(ctx, arg, 
nr_args); break; case IORING_REGISTER_PBUF_RING: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_register_pbuf_ring(ctx, arg); break; case IORING_UNREGISTER_PBUF_RING: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_unregister_pbuf_ring(ctx, arg); break; case IORING_REGISTER_SYNC_CANCEL: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_sync_cancel(ctx, arg); break; case IORING_REGISTER_FILE_ALLOC_RANGE: ret = -EINVAL; if (!arg || nr_args) break; ret = io_register_file_alloc_range(ctx, arg); break; case IORING_REGISTER_PBUF_STATUS: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_register_pbuf_status(ctx, arg); break; case IORING_REGISTER_NAPI: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_register_napi(ctx, arg); break; case IORING_UNREGISTER_NAPI: ret = -EINVAL; if (nr_args != 1) break; ret = io_unregister_napi(ctx, arg); break; case IORING_REGISTER_CLOCK: ret = -EINVAL; if (!arg || nr_args) break; ret = io_register_clock(ctx, arg); break; case IORING_REGISTER_CLONE_BUFFERS: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_register_clone_buffers(ctx, arg); break; case IORING_REGISTER_RESIZE_RINGS: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_register_resize_rings(ctx, arg); break; case IORING_REGISTER_MEM_REGION: ret = -EINVAL; if (!arg || nr_args != 1) break; ret = io_register_mem_region(ctx, arg); break; default: ret = -EINVAL; break; } return ret; } /* * Given an 'fd' value, return the ctx associated with if. If 'registered' is * true, then the registered index is used. Otherwise, the normal fd table. * Caller must call fput() on the returned file, unless it's an ERR_PTR. */ struct file *io_uring_register_get_file(unsigned int fd, bool registered) { struct file *file; if (registered) { /* * Ring fd has been registered via IORING_REGISTER_RING_FDS, we * need only dereference our task private array to find it. */ struct io_uring_task *tctx = current->io_uring; if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX)) return ERR_PTR(-EINVAL); fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); file = tctx->registered_rings[fd]; if (file) get_file(file); } else { file = fget(fd); } if (unlikely(!file)) return ERR_PTR(-EBADF); if (io_is_uring_fops(file)) return file; fput(file); return ERR_PTR(-EOPNOTSUPP); } /* * "blind" registration opcodes are ones where there's no ring given, and * hence the source fd must be -1. 
*/ static int io_uring_register_blind(unsigned int opcode, void __user *arg, unsigned int nr_args) { switch (opcode) { case IORING_REGISTER_SEND_MSG_RING: { struct io_uring_sqe sqe; if (!arg || nr_args != 1) return -EINVAL; if (copy_from_user(&sqe, arg, sizeof(sqe))) return -EFAULT; /* no flags supported */ if (sqe.flags) return -EINVAL; if (sqe.opcode == IORING_OP_MSG_RING) return io_uring_sync_msg_ring(&sqe); } } return -EINVAL; } SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, void __user *, arg, unsigned int, nr_args) { struct io_ring_ctx *ctx; long ret = -EBADF; struct file *file; bool use_registered_ring; use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING); opcode &= ~IORING_REGISTER_USE_REGISTERED_RING; if (opcode >= IORING_REGISTER_LAST) return -EINVAL; if (fd == -1) return io_uring_register_blind(opcode, arg, nr_args); file = io_uring_register_get_file(fd, use_registered_ring); if (IS_ERR(file)) return PTR_ERR(file); ctx = file->private_data; mutex_lock(&ctx->uring_lock); ret = __io_uring_register(ctx, opcode, arg, nr_args); trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr, ctx->buf_table.nr, ret); mutex_unlock(&ctx->uring_lock); fput(file); return ret; }
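A hedged userspace sketch of one register opcode handled above: IORING_REGISTER_PROBE issued through the raw syscall. The ring_fd parameter is assumed to come from io_uring_setup(); the buffer must be zeroed because io_probe() rejects non-zero input via memchr_inv(), and nr_args is capped at 256 by __io_uring_register(). liburing's io_uring_get_probe() wraps this same path.

#include <linux/io_uring.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

static void probe_ops(int ring_fd)
{
	size_t len = sizeof(struct io_uring_probe) +
		     256 * sizeof(struct io_uring_probe_op);
	/* must be zero-filled, io_probe() checks with memchr_inv() */
	struct io_uring_probe *p = calloc(1, len);
	int i;

	if (!p)
		return;
	if (syscall(__NR_io_uring_register, ring_fd,
		    IORING_REGISTER_PROBE, p, 256) < 0) {
		perror("IORING_REGISTER_PROBE");
		free(p);
		return;
	}
	/* io_probe() fills ops[] and flags supported opcodes */
	for (i = 0; i < p->ops_len; i++)
		if (p->ops[i].flags & IO_URING_OP_SUPPORTED)
			printf("op %u supported\n", p->ops[i].op);
	free(p);
}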
8 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 // SPDX-License-Identifier: GPL-2.0-or-later /* * ALSA sequencer device management * Copyright (c) 1999 by Takashi Iwai <tiwai@suse.de> * *---------------------------------------------------------------- * * This device handler separates the card driver module from sequencer * stuff (sequencer core, synth drivers, etc), so that user can avoid * to spend unnecessary resources e.g. if he needs only listening to * MP3s. * * The card (or lowlevel) driver creates a sequencer device entry * via snd_seq_device_new(). This is an entry pointer to communicate * with the sequencer device "driver", which is involved with the * actual part to communicate with the sequencer core. * Each sequencer device entry has an id string and the corresponding * driver with the same id is loaded when required. For example, * lowlevel codes to access emu8000 chip on sbawe card are included in * emu8000-synth module. To activate this module, the hardware * resources like i/o port are passed via snd_seq_device argument. */ #include <linux/device.h> #include <linux/init.h> #include <linux/module.h> #include <sound/core.h> #include <sound/info.h> #include <sound/seq_device.h> #include <sound/seq_kernel.h> #include <sound/initval.h> #include <linux/kmod.h> #include <linux/slab.h> #include <linux/mutex.h> MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>"); MODULE_DESCRIPTION("ALSA sequencer device management"); MODULE_LICENSE("GPL"); /* * bus definition */ static int snd_seq_bus_match(struct device *dev, const struct device_driver *drv) { struct snd_seq_device *sdev = to_seq_dev(dev); struct snd_seq_driver *sdrv = to_seq_drv(drv); return strcmp(sdrv->id, sdev->id) == 0 && sdrv->argsize == sdev->argsize; } static const struct bus_type snd_seq_bus_type = { .name = "snd_seq", .match = snd_seq_bus_match, }; /* * proc interface -- just for compatibility */ #ifdef CONFIG_SND_PROC_FS static struct snd_info_entry *info_entry; static int print_dev_info(struct device *dev, void *data) { struct snd_seq_device *sdev = to_seq_dev(dev); struct snd_info_buffer *buffer = data; snd_iprintf(buffer, "snd-%s,%s,%d\n", sdev->id, dev->driver ? "loaded" : "empty", dev->driver ? 
1 : 0); return 0; } static void snd_seq_device_info(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { bus_for_each_dev(&snd_seq_bus_type, NULL, buffer, print_dev_info); } #endif /* * load all registered drivers (called from seq_clientmgr.c) */ #ifdef CONFIG_MODULES /* flag to block auto-loading */ static atomic_t snd_seq_in_init = ATOMIC_INIT(1); /* blocked as default */ static int request_seq_drv(struct device *dev, void *data) { struct snd_seq_device *sdev = to_seq_dev(dev); if (!dev->driver) request_module("snd-%s", sdev->id); return 0; } static void autoload_drivers(struct work_struct *work) { /* avoid reentrance */ if (atomic_inc_return(&snd_seq_in_init) == 1) bus_for_each_dev(&snd_seq_bus_type, NULL, NULL, request_seq_drv); atomic_dec(&snd_seq_in_init); } static DECLARE_WORK(autoload_work, autoload_drivers); static void queue_autoload_drivers(void) { schedule_work(&autoload_work); } void snd_seq_autoload_init(void) { atomic_dec(&snd_seq_in_init); #ifdef CONFIG_SND_SEQUENCER_MODULE /* initial autoload only when snd-seq is a module */ queue_autoload_drivers(); #endif } EXPORT_SYMBOL(snd_seq_autoload_init); void snd_seq_autoload_exit(void) { atomic_inc(&snd_seq_in_init); } EXPORT_SYMBOL(snd_seq_autoload_exit); void snd_seq_device_load_drivers(void) { queue_autoload_drivers(); flush_work(&autoload_work); } EXPORT_SYMBOL(snd_seq_device_load_drivers); static inline void cancel_autoload_drivers(void) { cancel_work_sync(&autoload_work); } #else static inline void queue_autoload_drivers(void) { } static inline void cancel_autoload_drivers(void) { } #endif /* * device management */ static int snd_seq_device_dev_free(struct snd_device *device) { struct snd_seq_device *dev = device->device_data; cancel_autoload_drivers(); if (dev->private_free) dev->private_free(dev); put_device(&dev->dev); return 0; } static int snd_seq_device_dev_register(struct snd_device *device) { struct snd_seq_device *dev = device->device_data; int err; err = device_add(&dev->dev); if (err < 0) return err; if (!dev->dev.driver) queue_autoload_drivers(); return 0; } static int snd_seq_device_dev_disconnect(struct snd_device *device) { struct snd_seq_device *dev = device->device_data; device_del(&dev->dev); return 0; } static void snd_seq_dev_release(struct device *dev) { kfree(to_seq_dev(dev)); } /* * register a sequencer device * card = card info * device = device number (if any) * id = id of driver * result = return pointer (NULL allowed if unnecessary) */ int snd_seq_device_new(struct snd_card *card, int device, const char *id, int argsize, struct snd_seq_device **result) { struct snd_seq_device *dev; int err; static const struct snd_device_ops dops = { .dev_free = snd_seq_device_dev_free, .dev_register = snd_seq_device_dev_register, .dev_disconnect = snd_seq_device_dev_disconnect, }; if (result) *result = NULL; if (snd_BUG_ON(!id)) return -EINVAL; dev = kzalloc(sizeof(*dev) + argsize, GFP_KERNEL); if (!dev) return -ENOMEM; /* set up device info */ dev->card = card; dev->device = device; dev->id = id; dev->argsize = argsize; device_initialize(&dev->dev); dev->dev.parent = &card->card_dev; dev->dev.bus = &snd_seq_bus_type; dev->dev.release = snd_seq_dev_release; dev_set_name(&dev->dev, "%s-%d-%d", dev->id, card->number, device); /* add this device to the list */ err = snd_device_new(card, SNDRV_DEV_SEQUENCER, dev, &dops); if (err < 0) { put_device(&dev->dev); return err; } if (result) *result = dev; return 0; } EXPORT_SYMBOL(snd_seq_device_new); /* * driver registration */ int 
__snd_seq_driver_register(struct snd_seq_driver *drv, struct module *mod) { if (WARN_ON(!drv->driver.name || !drv->id)) return -EINVAL; drv->driver.bus = &snd_seq_bus_type; drv->driver.owner = mod; return driver_register(&drv->driver); } EXPORT_SYMBOL_GPL(__snd_seq_driver_register); void snd_seq_driver_unregister(struct snd_seq_driver *drv) { driver_unregister(&drv->driver); } EXPORT_SYMBOL_GPL(snd_seq_driver_unregister); /* * module part */ static int __init seq_dev_proc_init(void) { #ifdef CONFIG_SND_PROC_FS info_entry = snd_info_create_module_entry(THIS_MODULE, "drivers", snd_seq_root); if (info_entry == NULL) return -ENOMEM; info_entry->content = SNDRV_INFO_CONTENT_TEXT; info_entry->c.text.read = snd_seq_device_info; if (snd_info_register(info_entry) < 0) { snd_info_free_entry(info_entry); return -ENOMEM; } #endif return 0; } static int __init alsa_seq_device_init(void) { int err; err = bus_register(&snd_seq_bus_type); if (err < 0) return err; err = seq_dev_proc_init(); if (err < 0) bus_unregister(&snd_seq_bus_type); return err; } static void __exit alsa_seq_device_exit(void) { #ifdef CONFIG_MODULES cancel_work_sync(&autoload_work); #endif #ifdef CONFIG_SND_PROC_FS snd_info_free_entry(info_entry); #endif bus_unregister(&snd_seq_bus_type); } subsys_initcall(alsa_seq_device_init) module_exit(alsa_seq_device_exit)
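For the card-driver side described in the header comment, a minimal sketch of creating a sequencer device entry: the "my-synth" id string, struct my_synth_args and my_card_attach_seq() are made-up names, and a driver registered on the snd_seq bus with the same id and argsize would be bound (or module-autoloaded) against this entry.

#include <sound/core.h>
#include <sound/seq_device.h>

struct my_synth_args {
	unsigned long port;	/* resources handed to the bound seq driver */
};

static int my_card_attach_seq(struct snd_card *card, unsigned long port)
{
	struct snd_seq_device *sdev;
	int err;

	/* id "my-synth" selects which snd_seq driver binds to this entry */
	err = snd_seq_device_new(card, 0, "my-synth",
				 sizeof(struct my_synth_args), &sdev);
	if (err < 0)
		return err;

	/* argument block of argsize bytes appended behind the device */
	((struct my_synth_args *)SNDRV_SEQ_DEVICE_ARGPTR(sdev))->port = port;
	return 0;
}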
901 902 901 1 903 900 901 676 676 675 889 894 892 651 651 650 651 651 649 376 375 356 354 375 363 361 16 363 363 362 16 16 362 674 677 673 33 33 33 33 33 33 33 360 359 360 235 237 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2018 Red Hat, Inc. */ #include "xfs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_extent_busy.h" #include "xfs_group.h" /* * Groups can have passive and active references. * * For passive references the code freeing a group is responsible for cleaning * up objects that hold the passive references (e.g. cached buffers). * Routines manipulating passive references are xfs_group_get, xfs_group_hold * and xfs_group_put. * * Active references are for short term access to the group for walking trees or * accessing state. If a group is being shrunk or offlined, the lookup will fail * to find that group and return NULL instead. * Routines manipulating active references are xfs_group_grab and * xfs_group_rele. */ struct xfs_group * xfs_group_get( struct xfs_mount *mp, uint32_t index, enum xfs_group_type type) { struct xfs_group *xg; rcu_read_lock(); xg = xa_load(&mp->m_groups[type].xa, index); if (xg) { trace_xfs_group_get(xg, _RET_IP_); ASSERT(atomic_read(&xg->xg_ref) >= 0); atomic_inc(&xg->xg_ref); } rcu_read_unlock(); return xg; } struct xfs_group * xfs_group_hold( struct xfs_group *xg) { ASSERT(atomic_read(&xg->xg_ref) > 0 || atomic_read(&xg->xg_active_ref) > 0); trace_xfs_group_hold(xg, _RET_IP_); atomic_inc(&xg->xg_ref); return xg; } void xfs_group_put( struct xfs_group *xg) { trace_xfs_group_put(xg, _RET_IP_); ASSERT(atomic_read(&xg->xg_ref) > 0); atomic_dec(&xg->xg_ref); } struct xfs_group * xfs_group_grab( struct xfs_mount *mp, uint32_t index, enum xfs_group_type type) { struct xfs_group *xg; rcu_read_lock(); xg = xa_load(&mp->m_groups[type].xa, index); if (xg) { trace_xfs_group_grab(xg, _RET_IP_); if (!atomic_inc_not_zero(&xg->xg_active_ref)) xg = NULL; } rcu_read_unlock(); return xg; } /* * Iterate to the next group. To start the iteration at @start_index, a %NULL * @xg is passed, else the previous group returned from this function. The * caller should break out of the loop when this returns %NULL. If the caller * wants to break out of a loop that did not finish it needs to release the * active reference to @xg using xfs_group_rele() itself. 
*/ struct xfs_group * xfs_group_next_range( struct xfs_mount *mp, struct xfs_group *xg, uint32_t start_index, uint32_t end_index, enum xfs_group_type type) { uint32_t index = start_index; if (xg) { index = xg->xg_gno + 1; xfs_group_rele(xg); } if (index > end_index) return NULL; return xfs_group_grab(mp, index, type); } /* * Find the next group after @xg, or the first group if @xg is NULL. */ struct xfs_group * xfs_group_grab_next_mark( struct xfs_mount *mp, struct xfs_group *xg, xa_mark_t mark, enum xfs_group_type type) { unsigned long index = 0; if (xg) { index = xg->xg_gno + 1; xfs_group_rele(xg); } rcu_read_lock(); xg = xa_find(&mp->m_groups[type].xa, &index, ULONG_MAX, mark); if (xg) { trace_xfs_group_grab_next_tag(xg, _RET_IP_); if (!atomic_inc_not_zero(&xg->xg_active_ref)) xg = NULL; } rcu_read_unlock(); return xg; } void xfs_group_rele( struct xfs_group *xg) { trace_xfs_group_rele(xg, _RET_IP_); atomic_dec(&xg->xg_active_ref); } void xfs_group_free( struct xfs_mount *mp, uint32_t index, enum xfs_group_type type, void (*uninit)(struct xfs_group *xg)) { struct xfs_group *xg = xa_erase(&mp->m_groups[type].xa, index); XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_ref) != 0); xfs_defer_drain_free(&xg->xg_intents_drain); #ifdef __KERNEL__ kfree(xg->xg_busy_extents); #endif if (uninit) uninit(xg); /* drop the mount's active reference */ xfs_group_rele(xg); XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_active_ref) != 0); kfree_rcu_mightsleep(xg); } int xfs_group_insert( struct xfs_mount *mp, struct xfs_group *xg, uint32_t index, enum xfs_group_type type) { int error; xg->xg_mount = mp; xg->xg_gno = index; xg->xg_type = type; #ifdef __KERNEL__ xg->xg_busy_extents = xfs_extent_busy_alloc(); if (!xg->xg_busy_extents) return -ENOMEM; spin_lock_init(&xg->xg_state_lock); xfs_hooks_init(&xg->xg_rmap_update_hooks); #endif xfs_defer_drain_init(&xg->xg_intents_drain); /* Active ref owned by mount indicates group is online. */ atomic_set(&xg->xg_active_ref, 1); error = xa_insert(&mp->m_groups[type].xa, index, xg, GFP_KERNEL); if (error) { WARN_ON_ONCE(error == -EBUSY); goto out_drain; } return 0; out_drain: xfs_defer_drain_free(&xg->xg_intents_drain); #ifdef __KERNEL__ kfree(xg->xg_busy_extents); #endif return error; } struct xfs_group * xfs_group_get_by_fsb( struct xfs_mount *mp, xfs_fsblock_t fsbno, enum xfs_group_type type) { return xfs_group_get(mp, xfs_fsb_to_gno(mp, fsbno, type), type); }
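A minimal sketch of the iteration contract documented for xfs_group_next_range(): the helper releases the previous group's active reference and grabs the next one, so a caller that stops early must drop the reference itself with xfs_group_rele(). walk_groups() and process_group() are made-up names; only functions defined in this file are used, as an in-kernel xfs caller would.

static int walk_groups(struct xfs_mount *mp, uint32_t start, uint32_t end,
		       enum xfs_group_type type,
		       int (*process_group)(struct xfs_group *xg))
{
	struct xfs_group *xg = NULL;
	int error = 0;

	/* passing the previous xg advances and releases its active ref */
	while ((xg = xfs_group_next_range(mp, xg, start, end, type))) {
		error = process_group(xg);
		if (error) {
			/* broke out early: the active reference is still ours */
			xfs_group_rele(xg);
			break;
		}
	}
	return error;
}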
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 /* * CTS: Cipher Text Stealing mode * * COPYRIGHT (c) 2008 * The Regents of the University of Michigan * ALL RIGHTS RESERVED * * Permission is granted to use, copy, create derivative works * and redistribute this software and such derivative works * for any purpose, so long as the name of The University of * Michigan is not used in any advertising or publicity * pertaining to the use of distribution of this software * without specific, written prior authorization. If the * above copyright notice or any other identification of the * University of Michigan is included in any copy of any * portion of this software, then the disclaimer below must * also be included. * * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF * SUCH DAMAGES. */ /* Derived from various: * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ /* * This is the Cipher Text Stealing mode as described by * Section 8 of rfc2040 and referenced by rfc3962. * rfc3962 includes errata information in its Appendix A. 
*/ #include <crypto/algapi.h> #include <crypto/internal/skcipher.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/log2.h> #include <linux/module.h> #include <linux/scatterlist.h> #include <crypto/scatterwalk.h> #include <linux/slab.h> #include <linux/compiler.h> struct crypto_cts_ctx { struct crypto_skcipher *child; }; struct crypto_cts_reqctx { struct scatterlist sg[2]; unsigned offset; struct skcipher_request subreq; }; static inline u8 *crypto_cts_reqctx_space(struct skcipher_request *req) { struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_skcipher *child = ctx->child; return PTR_ALIGN((u8 *)(rctx + 1) + crypto_skcipher_reqsize(child), crypto_skcipher_alignmask(tfm) + 1); } static int crypto_cts_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(parent); struct crypto_skcipher *child = ctx->child; crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & CRYPTO_TFM_REQ_MASK); return crypto_skcipher_setkey(child, key, keylen); } static void cts_cbc_crypt_done(void *data, int err) { struct skcipher_request *req = data; if (err == -EINPROGRESS) return; skcipher_request_complete(req, err); } static int cts_cbc_encrypt(struct skcipher_request *req) { struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); u8 d[MAX_CIPHER_BLOCKSIZE * 2] __aligned(__alignof__(u32)); struct scatterlist *sg; unsigned int offset; int lastn; offset = rctx->offset; lastn = req->cryptlen - offset; sg = scatterwalk_ffwd(rctx->sg, req->dst, offset - bsize); scatterwalk_map_and_copy(d + bsize, sg, 0, bsize, 0); memset(d, 0, bsize); scatterwalk_map_and_copy(d, req->src, offset, lastn, 0); scatterwalk_map_and_copy(d, sg, 0, bsize + lastn, 1); memzero_explicit(d, sizeof(d)); skcipher_request_set_callback(subreq, req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG, cts_cbc_crypt_done, req); skcipher_request_set_crypt(subreq, sg, sg, bsize, req->iv); return crypto_skcipher_encrypt(subreq); } static void crypto_cts_encrypt_done(void *data, int err) { struct skcipher_request *req = data; if (err) goto out; err = cts_cbc_encrypt(req); if (err == -EINPROGRESS || err == -EBUSY) return; out: skcipher_request_complete(req, err); } static int crypto_cts_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); unsigned int nbytes = req->cryptlen; unsigned int offset; skcipher_request_set_tfm(subreq, ctx->child); if (nbytes < bsize) return -EINVAL; if (nbytes == bsize) { skcipher_request_set_callback(subreq, req->base.flags, req->base.complete, req->base.data); skcipher_request_set_crypt(subreq, req->src, req->dst, nbytes, req->iv); return crypto_skcipher_encrypt(subreq); } offset = rounddown(nbytes - 1, bsize); rctx->offset = offset; skcipher_request_set_callback(subreq, req->base.flags, crypto_cts_encrypt_done, req); skcipher_request_set_crypt(subreq, req->src, req->dst, offset, req->iv); return 
crypto_skcipher_encrypt(subreq) ?: cts_cbc_encrypt(req); } static int cts_cbc_decrypt(struct skcipher_request *req) { struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); u8 d[MAX_CIPHER_BLOCKSIZE * 2] __aligned(__alignof__(u32)); struct scatterlist *sg; unsigned int offset; u8 *space; int lastn; offset = rctx->offset; lastn = req->cryptlen - offset; sg = scatterwalk_ffwd(rctx->sg, req->dst, offset - bsize); /* 1. Decrypt Cn-1 (s) to create Dn */ scatterwalk_map_and_copy(d + bsize, sg, 0, bsize, 0); space = crypto_cts_reqctx_space(req); crypto_xor(d + bsize, space, bsize); /* 2. Pad Cn with zeros at the end to create C of length BB */ memset(d, 0, bsize); scatterwalk_map_and_copy(d, req->src, offset, lastn, 0); /* 3. Exclusive-or Dn with C to create Xn */ /* 4. Select the first Ln bytes of Xn to create Pn */ crypto_xor(d + bsize, d, lastn); /* 5. Append the tail (BB - Ln) bytes of Xn to Cn to create En */ memcpy(d + lastn, d + bsize + lastn, bsize - lastn); /* 6. Decrypt En to create Pn-1 */ scatterwalk_map_and_copy(d, sg, 0, bsize + lastn, 1); memzero_explicit(d, sizeof(d)); skcipher_request_set_callback(subreq, req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG, cts_cbc_crypt_done, req); skcipher_request_set_crypt(subreq, sg, sg, bsize, space); return crypto_skcipher_decrypt(subreq); } static void crypto_cts_decrypt_done(void *data, int err) { struct skcipher_request *req = data; if (err) goto out; err = cts_cbc_decrypt(req); if (err == -EINPROGRESS || err == -EBUSY) return; out: skcipher_request_complete(req, err); } static int crypto_cts_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); unsigned int nbytes = req->cryptlen; unsigned int offset; u8 *space; skcipher_request_set_tfm(subreq, ctx->child); if (nbytes < bsize) return -EINVAL; if (nbytes == bsize) { skcipher_request_set_callback(subreq, req->base.flags, req->base.complete, req->base.data); skcipher_request_set_crypt(subreq, req->src, req->dst, nbytes, req->iv); return crypto_skcipher_decrypt(subreq); } skcipher_request_set_callback(subreq, req->base.flags, crypto_cts_decrypt_done, req); space = crypto_cts_reqctx_space(req); offset = rounddown(nbytes - 1, bsize); rctx->offset = offset; if (offset <= bsize) memcpy(space, req->iv, bsize); else scatterwalk_map_and_copy(space, req->src, offset - 2 * bsize, bsize, 0); skcipher_request_set_crypt(subreq, req->src, req->dst, offset, req->iv); return crypto_skcipher_decrypt(subreq) ?: cts_cbc_decrypt(req); } static int crypto_cts_init_tfm(struct crypto_skcipher *tfm) { struct skcipher_instance *inst = skcipher_alg_instance(tfm); struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst); struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_skcipher *cipher; unsigned reqsize; unsigned bsize; unsigned align; cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; align = crypto_skcipher_alignmask(tfm); bsize = crypto_skcipher_blocksize(cipher); reqsize = ALIGN(sizeof(struct crypto_cts_reqctx) + crypto_skcipher_reqsize(cipher), crypto_tfm_ctx_alignment()) + (align & ~(crypto_tfm_ctx_alignment() - 1)) + bsize; 
crypto_skcipher_set_reqsize(tfm, reqsize); return 0; } static void crypto_cts_exit_tfm(struct crypto_skcipher *tfm) { struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); crypto_free_skcipher(ctx->child); } static void crypto_cts_free(struct skcipher_instance *inst) { crypto_drop_skcipher(skcipher_instance_ctx(inst)); kfree(inst); } static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) { struct crypto_skcipher_spawn *spawn; struct skcipher_alg_common *alg; struct skcipher_instance *inst; u32 mask; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask); if (err) return err; inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; spawn = skcipher_instance_ctx(inst); err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst), crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; alg = crypto_spawn_skcipher_alg_common(spawn); err = -EINVAL; if (alg->ivsize != alg->base.cra_blocksize) goto err_free_inst; if (strncmp(alg->base.cra_name, "cbc(", 4)) goto err_free_inst; err = crypto_inst_setname(skcipher_crypto_instance(inst), "cts", &alg->base); if (err) goto err_free_inst; inst->alg.base.cra_priority = alg->base.cra_priority; inst->alg.base.cra_blocksize = alg->base.cra_blocksize; inst->alg.base.cra_alignmask = alg->base.cra_alignmask; inst->alg.ivsize = alg->base.cra_blocksize; inst->alg.chunksize = alg->chunksize; inst->alg.min_keysize = alg->min_keysize; inst->alg.max_keysize = alg->max_keysize; inst->alg.base.cra_ctxsize = sizeof(struct crypto_cts_ctx); inst->alg.init = crypto_cts_init_tfm; inst->alg.exit = crypto_cts_exit_tfm; inst->alg.setkey = crypto_cts_setkey; inst->alg.encrypt = crypto_cts_encrypt; inst->alg.decrypt = crypto_cts_decrypt; inst->free = crypto_cts_free; err = skcipher_register_instance(tmpl, inst); if (err) { err_free_inst: crypto_cts_free(inst); } return err; } static struct crypto_template crypto_cts_tmpl = { .name = "cts", .create = crypto_cts_create, .module = THIS_MODULE, }; static int __init crypto_cts_module_init(void) { return crypto_register_template(&crypto_cts_tmpl); } static void __exit crypto_cts_module_exit(void) { crypto_unregister_template(&crypto_cts_tmpl); } subsys_initcall(crypto_cts_module_init); module_exit(crypto_cts_module_exit); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("CTS-CBC CipherText Stealing for CBC"); MODULE_ALIAS_CRYPTO("cts");
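/*
 * Illustrative only: a minimal sketch of how a kernel caller might exercise
 * the "cts" template registered above by instantiating "cts(cbc(aes))".
 * The function name, parameters and buffers below are hypothetical and not
 * part of this file; the crypto API calls themselves are the standard
 * skcipher interface.  The request is run synchronously via crypto_wait_req()
 * and error handling is trimmed to the essentials.
 */
#include <crypto/aes.h>
#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int cts_cbc_aes_encrypt_example(u8 *buf, unsigned int len,
					const u8 *key, unsigned int keylen,
					u8 *iv)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	/* CTS needs at least one full cipher block of input. */
	if (len < AES_BLOCK_SIZE)
		return -EINVAL;

	tfm = crypto_alloc_skcipher("cts(cbc(aes))", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	/* buf must be a linearly mapped (e.g. kmalloc'ed) buffer. */
	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
				      CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);

	/* Encrypt in place and wait for completion. */
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}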
[per-line hit counts and source line-number gutter from the coverage viewer omitted]
// SPDX-License-Identifier: GPL-2.0-only /* * A virtual codec example device. 
* * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * * This is a virtual codec device driver for testing the codec framework. * It simulates a device that uses memory buffers for both source and * destination and encodes or decodes the data. */ #include <linux/module.h> #include <linux/delay.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/platform_device.h> #include <media/v4l2-mem2mem.h> #include <media/v4l2-device.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-event.h> #include <media/videobuf2-vmalloc.h> #include "codec-v4l2-fwht.h" MODULE_DESCRIPTION("Virtual codec device"); MODULE_AUTHOR("Hans Verkuil <hansverk@cisco.com>"); MODULE_LICENSE("GPL v2"); static bool multiplanar; module_param(multiplanar, bool, 0444); MODULE_PARM_DESC(multiplanar, " use multi-planar API instead of single-planar API"); static unsigned int debug; module_param(debug, uint, 0644); MODULE_PARM_DESC(debug, " activates debug info"); #define VICODEC_NAME "vicodec" #define MAX_WIDTH 4096U #define MIN_WIDTH 640U #define MAX_HEIGHT 2160U #define MIN_HEIGHT 360U /* Recommended number of buffers for the stateful codecs */ #define VICODEC_REC_BUFS 2 #define dprintk(dev, fmt, arg...) \ v4l2_dbg(1, debug, &dev->v4l2_dev, "%s: " fmt, __func__, ## arg) struct pixfmt_info { u32 id; unsigned int bytesperline_mult; unsigned int sizeimage_mult; unsigned int sizeimage_div; unsigned int luma_step; unsigned int chroma_step; /* Chroma plane subsampling */ unsigned int width_div; unsigned int height_div; }; static const struct v4l2_fwht_pixfmt_info pixfmt_fwht = { V4L2_PIX_FMT_FWHT, 0, 3, 1, 1, 1, 1, 1, 0, 1 }; static const struct v4l2_fwht_pixfmt_info pixfmt_stateless_fwht = { V4L2_PIX_FMT_FWHT_STATELESS, 0, 3, 1, 1, 1, 1, 1, 0, 1 }; static void vicodec_dev_release(struct device *dev) { } static struct platform_device vicodec_pdev = { .name = VICODEC_NAME, .dev.release = vicodec_dev_release, }; /* Per-queue, driver-specific private data */ struct vicodec_q_data { unsigned int coded_width; unsigned int coded_height; unsigned int visible_width; unsigned int visible_height; unsigned int sizeimage; unsigned int vb2_sizeimage; unsigned int sequence; const struct v4l2_fwht_pixfmt_info *info; }; enum { V4L2_M2M_SRC = 0, V4L2_M2M_DST = 1, }; struct vicodec_dev_instance { struct video_device vfd; struct mutex mutex; spinlock_t lock; struct v4l2_m2m_dev *m2m_dev; }; struct vicodec_dev { struct v4l2_device v4l2_dev; struct vicodec_dev_instance stateful_enc; struct vicodec_dev_instance stateful_dec; struct vicodec_dev_instance stateless_dec; #ifdef CONFIG_MEDIA_CONTROLLER struct media_device mdev; #endif }; struct vicodec_ctx { struct v4l2_fh fh; struct vicodec_dev *dev; bool is_enc; bool is_stateless; spinlock_t *lock; struct v4l2_ctrl_handler hdl; /* Source and destination queue data */ struct vicodec_q_data q_data[2]; struct v4l2_fwht_state state; u32 cur_buf_offset; u32 comp_max_size; u32 comp_size; u32 header_size; u32 comp_magic_cnt; bool comp_has_frame; bool comp_has_next_frame; bool first_source_change_sent; bool source_changed; }; static const struct v4l2_event vicodec_eos_event = { .type = V4L2_EVENT_EOS }; static inline struct vicodec_ctx *file2ctx(struct file *file) { return container_of(file->private_data, struct vicodec_ctx, fh); } static struct vicodec_q_data *get_q_data(struct vicodec_ctx *ctx, enum v4l2_buf_type type) { switch (type) { case V4L2_BUF_TYPE_VIDEO_OUTPUT: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: return 
&ctx->q_data[V4L2_M2M_SRC]; case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: return &ctx->q_data[V4L2_M2M_DST]; default: break; } return NULL; } static void copy_cap_to_ref(const u8 *cap, const struct v4l2_fwht_pixfmt_info *info, struct v4l2_fwht_state *state) { int plane_idx; u8 *p_ref = state->ref_frame.buf; unsigned int cap_stride = state->stride; unsigned int ref_stride = state->ref_stride; for (plane_idx = 0; plane_idx < info->planes_num; plane_idx++) { int i; unsigned int h_div = (plane_idx == 1 || plane_idx == 2) ? info->height_div : 1; const u8 *row_cap = cap; u8 *row_ref = p_ref; if (info->planes_num == 3 && plane_idx == 1) { cap_stride /= 2; ref_stride /= 2; } if (plane_idx == 1 && (info->id == V4L2_PIX_FMT_NV24 || info->id == V4L2_PIX_FMT_NV42)) { cap_stride *= 2; ref_stride *= 2; } for (i = 0; i < state->visible_height / h_div; i++) { memcpy(row_ref, row_cap, ref_stride); row_ref += ref_stride; row_cap += cap_stride; } cap += cap_stride * (state->coded_height / h_div); p_ref += ref_stride * (state->coded_height / h_div); } } static bool validate_by_version(unsigned int flags, unsigned int version) { if (!version || version > V4L2_FWHT_VERSION) return false; if (version >= 2) { unsigned int components_num = 1 + ((flags & V4L2_FWHT_FL_COMPONENTS_NUM_MSK) >> V4L2_FWHT_FL_COMPONENTS_NUM_OFFSET); unsigned int pixenc = flags & V4L2_FWHT_FL_PIXENC_MSK; if (components_num == 0 || components_num > 4 || !pixenc) return false; } return true; } static bool validate_stateless_params_flags(const struct v4l2_ctrl_fwht_params *params, const struct v4l2_fwht_pixfmt_info *cur_info) { unsigned int width_div = (params->flags & V4L2_FWHT_FL_CHROMA_FULL_WIDTH) ? 1 : 2; unsigned int height_div = (params->flags & V4L2_FWHT_FL_CHROMA_FULL_HEIGHT) ? 
1 : 2; unsigned int components_num = 3; unsigned int pixenc = 0; if (params->version < 3) return false; components_num = 1 + ((params->flags & V4L2_FWHT_FL_COMPONENTS_NUM_MSK) >> V4L2_FWHT_FL_COMPONENTS_NUM_OFFSET); pixenc = (params->flags & V4L2_FWHT_FL_PIXENC_MSK); if (v4l2_fwht_validate_fmt(cur_info, width_div, height_div, components_num, pixenc)) return true; return false; } static void update_state_from_header(struct vicodec_ctx *ctx) { const struct fwht_cframe_hdr *p_hdr = &ctx->state.header; ctx->state.visible_width = ntohl(p_hdr->width); ctx->state.visible_height = ntohl(p_hdr->height); ctx->state.colorspace = ntohl(p_hdr->colorspace); ctx->state.xfer_func = ntohl(p_hdr->xfer_func); ctx->state.ycbcr_enc = ntohl(p_hdr->ycbcr_enc); ctx->state.quantization = ntohl(p_hdr->quantization); } static int device_process(struct vicodec_ctx *ctx, struct vb2_v4l2_buffer *src_vb, struct vb2_v4l2_buffer *dst_vb) { struct vicodec_dev *dev = ctx->dev; struct v4l2_fwht_state *state = &ctx->state; u8 *p_src, *p_dst; int ret = 0; if (ctx->is_enc || ctx->is_stateless) p_src = vb2_plane_vaddr(&src_vb->vb2_buf, 0); else p_src = state->compressed_frame; if (ctx->is_stateless) { struct media_request *src_req = src_vb->vb2_buf.req_obj.req; ret = v4l2_ctrl_request_setup(src_req, &ctx->hdl); if (ret) return ret; update_state_from_header(ctx); ctx->state.header.size = htonl(vb2_get_plane_payload(&src_vb->vb2_buf, 0)); /* * set the reference buffer from the reference timestamp * only if this is a P-frame */ if (!(ntohl(ctx->state.header.flags) & V4L2_FWHT_FL_I_FRAME)) { struct vb2_buffer *ref_vb2_buf; struct vb2_queue *vq_cap = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); ref_vb2_buf = vb2_find_buffer(vq_cap, ctx->state.ref_frame_ts); if (!ref_vb2_buf) return -EINVAL; if (ref_vb2_buf->state == VB2_BUF_STATE_ERROR) ret = -EINVAL; ctx->state.ref_frame.buf = vb2_plane_vaddr(ref_vb2_buf, 0); } else { ctx->state.ref_frame.buf = NULL; } } p_dst = vb2_plane_vaddr(&dst_vb->vb2_buf, 0); if (!p_src || !p_dst) { v4l2_err(&dev->v4l2_dev, "Acquiring kernel pointers to buffers failed\n"); return -EFAULT; } if (ctx->is_enc) { struct vicodec_q_data *q_src; int comp_sz_or_errcode; q_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); state->info = q_src->info; comp_sz_or_errcode = v4l2_fwht_encode(state, p_src, p_dst); if (comp_sz_or_errcode < 0) return comp_sz_or_errcode; vb2_set_plane_payload(&dst_vb->vb2_buf, 0, comp_sz_or_errcode); } else { struct vicodec_q_data *q_dst; unsigned int comp_frame_size = ntohl(ctx->state.header.size); q_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); if (comp_frame_size > ctx->comp_max_size) return -EINVAL; state->info = q_dst->info; ret = v4l2_fwht_decode(state, p_src, p_dst); if (ret < 0) return ret; if (!ctx->is_stateless) copy_cap_to_ref(p_dst, ctx->state.info, &ctx->state); vb2_set_plane_payload(&dst_vb->vb2_buf, 0, q_dst->sizeimage); if (ntohl(ctx->state.header.flags) & V4L2_FWHT_FL_I_FRAME) dst_vb->flags |= V4L2_BUF_FLAG_KEYFRAME; else dst_vb->flags |= V4L2_BUF_FLAG_PFRAME; } return ret; } /* * mem2mem callbacks */ static enum vb2_buffer_state get_next_header(struct vicodec_ctx *ctx, u8 **pp, u32 sz) { static const u8 magic[] = { 0x4f, 0x4f, 0x4f, 0x4f, 0xff, 0xff, 0xff, 0xff }; u8 *p = *pp; u32 state; u8 *header = (u8 *)&ctx->state.header; state = VB2_BUF_STATE_DONE; if (!ctx->header_size) { state = VB2_BUF_STATE_ERROR; for (; p < *pp + sz; p++) { u32 copy; p = memchr(p, magic[ctx->comp_magic_cnt], *pp + sz - p); if (!p) { ctx->comp_magic_cnt = 0; p = *pp + sz; 
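/* No candidate magic byte left in this buffer: consume it all and resume scanning for the frame header in the next source buffer. */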
break; } copy = sizeof(magic) - ctx->comp_magic_cnt; if (*pp + sz - p < copy) copy = *pp + sz - p; memcpy(header + ctx->comp_magic_cnt, p, copy); ctx->comp_magic_cnt += copy; if (!memcmp(header, magic, ctx->comp_magic_cnt)) { p += copy; state = VB2_BUF_STATE_DONE; break; } ctx->comp_magic_cnt = 0; } if (ctx->comp_magic_cnt < sizeof(magic)) { *pp = p; return state; } ctx->header_size = sizeof(magic); } if (ctx->header_size < sizeof(struct fwht_cframe_hdr)) { u32 copy = sizeof(struct fwht_cframe_hdr) - ctx->header_size; if (*pp + sz - p < copy) copy = *pp + sz - p; memcpy(header + ctx->header_size, p, copy); p += copy; ctx->header_size += copy; } *pp = p; return state; } /* device_run() - prepares and starts the device */ static void device_run(void *priv) { struct vicodec_ctx *ctx = priv; struct vicodec_dev *dev = ctx->dev; struct vb2_v4l2_buffer *src_buf, *dst_buf; struct vicodec_q_data *q_src, *q_dst; u32 state; struct media_request *src_req; src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); src_req = src_buf->vb2_buf.req_obj.req; q_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); q_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); state = VB2_BUF_STATE_DONE; if (device_process(ctx, src_buf, dst_buf)) state = VB2_BUF_STATE_ERROR; else dst_buf->sequence = q_dst->sequence++; dst_buf->flags &= ~V4L2_BUF_FLAG_LAST; v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, false); spin_lock(ctx->lock); if (!ctx->comp_has_next_frame && v4l2_m2m_is_last_draining_src_buf(ctx->fh.m2m_ctx, src_buf)) { dst_buf->flags |= V4L2_BUF_FLAG_LAST; v4l2_event_queue_fh(&ctx->fh, &vicodec_eos_event); v4l2_m2m_mark_stopped(ctx->fh.m2m_ctx); } if (ctx->is_enc || ctx->is_stateless) { src_buf->sequence = q_src->sequence++; src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); v4l2_m2m_buf_done(src_buf, state); } else if (vb2_get_plane_payload(&src_buf->vb2_buf, 0) == ctx->cur_buf_offset) { src_buf->sequence = q_src->sequence++; src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); v4l2_m2m_buf_done(src_buf, state); ctx->cur_buf_offset = 0; ctx->comp_has_next_frame = false; } v4l2_m2m_buf_done(dst_buf, state); ctx->comp_size = 0; ctx->header_size = 0; ctx->comp_magic_cnt = 0; ctx->comp_has_frame = false; spin_unlock(ctx->lock); if (ctx->is_stateless && src_req) v4l2_ctrl_request_complete(src_req, &ctx->hdl); if (ctx->is_enc) v4l2_m2m_job_finish(dev->stateful_enc.m2m_dev, ctx->fh.m2m_ctx); else if (ctx->is_stateless) v4l2_m2m_job_finish(dev->stateless_dec.m2m_dev, ctx->fh.m2m_ctx); else v4l2_m2m_job_finish(dev->stateful_dec.m2m_dev, ctx->fh.m2m_ctx); } static void job_remove_src_buf(struct vicodec_ctx *ctx, u32 state) { struct vb2_v4l2_buffer *src_buf; struct vicodec_q_data *q_src; q_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); spin_lock(ctx->lock); src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); src_buf->sequence = q_src->sequence++; v4l2_m2m_buf_done(src_buf, state); ctx->cur_buf_offset = 0; spin_unlock(ctx->lock); } static const struct v4l2_fwht_pixfmt_info * info_from_header(const struct fwht_cframe_hdr *p_hdr) { unsigned int flags = ntohl(p_hdr->flags); unsigned int width_div = (flags & V4L2_FWHT_FL_CHROMA_FULL_WIDTH) ? 1 : 2; unsigned int height_div = (flags & V4L2_FWHT_FL_CHROMA_FULL_HEIGHT) ? 
1 : 2; unsigned int components_num = 3; unsigned int pixenc = 0; unsigned int version = ntohl(p_hdr->version); if (version >= 2) { components_num = 1 + ((flags & V4L2_FWHT_FL_COMPONENTS_NUM_MSK) >> V4L2_FWHT_FL_COMPONENTS_NUM_OFFSET); pixenc = (flags & V4L2_FWHT_FL_PIXENC_MSK); } return v4l2_fwht_find_nth_fmt(width_div, height_div, components_num, pixenc, 0); } static bool is_header_valid(const struct fwht_cframe_hdr *p_hdr) { const struct v4l2_fwht_pixfmt_info *info; unsigned int w = ntohl(p_hdr->width); unsigned int h = ntohl(p_hdr->height); unsigned int version = ntohl(p_hdr->version); unsigned int flags = ntohl(p_hdr->flags); if (w < MIN_WIDTH || w > MAX_WIDTH || h < MIN_HEIGHT || h > MAX_HEIGHT) return false; if (!validate_by_version(flags, version)) return false; info = info_from_header(p_hdr); if (!info) return false; return true; } static void update_capture_data_from_header(struct vicodec_ctx *ctx) { struct vicodec_q_data *q_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); const struct fwht_cframe_hdr *p_hdr = &ctx->state.header; const struct v4l2_fwht_pixfmt_info *info = info_from_header(p_hdr); unsigned int flags = ntohl(p_hdr->flags); unsigned int hdr_width_div = (flags & V4L2_FWHT_FL_CHROMA_FULL_WIDTH) ? 1 : 2; unsigned int hdr_height_div = (flags & V4L2_FWHT_FL_CHROMA_FULL_HEIGHT) ? 1 : 2; /* * This function should not be used by a stateless codec since * it changes values in q_data that are not request specific */ WARN_ON(ctx->is_stateless); q_dst->info = info; q_dst->visible_width = ntohl(p_hdr->width); q_dst->visible_height = ntohl(p_hdr->height); q_dst->coded_width = vic_round_dim(q_dst->visible_width, hdr_width_div); q_dst->coded_height = vic_round_dim(q_dst->visible_height, hdr_height_div); q_dst->sizeimage = q_dst->coded_width * q_dst->coded_height * q_dst->info->sizeimage_mult / q_dst->info->sizeimage_div; ctx->state.colorspace = ntohl(p_hdr->colorspace); ctx->state.xfer_func = ntohl(p_hdr->xfer_func); ctx->state.ycbcr_enc = ntohl(p_hdr->ycbcr_enc); ctx->state.quantization = ntohl(p_hdr->quantization); } static void set_last_buffer(struct vb2_v4l2_buffer *dst_buf, const struct vb2_v4l2_buffer *src_buf, struct vicodec_ctx *ctx) { struct vicodec_q_data *q_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); vb2_set_plane_payload(&dst_buf->vb2_buf, 0, 0); dst_buf->sequence = q_dst->sequence++; v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, !ctx->is_enc); dst_buf->flags |= V4L2_BUF_FLAG_LAST; v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_DONE); } static int job_ready(void *priv) { static const u8 magic[] = { 0x4f, 0x4f, 0x4f, 0x4f, 0xff, 0xff, 0xff, 0xff }; struct vicodec_ctx *ctx = priv; struct vb2_v4l2_buffer *src_buf; u8 *p_src; u8 *p; u32 sz; u32 state; struct vicodec_q_data *q_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); unsigned int flags; unsigned int hdr_width_div; unsigned int hdr_height_div; unsigned int max_to_copy; unsigned int comp_frame_size; if (ctx->source_changed) return 0; if (ctx->is_stateless || ctx->is_enc || ctx->comp_has_frame) return 1; restart: ctx->comp_has_next_frame = false; src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); if (!src_buf) return 0; p_src = vb2_plane_vaddr(&src_buf->vb2_buf, 0); sz = vb2_get_plane_payload(&src_buf->vb2_buf, 0); p = p_src + ctx->cur_buf_offset; state = VB2_BUF_STATE_DONE; if (ctx->header_size < sizeof(struct fwht_cframe_hdr)) { state = get_next_header(ctx, &p, p_src + sz - p); if (ctx->header_size < sizeof(struct fwht_cframe_hdr)) { if (v4l2_m2m_is_last_draining_src_buf(ctx->fh.m2m_ctx, src_buf)) return 1; 
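/* Header still incomplete: return this source buffer and retry with the next queued one. */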
job_remove_src_buf(ctx, state); goto restart; } } comp_frame_size = ntohl(ctx->state.header.size); /* * The current scanned frame might be the first frame of a new * resolution so its size might be larger than ctx->comp_max_size. * In that case it is copied up to the current buffer capacity and * the copy will continue after allocating new large enough buffer * when restreaming */ max_to_copy = min(comp_frame_size, ctx->comp_max_size); if (ctx->comp_size < max_to_copy) { u32 copy = max_to_copy - ctx->comp_size; if (copy > p_src + sz - p) copy = p_src + sz - p; memcpy(ctx->state.compressed_frame + ctx->comp_size, p, copy); p += copy; ctx->comp_size += copy; if (ctx->comp_size < max_to_copy) { if (v4l2_m2m_is_last_draining_src_buf(ctx->fh.m2m_ctx, src_buf)) return 1; job_remove_src_buf(ctx, state); goto restart; } } ctx->cur_buf_offset = p - p_src; if (ctx->comp_size == comp_frame_size) ctx->comp_has_frame = true; ctx->comp_has_next_frame = false; if (ctx->comp_has_frame && sz - ctx->cur_buf_offset >= sizeof(struct fwht_cframe_hdr)) { struct fwht_cframe_hdr *p_hdr = (struct fwht_cframe_hdr *)p; u32 frame_size = ntohl(p_hdr->size); u32 remaining = sz - ctx->cur_buf_offset - sizeof(*p_hdr); if (!memcmp(p, magic, sizeof(magic))) ctx->comp_has_next_frame = remaining >= frame_size; } /* * if the header is invalid the device_run will just drop the frame * with an error */ if (!is_header_valid(&ctx->state.header) && ctx->comp_has_frame) return 1; flags = ntohl(ctx->state.header.flags); hdr_width_div = (flags & V4L2_FWHT_FL_CHROMA_FULL_WIDTH) ? 1 : 2; hdr_height_div = (flags & V4L2_FWHT_FL_CHROMA_FULL_HEIGHT) ? 1 : 2; if (ntohl(ctx->state.header.width) != q_dst->visible_width || ntohl(ctx->state.header.height) != q_dst->visible_height || !q_dst->info || hdr_width_div != q_dst->info->width_div || hdr_height_div != q_dst->info->height_div) { static const struct v4l2_event rs_event = { .type = V4L2_EVENT_SOURCE_CHANGE, .u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION, }; struct vb2_v4l2_buffer *dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); update_capture_data_from_header(ctx); v4l2_event_queue_fh(&ctx->fh, &rs_event); set_last_buffer(dst_buf, src_buf, ctx); ctx->source_changed = true; return 0; } return 1; } /* * video ioctls */ static const struct v4l2_fwht_pixfmt_info *find_fmt(u32 fmt) { const struct v4l2_fwht_pixfmt_info *info = v4l2_fwht_find_pixfmt(fmt); if (!info) info = v4l2_fwht_get_pixfmt(0); return info; } static int vidioc_querycap(struct file *file, void *priv, struct v4l2_capability *cap) { strscpy(cap->driver, VICODEC_NAME, sizeof(cap->driver)); strscpy(cap->card, VICODEC_NAME, sizeof(cap->card)); snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s", VICODEC_NAME); return 0; } static int enum_fmt(struct v4l2_fmtdesc *f, struct vicodec_ctx *ctx, bool is_out) { bool is_uncomp = (ctx->is_enc && is_out) || (!ctx->is_enc && !is_out); if (V4L2_TYPE_IS_MULTIPLANAR(f->type) && !multiplanar) return -EINVAL; if (!V4L2_TYPE_IS_MULTIPLANAR(f->type) && multiplanar) return -EINVAL; if (is_uncomp) { const struct v4l2_fwht_pixfmt_info *info = get_q_data(ctx, f->type)->info; if (ctx->is_enc || !vb2_is_streaming(&ctx->fh.m2m_ctx->cap_q_ctx.q)) info = v4l2_fwht_get_pixfmt(f->index); else info = v4l2_fwht_find_nth_fmt(info->width_div, info->height_div, info->components_num, info->pixenc, f->index); if (!info) return -EINVAL; f->pixelformat = info->id; } else { if (f->index) return -EINVAL; f->pixelformat = ctx->is_stateless ? 
V4L2_PIX_FMT_FWHT_STATELESS : V4L2_PIX_FMT_FWHT; if (!ctx->is_enc && !ctx->is_stateless) f->flags = V4L2_FMT_FLAG_DYN_RESOLUTION | V4L2_FMT_FLAG_CONTINUOUS_BYTESTREAM; } return 0; } static int vidioc_enum_fmt_vid_cap(struct file *file, void *priv, struct v4l2_fmtdesc *f) { struct vicodec_ctx *ctx = file2ctx(file); return enum_fmt(f, ctx, false); } static int vidioc_enum_fmt_vid_out(struct file *file, void *priv, struct v4l2_fmtdesc *f) { struct vicodec_ctx *ctx = file2ctx(file); return enum_fmt(f, ctx, true); } static int vidioc_g_fmt(struct vicodec_ctx *ctx, struct v4l2_format *f) { struct vb2_queue *vq; struct vicodec_q_data *q_data; struct v4l2_pix_format_mplane *pix_mp; struct v4l2_pix_format *pix; const struct v4l2_fwht_pixfmt_info *info; vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type); if (!vq) return -EINVAL; q_data = get_q_data(ctx, f->type); info = q_data->info; switch (f->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: if (multiplanar) return -EINVAL; pix = &f->fmt.pix; pix->width = q_data->coded_width; pix->height = q_data->coded_height; pix->field = V4L2_FIELD_NONE; pix->pixelformat = info->id; pix->bytesperline = q_data->coded_width * info->bytesperline_mult; pix->sizeimage = q_data->sizeimage; pix->colorspace = ctx->state.colorspace; pix->xfer_func = ctx->state.xfer_func; pix->ycbcr_enc = ctx->state.ycbcr_enc; pix->quantization = ctx->state.quantization; break; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: if (!multiplanar) return -EINVAL; pix_mp = &f->fmt.pix_mp; pix_mp->width = q_data->coded_width; pix_mp->height = q_data->coded_height; pix_mp->field = V4L2_FIELD_NONE; pix_mp->pixelformat = info->id; pix_mp->num_planes = 1; pix_mp->plane_fmt[0].bytesperline = q_data->coded_width * info->bytesperline_mult; pix_mp->plane_fmt[0].sizeimage = q_data->sizeimage; pix_mp->colorspace = ctx->state.colorspace; pix_mp->xfer_func = ctx->state.xfer_func; pix_mp->ycbcr_enc = ctx->state.ycbcr_enc; pix_mp->quantization = ctx->state.quantization; break; default: return -EINVAL; } return 0; } static int vidioc_g_fmt_vid_out(struct file *file, void *priv, struct v4l2_format *f) { return vidioc_g_fmt(file2ctx(file), f); } static int vidioc_g_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { return vidioc_g_fmt(file2ctx(file), f); } static int vidioc_try_fmt(struct vicodec_ctx *ctx, struct v4l2_format *f) { struct v4l2_pix_format_mplane *pix_mp; struct v4l2_pix_format *pix; struct v4l2_plane_pix_format *plane; const struct v4l2_fwht_pixfmt_info *info = ctx->is_stateless ? 
&pixfmt_stateless_fwht : &pixfmt_fwht; switch (f->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: pix = &f->fmt.pix; if (pix->pixelformat != V4L2_PIX_FMT_FWHT && pix->pixelformat != V4L2_PIX_FMT_FWHT_STATELESS) info = find_fmt(pix->pixelformat); pix->width = clamp(pix->width, MIN_WIDTH, MAX_WIDTH); pix->width = vic_round_dim(pix->width, info->width_div); pix->height = clamp(pix->height, MIN_HEIGHT, MAX_HEIGHT); pix->height = vic_round_dim(pix->height, info->height_div); pix->field = V4L2_FIELD_NONE; pix->bytesperline = pix->width * info->bytesperline_mult; pix->sizeimage = pix->width * pix->height * info->sizeimage_mult / info->sizeimage_div; if (pix->pixelformat == V4L2_PIX_FMT_FWHT) pix->sizeimage += sizeof(struct fwht_cframe_hdr); break; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: pix_mp = &f->fmt.pix_mp; plane = pix_mp->plane_fmt; if (pix_mp->pixelformat != V4L2_PIX_FMT_FWHT && pix_mp->pixelformat != V4L2_PIX_FMT_FWHT_STATELESS) info = find_fmt(pix_mp->pixelformat); pix_mp->num_planes = 1; pix_mp->width = clamp(pix_mp->width, MIN_WIDTH, MAX_WIDTH); pix_mp->width = vic_round_dim(pix_mp->width, info->width_div); pix_mp->height = clamp(pix_mp->height, MIN_HEIGHT, MAX_HEIGHT); pix_mp->height = vic_round_dim(pix_mp->height, info->height_div); pix_mp->field = V4L2_FIELD_NONE; plane->bytesperline = pix_mp->width * info->bytesperline_mult; plane->sizeimage = pix_mp->width * pix_mp->height * info->sizeimage_mult / info->sizeimage_div; if (pix_mp->pixelformat == V4L2_PIX_FMT_FWHT) plane->sizeimage += sizeof(struct fwht_cframe_hdr); break; default: return -EINVAL; } return 0; } static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vicodec_ctx *ctx = file2ctx(file); struct v4l2_pix_format_mplane *pix_mp; struct v4l2_pix_format *pix; switch (f->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: if (multiplanar) return -EINVAL; pix = &f->fmt.pix; pix->pixelformat = ctx->is_enc ? V4L2_PIX_FMT_FWHT : find_fmt(f->fmt.pix.pixelformat)->id; pix->colorspace = ctx->state.colorspace; pix->xfer_func = ctx->state.xfer_func; pix->ycbcr_enc = ctx->state.ycbcr_enc; pix->quantization = ctx->state.quantization; break; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: if (!multiplanar) return -EINVAL; pix_mp = &f->fmt.pix_mp; pix_mp->pixelformat = ctx->is_enc ? 
V4L2_PIX_FMT_FWHT : find_fmt(pix_mp->pixelformat)->id; pix_mp->colorspace = ctx->state.colorspace; pix_mp->xfer_func = ctx->state.xfer_func; pix_mp->ycbcr_enc = ctx->state.ycbcr_enc; pix_mp->quantization = ctx->state.quantization; break; default: return -EINVAL; } return vidioc_try_fmt(ctx, f); } static int vidioc_try_fmt_vid_out(struct file *file, void *priv, struct v4l2_format *f) { struct vicodec_ctx *ctx = file2ctx(file); struct v4l2_pix_format_mplane *pix_mp; struct v4l2_pix_format *pix; switch (f->type) { case V4L2_BUF_TYPE_VIDEO_OUTPUT: if (multiplanar) return -EINVAL; pix = &f->fmt.pix; if (ctx->is_enc) pix->pixelformat = find_fmt(pix->pixelformat)->id; else if (ctx->is_stateless) pix->pixelformat = V4L2_PIX_FMT_FWHT_STATELESS; else pix->pixelformat = V4L2_PIX_FMT_FWHT; if (!pix->colorspace) pix->colorspace = V4L2_COLORSPACE_REC709; break; case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: if (!multiplanar) return -EINVAL; pix_mp = &f->fmt.pix_mp; if (ctx->is_enc) pix_mp->pixelformat = find_fmt(pix_mp->pixelformat)->id; else if (ctx->is_stateless) pix_mp->pixelformat = V4L2_PIX_FMT_FWHT_STATELESS; else pix_mp->pixelformat = V4L2_PIX_FMT_FWHT; if (!pix_mp->colorspace) pix_mp->colorspace = V4L2_COLORSPACE_REC709; break; default: return -EINVAL; } return vidioc_try_fmt(ctx, f); } static int vidioc_s_fmt(struct vicodec_ctx *ctx, struct v4l2_format *f) { struct vicodec_q_data *q_data; struct vb2_queue *vq; bool fmt_changed = true; struct v4l2_pix_format_mplane *pix_mp; struct v4l2_pix_format *pix; vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type); if (!vq) return -EINVAL; q_data = get_q_data(ctx, f->type); if (!q_data) return -EINVAL; switch (f->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: pix = &f->fmt.pix; if (ctx->is_enc && V4L2_TYPE_IS_OUTPUT(f->type)) fmt_changed = !q_data->info || q_data->info->id != pix->pixelformat || q_data->coded_width != pix->width || q_data->coded_height != pix->height; if (vb2_is_busy(vq) && fmt_changed) return -EBUSY; if (pix->pixelformat == V4L2_PIX_FMT_FWHT) q_data->info = &pixfmt_fwht; else if (pix->pixelformat == V4L2_PIX_FMT_FWHT_STATELESS) q_data->info = &pixfmt_stateless_fwht; else q_data->info = find_fmt(pix->pixelformat); q_data->coded_width = pix->width; q_data->coded_height = pix->height; q_data->sizeimage = pix->sizeimage; break; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: pix_mp = &f->fmt.pix_mp; if (ctx->is_enc && V4L2_TYPE_IS_OUTPUT(f->type)) fmt_changed = !q_data->info || q_data->info->id != pix_mp->pixelformat || q_data->coded_width != pix_mp->width || q_data->coded_height != pix_mp->height; if (vb2_is_busy(vq) && fmt_changed) return -EBUSY; if (pix_mp->pixelformat == V4L2_PIX_FMT_FWHT) q_data->info = &pixfmt_fwht; else if (pix_mp->pixelformat == V4L2_PIX_FMT_FWHT_STATELESS) q_data->info = &pixfmt_stateless_fwht; else q_data->info = find_fmt(pix_mp->pixelformat); q_data->coded_width = pix_mp->width; q_data->coded_height = pix_mp->height; q_data->sizeimage = pix_mp->plane_fmt[0].sizeimage; break; default: return -EINVAL; } dprintk(ctx->dev, "Setting format for type %d, coded wxh: %dx%d, fourcc: 0x%08x\n", f->type, q_data->coded_width, q_data->coded_height, q_data->info->id); return 0; } static int vidioc_s_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { int ret; ret = vidioc_try_fmt_vid_cap(file, priv, f); if (ret) return ret; return vidioc_s_fmt(file2ctx(file), f); } static int vidioc_s_fmt_vid_out(struct file *file, void *priv, struct v4l2_format *f) { struct 
vicodec_ctx *ctx = file2ctx(file); struct vicodec_q_data *q_data; struct vicodec_q_data *q_data_cap; struct v4l2_pix_format *pix; struct v4l2_pix_format_mplane *pix_mp; u32 coded_w = 0, coded_h = 0; unsigned int size = 0; int ret; q_data = get_q_data(ctx, f->type); q_data_cap = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); ret = vidioc_try_fmt_vid_out(file, priv, f); if (ret) return ret; if (ctx->is_enc) { struct vb2_queue *vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type); struct vb2_queue *vq_cap = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); const struct v4l2_fwht_pixfmt_info *info = ctx->is_stateless ? &pixfmt_stateless_fwht : &pixfmt_fwht; if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) { coded_w = f->fmt.pix.width; coded_h = f->fmt.pix.height; } else { coded_w = f->fmt.pix_mp.width; coded_h = f->fmt.pix_mp.height; } if (vb2_is_busy(vq) && (coded_w != q_data->coded_width || coded_h != q_data->coded_height)) return -EBUSY; size = coded_w * coded_h * info->sizeimage_mult / info->sizeimage_div; if (!ctx->is_stateless) size += sizeof(struct fwht_cframe_hdr); if (vb2_is_busy(vq_cap) && size > q_data_cap->sizeimage) return -EBUSY; } ret = vidioc_s_fmt(file2ctx(file), f); if (!ret) { if (ctx->is_enc) { q_data->visible_width = coded_w; q_data->visible_height = coded_h; q_data_cap->coded_width = coded_w; q_data_cap->coded_height = coded_h; q_data_cap->sizeimage = size; } switch (f->type) { case V4L2_BUF_TYPE_VIDEO_OUTPUT: pix = &f->fmt.pix; ctx->state.colorspace = pix->colorspace; ctx->state.xfer_func = pix->xfer_func; ctx->state.ycbcr_enc = pix->ycbcr_enc; ctx->state.quantization = pix->quantization; break; case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: pix_mp = &f->fmt.pix_mp; ctx->state.colorspace = pix_mp->colorspace; ctx->state.xfer_func = pix_mp->xfer_func; ctx->state.ycbcr_enc = pix_mp->ycbcr_enc; ctx->state.quantization = pix_mp->quantization; break; default: break; } } return ret; } static int vidioc_g_selection(struct file *file, void *priv, struct v4l2_selection *s) { struct vicodec_ctx *ctx = file2ctx(file); struct vicodec_q_data *q_data; q_data = get_q_data(ctx, s->type); if (!q_data) return -EINVAL; /* * encoder supports only cropping on the OUTPUT buffer * decoder supports only composing on the CAPTURE buffer */ if (ctx->is_enc && s->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) { switch (s->target) { case V4L2_SEL_TGT_CROP: s->r.left = 0; s->r.top = 0; s->r.width = q_data->visible_width; s->r.height = q_data->visible_height; return 0; case V4L2_SEL_TGT_CROP_DEFAULT: case V4L2_SEL_TGT_CROP_BOUNDS: s->r.left = 0; s->r.top = 0; s->r.width = q_data->coded_width; s->r.height = q_data->coded_height; return 0; } } else if (!ctx->is_enc && s->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) { switch (s->target) { case V4L2_SEL_TGT_COMPOSE: s->r.left = 0; s->r.top = 0; s->r.width = q_data->visible_width; s->r.height = q_data->visible_height; return 0; case V4L2_SEL_TGT_COMPOSE_DEFAULT: case V4L2_SEL_TGT_COMPOSE_BOUNDS: s->r.left = 0; s->r.top = 0; s->r.width = q_data->coded_width; s->r.height = q_data->coded_height; return 0; } } return -EINVAL; } static int vidioc_s_selection(struct file *file, void *priv, struct v4l2_selection *s) { struct vicodec_ctx *ctx = file2ctx(file); struct vicodec_q_data *q_data; if (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) return -EINVAL; q_data = get_q_data(ctx, s->type); if (!q_data) return -EINVAL; if (!ctx->is_enc || s->target != V4L2_SEL_TGT_CROP) return -EINVAL; s->r.left = 0; s->r.top = 0; q_data->visible_width = clamp(s->r.width, MIN_WIDTH, q_data->coded_width); 
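/* The crop rectangle is pinned to the top-left corner and clamped to the coded frame size; the adjusted rectangle is written back for userspace. */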
s->r.width = q_data->visible_width; q_data->visible_height = clamp(s->r.height, MIN_HEIGHT, q_data->coded_height); s->r.height = q_data->visible_height; return 0; } static int vicodec_encoder_cmd(struct file *file, void *fh, struct v4l2_encoder_cmd *ec) { struct vicodec_ctx *ctx = file2ctx(file); int ret; ret = v4l2_m2m_ioctl_try_encoder_cmd(file, fh, ec); if (ret < 0) return ret; if (!vb2_is_streaming(&ctx->fh.m2m_ctx->out_q_ctx.q)) return 0; ret = v4l2_m2m_ioctl_encoder_cmd(file, fh, ec); if (ret < 0) return ret; if (ec->cmd == V4L2_ENC_CMD_STOP && v4l2_m2m_has_stopped(ctx->fh.m2m_ctx)) v4l2_event_queue_fh(&ctx->fh, &vicodec_eos_event); if (ec->cmd == V4L2_ENC_CMD_START && v4l2_m2m_has_stopped(ctx->fh.m2m_ctx)) vb2_clear_last_buffer_dequeued(&ctx->fh.m2m_ctx->cap_q_ctx.q); return 0; } static int vicodec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc) { struct vicodec_ctx *ctx = file2ctx(file); int ret; /* * This ioctl should not be used with a stateless codec that doesn't * support holding buffers and the associated flush command. */ WARN_ON(ctx->is_stateless); ret = v4l2_m2m_ioctl_try_decoder_cmd(file, fh, dc); if (ret < 0) return ret; if (!vb2_is_streaming(&ctx->fh.m2m_ctx->out_q_ctx.q)) return 0; ret = v4l2_m2m_ioctl_decoder_cmd(file, fh, dc); if (ret < 0) return ret; if (dc->cmd == V4L2_DEC_CMD_STOP && v4l2_m2m_has_stopped(ctx->fh.m2m_ctx)) v4l2_event_queue_fh(&ctx->fh, &vicodec_eos_event); if (dc->cmd == V4L2_DEC_CMD_START && v4l2_m2m_has_stopped(ctx->fh.m2m_ctx)) vb2_clear_last_buffer_dequeued(&ctx->fh.m2m_ctx->cap_q_ctx.q); return 0; } static int vicodec_enum_framesizes(struct file *file, void *fh, struct v4l2_frmsizeenum *fsize) { switch (fsize->pixel_format) { case V4L2_PIX_FMT_FWHT_STATELESS: break; case V4L2_PIX_FMT_FWHT: break; default: if (find_fmt(fsize->pixel_format)->id == fsize->pixel_format) break; return -EINVAL; } if (fsize->index) return -EINVAL; fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE; fsize->stepwise.min_width = MIN_WIDTH; fsize->stepwise.max_width = MAX_WIDTH; fsize->stepwise.step_width = 8; fsize->stepwise.min_height = MIN_HEIGHT; fsize->stepwise.max_height = MAX_HEIGHT; fsize->stepwise.step_height = 8; return 0; } static int vicodec_subscribe_event(struct v4l2_fh *fh, const struct v4l2_event_subscription *sub) { struct vicodec_ctx *ctx = container_of(fh, struct vicodec_ctx, fh); switch (sub->type) { case V4L2_EVENT_SOURCE_CHANGE: if (ctx->is_enc) return -EINVAL; fallthrough; case V4L2_EVENT_EOS: if (ctx->is_stateless) return -EINVAL; return v4l2_event_subscribe(fh, sub, 0, NULL); default: return v4l2_ctrl_subscribe_event(fh, sub); } } static const struct v4l2_ioctl_ops vicodec_ioctl_ops = { .vidioc_querycap = vidioc_querycap, .vidioc_enum_fmt_vid_cap = vidioc_enum_fmt_vid_cap, .vidioc_g_fmt_vid_cap = vidioc_g_fmt_vid_cap, .vidioc_try_fmt_vid_cap = vidioc_try_fmt_vid_cap, .vidioc_s_fmt_vid_cap = vidioc_s_fmt_vid_cap, .vidioc_g_fmt_vid_cap_mplane = vidioc_g_fmt_vid_cap, .vidioc_try_fmt_vid_cap_mplane = vidioc_try_fmt_vid_cap, .vidioc_s_fmt_vid_cap_mplane = vidioc_s_fmt_vid_cap, .vidioc_enum_fmt_vid_out = vidioc_enum_fmt_vid_out, .vidioc_g_fmt_vid_out = vidioc_g_fmt_vid_out, .vidioc_try_fmt_vid_out = vidioc_try_fmt_vid_out, .vidioc_s_fmt_vid_out = vidioc_s_fmt_vid_out, .vidioc_g_fmt_vid_out_mplane = vidioc_g_fmt_vid_out, .vidioc_try_fmt_vid_out_mplane = vidioc_try_fmt_vid_out, .vidioc_s_fmt_vid_out_mplane = vidioc_s_fmt_vid_out, .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs, .vidioc_querybuf = v4l2_m2m_ioctl_querybuf, .vidioc_qbuf = 
v4l2_m2m_ioctl_qbuf, .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf, .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf, .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs, .vidioc_expbuf = v4l2_m2m_ioctl_expbuf, .vidioc_remove_bufs = v4l2_m2m_ioctl_remove_bufs, .vidioc_streamon = v4l2_m2m_ioctl_streamon, .vidioc_streamoff = v4l2_m2m_ioctl_streamoff, .vidioc_g_selection = vidioc_g_selection, .vidioc_s_selection = vidioc_s_selection, .vidioc_try_encoder_cmd = v4l2_m2m_ioctl_try_encoder_cmd, .vidioc_encoder_cmd = vicodec_encoder_cmd, .vidioc_try_decoder_cmd = v4l2_m2m_ioctl_try_decoder_cmd, .vidioc_decoder_cmd = vicodec_decoder_cmd, .vidioc_enum_framesizes = vicodec_enum_framesizes, .vidioc_subscribe_event = vicodec_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; /* * Queue operations */ static int vicodec_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct vicodec_ctx *ctx = vb2_get_drv_priv(vq); struct vicodec_q_data *q_data = get_q_data(ctx, vq->type); unsigned int size = q_data->sizeimage; if (*nplanes) return sizes[0] < size ? -EINVAL : 0; *nplanes = 1; sizes[0] = size; q_data->vb2_sizeimage = size; return 0; } static int vicodec_buf_out_validate(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); vbuf->field = V4L2_FIELD_NONE; return 0; } static int vicodec_buf_prepare(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vicodec_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); struct vicodec_q_data *q_data; dprintk(ctx->dev, "type: %d\n", vb->vb2_queue->type); q_data = get_q_data(ctx, vb->vb2_queue->type); if (V4L2_TYPE_IS_OUTPUT(vb->vb2_queue->type)) { if (vbuf->field == V4L2_FIELD_ANY) vbuf->field = V4L2_FIELD_NONE; if (vbuf->field != V4L2_FIELD_NONE) { dprintk(ctx->dev, "%s field isn't supported\n", __func__); return -EINVAL; } } if (vb2_plane_size(vb, 0) < q_data->vb2_sizeimage) { dprintk(ctx->dev, "%s data will not fit into plane (%lu < %lu)\n", __func__, vb2_plane_size(vb, 0), (long)q_data->vb2_sizeimage); return -EINVAL; } return 0; } static void vicodec_buf_queue(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vicodec_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); unsigned int sz = vb2_get_plane_payload(&vbuf->vb2_buf, 0); u8 *p_src = vb2_plane_vaddr(&vbuf->vb2_buf, 0); u8 *p = p_src; struct vb2_queue *vq_out = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); struct vb2_queue *vq_cap = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); bool header_valid = false; static const struct v4l2_event rs_event = { .type = V4L2_EVENT_SOURCE_CHANGE, .u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION, }; if (V4L2_TYPE_IS_CAPTURE(vb->vb2_queue->type) && vb2_is_streaming(vb->vb2_queue) && v4l2_m2m_dst_buf_is_last(ctx->fh.m2m_ctx)) { unsigned int i; for (i = 0; i < vb->num_planes; i++) vb2_set_plane_payload(vb, i, 0); vbuf->field = V4L2_FIELD_NONE; vbuf->sequence = get_q_data(ctx, vb->vb2_queue->type)->sequence++; v4l2_m2m_last_buffer_done(ctx->fh.m2m_ctx, vbuf); v4l2_event_queue_fh(&ctx->fh, &vicodec_eos_event); return; } /* buf_queue handles only the first source change event */ if (ctx->first_source_change_sent) { v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf); return; } /* * if both queues are streaming, the source change event is * handled in job_ready */ if (vb2_is_streaming(vq_cap) && vb2_is_streaming(vq_out)) { v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf); return; } /* * source 
change event is relevant only for the stateful decoder * in the compressed stream */ if (ctx->is_stateless || ctx->is_enc || V4L2_TYPE_IS_CAPTURE(vb->vb2_queue->type)) { v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf); return; } do { enum vb2_buffer_state state = get_next_header(ctx, &p, p_src + sz - p); if (ctx->header_size < sizeof(struct fwht_cframe_hdr)) { v4l2_m2m_buf_done(vbuf, state); return; } header_valid = is_header_valid(&ctx->state.header); /* * p points right after the end of the header in the * buffer. If the header is invalid we set p to point * to the next byte after the start of the header */ if (!header_valid) { p = p - sizeof(struct fwht_cframe_hdr) + 1; if (p < p_src) p = p_src; ctx->header_size = 0; ctx->comp_magic_cnt = 0; } } while (!header_valid); ctx->cur_buf_offset = p - p_src; update_capture_data_from_header(ctx); ctx->first_source_change_sent = true; v4l2_event_queue_fh(&ctx->fh, &rs_event); v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf); } static void vicodec_return_bufs(struct vb2_queue *q, u32 state) { struct vicodec_ctx *ctx = vb2_get_drv_priv(q); struct vb2_v4l2_buffer *vbuf; for (;;) { if (V4L2_TYPE_IS_OUTPUT(q->type)) vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); else vbuf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); if (vbuf == NULL) return; v4l2_ctrl_request_complete(vbuf->vb2_buf.req_obj.req, &ctx->hdl); spin_lock(ctx->lock); v4l2_m2m_buf_done(vbuf, state); spin_unlock(ctx->lock); } } static unsigned int total_frame_size(struct vicodec_q_data *q_data) { unsigned int size; unsigned int chroma_div; if (!q_data->info) { WARN_ON(1); return 0; } size = q_data->coded_width * q_data->coded_height; chroma_div = q_data->info->width_div * q_data->info->height_div; if (q_data->info->components_num == 4) return 2 * size + 2 * (size / chroma_div); else if (q_data->info->components_num == 3) return size + 2 * (size / chroma_div); return size; } static int vicodec_start_streaming(struct vb2_queue *q, unsigned int count) { struct vicodec_ctx *ctx = vb2_get_drv_priv(q); struct vicodec_q_data *q_data = get_q_data(ctx, q->type); struct v4l2_fwht_state *state = &ctx->state; const struct v4l2_fwht_pixfmt_info *info = q_data->info; unsigned int size = q_data->coded_width * q_data->coded_height; unsigned int chroma_div; unsigned int total_planes_size; u8 *new_comp_frame = NULL; chroma_div = info->width_div * info->height_div; q_data->sequence = 0; v4l2_m2m_update_start_streaming_state(ctx->fh.m2m_ctx, q); state->gop_cnt = 0; if ((V4L2_TYPE_IS_OUTPUT(q->type) && !ctx->is_enc) || (V4L2_TYPE_IS_CAPTURE(q->type) && ctx->is_enc)) return 0; if (info->id == V4L2_PIX_FMT_FWHT || info->id == V4L2_PIX_FMT_FWHT_STATELESS) { vicodec_return_bufs(q, VB2_BUF_STATE_QUEUED); return -EINVAL; } total_planes_size = total_frame_size(q_data); ctx->comp_max_size = total_planes_size; state->visible_width = q_data->visible_width; state->visible_height = q_data->visible_height; state->coded_width = q_data->coded_width; state->coded_height = q_data->coded_height; state->stride = q_data->coded_width * info->bytesperline_mult; if (ctx->is_stateless) { state->ref_stride = state->stride; return 0; } state->ref_stride = q_data->coded_width * info->luma_alpha_step; state->ref_frame.buf = kvmalloc(total_planes_size, GFP_KERNEL); state->ref_frame.luma = state->ref_frame.buf; new_comp_frame = kvmalloc(ctx->comp_max_size, GFP_KERNEL); if (!state->ref_frame.luma || !new_comp_frame) { kvfree(state->ref_frame.luma); kvfree(new_comp_frame); vicodec_return_bufs(q, VB2_BUF_STATE_QUEUED); return -ENOMEM; } /* * if 
state->compressed_frame was already allocated then * it contain data of the first frame of the new resolution */ if (state->compressed_frame) { if (ctx->comp_size > ctx->comp_max_size) ctx->comp_size = ctx->comp_max_size; memcpy(new_comp_frame, state->compressed_frame, ctx->comp_size); } kvfree(state->compressed_frame); state->compressed_frame = new_comp_frame; if (info->components_num < 3) { state->ref_frame.cb = NULL; state->ref_frame.cr = NULL; state->ref_frame.alpha = NULL; return 0; } state->ref_frame.cb = state->ref_frame.luma + size; state->ref_frame.cr = state->ref_frame.cb + size / chroma_div; if (info->components_num == 4) state->ref_frame.alpha = state->ref_frame.cr + size / chroma_div; else state->ref_frame.alpha = NULL; return 0; } static void vicodec_stop_streaming(struct vb2_queue *q) { struct vicodec_ctx *ctx = vb2_get_drv_priv(q); vicodec_return_bufs(q, VB2_BUF_STATE_ERROR); v4l2_m2m_update_stop_streaming_state(ctx->fh.m2m_ctx, q); if (V4L2_TYPE_IS_OUTPUT(q->type) && v4l2_m2m_has_stopped(ctx->fh.m2m_ctx)) v4l2_event_queue_fh(&ctx->fh, &vicodec_eos_event); if (!ctx->is_enc && V4L2_TYPE_IS_OUTPUT(q->type)) ctx->first_source_change_sent = false; if ((!V4L2_TYPE_IS_OUTPUT(q->type) && !ctx->is_enc) || (V4L2_TYPE_IS_OUTPUT(q->type) && ctx->is_enc)) { if (!ctx->is_stateless) kvfree(ctx->state.ref_frame.buf); ctx->state.ref_frame.buf = NULL; ctx->state.ref_frame.luma = NULL; ctx->comp_max_size = 0; ctx->source_changed = false; } if (V4L2_TYPE_IS_OUTPUT(q->type) && !ctx->is_enc) { ctx->cur_buf_offset = 0; ctx->comp_size = 0; ctx->header_size = 0; ctx->comp_magic_cnt = 0; ctx->comp_has_frame = false; ctx->comp_has_next_frame = false; } } static void vicodec_buf_request_complete(struct vb2_buffer *vb) { struct vicodec_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl); } static const struct vb2_ops vicodec_qops = { .queue_setup = vicodec_queue_setup, .buf_out_validate = vicodec_buf_out_validate, .buf_prepare = vicodec_buf_prepare, .buf_queue = vicodec_buf_queue, .buf_request_complete = vicodec_buf_request_complete, .start_streaming = vicodec_start_streaming, .stop_streaming = vicodec_stop_streaming, }; static int queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq) { struct vicodec_ctx *ctx = priv; int ret; src_vq->type = (multiplanar ? V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE : V4L2_BUF_TYPE_VIDEO_OUTPUT); src_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF; src_vq->drv_priv = ctx; src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer); src_vq->ops = &vicodec_qops; src_vq->mem_ops = &vb2_vmalloc_memops; src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; if (ctx->is_enc) { src_vq->lock = &ctx->dev->stateful_enc.mutex; src_vq->min_reqbufs_allocation = VICODEC_REC_BUFS; } else if (ctx->is_stateless) { src_vq->lock = &ctx->dev->stateless_dec.mutex; } else { src_vq->lock = &ctx->dev->stateful_dec.mutex; } src_vq->supports_requests = ctx->is_stateless; src_vq->requires_requests = ctx->is_stateless; ret = vb2_queue_init(src_vq); if (ret) return ret; dst_vq->type = (multiplanar ? 
V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE : V4L2_BUF_TYPE_VIDEO_CAPTURE); dst_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF; dst_vq->max_num_buffers = 64; dst_vq->drv_priv = ctx; dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer); dst_vq->ops = &vicodec_qops; dst_vq->mem_ops = &vb2_vmalloc_memops; dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; dst_vq->lock = src_vq->lock; if (!ctx->is_stateless && !ctx->is_enc) dst_vq->min_reqbufs_allocation = VICODEC_REC_BUFS; return vb2_queue_init(dst_vq); } static int vicodec_try_ctrl(struct v4l2_ctrl *ctrl) { struct vicodec_ctx *ctx = container_of(ctrl->handler, struct vicodec_ctx, hdl); const struct v4l2_ctrl_fwht_params *params; struct vicodec_q_data *q_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); switch (ctrl->id) { case V4L2_CID_STATELESS_FWHT_PARAMS: if (!q_dst->info) return -EINVAL; params = ctrl->p_new.p_fwht_params; if (params->width > q_dst->coded_width || params->width < MIN_WIDTH || params->height > q_dst->coded_height || params->height < MIN_HEIGHT) return -EINVAL; if (!validate_by_version(params->flags, params->version)) return -EINVAL; if (!validate_stateless_params_flags(params, q_dst->info)) return -EINVAL; return 0; default: return 0; } return 0; } static void update_header_from_stateless_params(struct vicodec_ctx *ctx, const struct v4l2_ctrl_fwht_params *params) { struct fwht_cframe_hdr *p_hdr = &ctx->state.header; p_hdr->magic1 = FWHT_MAGIC1; p_hdr->magic2 = FWHT_MAGIC2; p_hdr->version = htonl(params->version); p_hdr->width = htonl(params->width); p_hdr->height = htonl(params->height); p_hdr->flags = htonl(params->flags); p_hdr->colorspace = htonl(params->colorspace); p_hdr->xfer_func = htonl(params->xfer_func); p_hdr->ycbcr_enc = htonl(params->ycbcr_enc); p_hdr->quantization = htonl(params->quantization); } static int vicodec_s_ctrl(struct v4l2_ctrl *ctrl) { struct vicodec_ctx *ctx = container_of(ctrl->handler, struct vicodec_ctx, hdl); const struct v4l2_ctrl_fwht_params *params; switch (ctrl->id) { case V4L2_CID_MPEG_VIDEO_GOP_SIZE: ctx->state.gop_size = ctrl->val; return 0; case V4L2_CID_FWHT_I_FRAME_QP: ctx->state.i_frame_qp = ctrl->val; return 0; case V4L2_CID_FWHT_P_FRAME_QP: ctx->state.p_frame_qp = ctrl->val; return 0; case V4L2_CID_STATELESS_FWHT_PARAMS: params = ctrl->p_new.p_fwht_params; update_header_from_stateless_params(ctx, params); ctx->state.ref_frame_ts = params->backward_ref_ts; return 0; } return -EINVAL; } static const struct v4l2_ctrl_ops vicodec_ctrl_ops = { .s_ctrl = vicodec_s_ctrl, .try_ctrl = vicodec_try_ctrl, }; static const struct v4l2_ctrl_config vicodec_ctrl_stateless_state = { .ops = &vicodec_ctrl_ops, .id = V4L2_CID_STATELESS_FWHT_PARAMS, .elem_size = sizeof(struct v4l2_ctrl_fwht_params), }; /* * File operations */ static int vicodec_open(struct file *file) { const struct v4l2_fwht_pixfmt_info *info = v4l2_fwht_get_pixfmt(0); struct video_device *vfd = video_devdata(file); struct vicodec_dev *dev = video_drvdata(file); struct vicodec_ctx *ctx = NULL; struct v4l2_ctrl_handler *hdl; unsigned int raw_size; unsigned int comp_size; int rc = 0; if (mutex_lock_interruptible(vfd->lock)) return -ERESTARTSYS; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) { rc = -ENOMEM; goto open_unlock; } if (vfd == &dev->stateful_enc.vfd) ctx->is_enc = true; else if (vfd == &dev->stateless_dec.vfd) ctx->is_stateless = true; v4l2_fh_init(&ctx->fh, video_devdata(file)); file->private_data = &ctx->fh; ctx->dev = dev; hdl = &ctx->hdl; v4l2_ctrl_handler_init(hdl, 5); v4l2_ctrl_new_std(hdl, &vicodec_ctrl_ops, 
V4L2_CID_MPEG_VIDEO_GOP_SIZE, 1, 16, 1, 10); v4l2_ctrl_new_std(hdl, &vicodec_ctrl_ops, V4L2_CID_FWHT_I_FRAME_QP, 1, 31, 1, 20); v4l2_ctrl_new_std(hdl, &vicodec_ctrl_ops, V4L2_CID_FWHT_P_FRAME_QP, 1, 31, 1, 20); if (ctx->is_stateless) v4l2_ctrl_new_custom(hdl, &vicodec_ctrl_stateless_state, NULL); else v4l2_ctrl_new_std(hdl, &vicodec_ctrl_ops, ctx->is_enc ? V4L2_CID_MIN_BUFFERS_FOR_OUTPUT : V4L2_CID_MIN_BUFFERS_FOR_CAPTURE, VICODEC_REC_BUFS, VICODEC_REC_BUFS, 1, VICODEC_REC_BUFS); if (hdl->error) { rc = hdl->error; v4l2_ctrl_handler_free(hdl); kfree(ctx); goto open_unlock; } ctx->fh.ctrl_handler = hdl; v4l2_ctrl_handler_setup(hdl); if (ctx->is_enc) ctx->q_data[V4L2_M2M_SRC].info = info; else if (ctx->is_stateless) ctx->q_data[V4L2_M2M_SRC].info = &pixfmt_stateless_fwht; else ctx->q_data[V4L2_M2M_SRC].info = &pixfmt_fwht; ctx->q_data[V4L2_M2M_SRC].coded_width = 1280; ctx->q_data[V4L2_M2M_SRC].coded_height = 720; ctx->q_data[V4L2_M2M_SRC].visible_width = 1280; ctx->q_data[V4L2_M2M_SRC].visible_height = 720; raw_size = 1280 * 720 * info->sizeimage_mult / info->sizeimage_div; comp_size = 1280 * 720 * pixfmt_fwht.sizeimage_mult / pixfmt_fwht.sizeimage_div; if (ctx->is_enc) ctx->q_data[V4L2_M2M_SRC].sizeimage = raw_size; else if (ctx->is_stateless) ctx->q_data[V4L2_M2M_SRC].sizeimage = comp_size; else ctx->q_data[V4L2_M2M_SRC].sizeimage = comp_size + sizeof(struct fwht_cframe_hdr); ctx->q_data[V4L2_M2M_DST] = ctx->q_data[V4L2_M2M_SRC]; if (ctx->is_enc) { ctx->q_data[V4L2_M2M_DST].info = &pixfmt_fwht; ctx->q_data[V4L2_M2M_DST].sizeimage = comp_size + sizeof(struct fwht_cframe_hdr); } else { ctx->q_data[V4L2_M2M_DST].info = info; ctx->q_data[V4L2_M2M_DST].sizeimage = raw_size; } ctx->state.colorspace = V4L2_COLORSPACE_REC709; if (ctx->is_enc) { ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->stateful_enc.m2m_dev, ctx, &queue_init); ctx->lock = &dev->stateful_enc.lock; } else if (ctx->is_stateless) { ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->stateless_dec.m2m_dev, ctx, &queue_init); ctx->lock = &dev->stateless_dec.lock; } else { ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->stateful_dec.m2m_dev, ctx, &queue_init); ctx->lock = &dev->stateful_dec.lock; } if (IS_ERR(ctx->fh.m2m_ctx)) { rc = PTR_ERR(ctx->fh.m2m_ctx); v4l2_ctrl_handler_free(hdl); v4l2_fh_exit(&ctx->fh); kfree(ctx); goto open_unlock; } v4l2_fh_add(&ctx->fh); open_unlock: mutex_unlock(vfd->lock); return rc; } static int vicodec_release(struct file *file) { struct video_device *vfd = video_devdata(file); struct vicodec_ctx *ctx = file2ctx(file); mutex_lock(vfd->lock); v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); mutex_unlock(vfd->lock); v4l2_fh_del(&ctx->fh); v4l2_fh_exit(&ctx->fh); v4l2_ctrl_handler_free(&ctx->hdl); kvfree(ctx->state.compressed_frame); kfree(ctx); return 0; } static int vicodec_request_validate(struct media_request *req) { struct media_request_object *obj; struct v4l2_ctrl_handler *parent_hdl, *hdl; struct vicodec_ctx *ctx = NULL; struct v4l2_ctrl *ctrl; unsigned int count; list_for_each_entry(obj, &req->objects, list) { struct vb2_buffer *vb; if (vb2_request_object_is_buffer(obj)) { vb = container_of(obj, struct vb2_buffer, req_obj); ctx = vb2_get_drv_priv(vb->vb2_queue); break; } } if (!ctx) { pr_err("No buffer was provided with the request\n"); return -ENOENT; } count = vb2_request_buffer_cnt(req); if (!count) { v4l2_info(&ctx->dev->v4l2_dev, "No buffer was provided with the request\n"); return -ENOENT; } else if (count > 1) { v4l2_info(&ctx->dev->v4l2_dev, "More than one buffer was provided with the request\n"); return -EINVAL; } 
parent_hdl = &ctx->hdl; hdl = v4l2_ctrl_request_hdl_find(req, parent_hdl); if (!hdl) { v4l2_info(&ctx->dev->v4l2_dev, "Missing codec control\n"); return -ENOENT; } ctrl = v4l2_ctrl_request_hdl_ctrl_find(hdl, vicodec_ctrl_stateless_state.id); v4l2_ctrl_request_hdl_put(hdl); if (!ctrl) { v4l2_info(&ctx->dev->v4l2_dev, "Missing required codec control\n"); return -ENOENT; } return vb2_request_validate(req); } static const struct v4l2_file_operations vicodec_fops = { .owner = THIS_MODULE, .open = vicodec_open, .release = vicodec_release, .poll = v4l2_m2m_fop_poll, .unlocked_ioctl = video_ioctl2, .mmap = v4l2_m2m_fop_mmap, }; static const struct video_device vicodec_videodev = { .name = VICODEC_NAME, .vfl_dir = VFL_DIR_M2M, .fops = &vicodec_fops, .ioctl_ops = &vicodec_ioctl_ops, .minor = -1, .release = video_device_release_empty, }; static const struct media_device_ops vicodec_m2m_media_ops = { .req_validate = vicodec_request_validate, .req_queue = v4l2_m2m_request_queue, }; static const struct v4l2_m2m_ops m2m_ops = { .device_run = device_run, .job_ready = job_ready, }; static int register_instance(struct vicodec_dev *dev, struct vicodec_dev_instance *dev_instance, const char *name, bool is_enc, bool is_stateless) { struct video_device *vfd; int ret; spin_lock_init(&dev_instance->lock); mutex_init(&dev_instance->mutex); dev_instance->m2m_dev = v4l2_m2m_init(&m2m_ops); if (IS_ERR(dev_instance->m2m_dev)) { v4l2_err(&dev->v4l2_dev, "Failed to init vicodec enc device\n"); return PTR_ERR(dev_instance->m2m_dev); } dev_instance->vfd = vicodec_videodev; vfd = &dev_instance->vfd; vfd->lock = &dev_instance->mutex; vfd->v4l2_dev = &dev->v4l2_dev; strscpy(vfd->name, name, sizeof(vfd->name)); vfd->device_caps = V4L2_CAP_STREAMING | (multiplanar ? V4L2_CAP_VIDEO_M2M_MPLANE : V4L2_CAP_VIDEO_M2M); if (is_enc || is_stateless) { v4l2_disable_ioctl(vfd, VIDIOC_DECODER_CMD); v4l2_disable_ioctl(vfd, VIDIOC_TRY_DECODER_CMD); } if (!is_enc) { v4l2_disable_ioctl(vfd, VIDIOC_ENCODER_CMD); v4l2_disable_ioctl(vfd, VIDIOC_TRY_ENCODER_CMD); } video_set_drvdata(vfd, dev); ret = video_register_device(vfd, VFL_TYPE_VIDEO, 0); if (ret) { v4l2_err(&dev->v4l2_dev, "Failed to register video device '%s'\n", name); v4l2_m2m_release(dev_instance->m2m_dev); return ret; } v4l2_info(&dev->v4l2_dev, "Device '%s' registered as /dev/video%d\n", name, vfd->num); return 0; } static void vicodec_v4l2_dev_release(struct v4l2_device *v4l2_dev) { struct vicodec_dev *dev = container_of(v4l2_dev, struct vicodec_dev, v4l2_dev); v4l2_device_unregister(&dev->v4l2_dev); v4l2_m2m_release(dev->stateful_enc.m2m_dev); v4l2_m2m_release(dev->stateful_dec.m2m_dev); v4l2_m2m_release(dev->stateless_dec.m2m_dev); #ifdef CONFIG_MEDIA_CONTROLLER media_device_cleanup(&dev->mdev); #endif kfree(dev); } static int vicodec_probe(struct platform_device *pdev) { struct vicodec_dev *dev; int ret; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev); if (ret) goto free_dev; dev->v4l2_dev.release = vicodec_v4l2_dev_release; #ifdef CONFIG_MEDIA_CONTROLLER dev->mdev.dev = &pdev->dev; strscpy(dev->mdev.model, "vicodec", sizeof(dev->mdev.model)); strscpy(dev->mdev.bus_info, "platform:vicodec", sizeof(dev->mdev.bus_info)); media_device_init(&dev->mdev); dev->mdev.ops = &vicodec_m2m_media_ops; dev->v4l2_dev.mdev = &dev->mdev; #endif platform_set_drvdata(pdev, dev); ret = register_instance(dev, &dev->stateful_enc, "stateful-encoder", true, false); if (ret) goto unreg_dev; ret = register_instance(dev, 
&dev->stateful_dec, "stateful-decoder", false, false); if (ret) goto unreg_sf_enc; ret = register_instance(dev, &dev->stateless_dec, "stateless-decoder", false, true); if (ret) goto unreg_sf_dec; #ifdef CONFIG_MEDIA_CONTROLLER ret = v4l2_m2m_register_media_controller(dev->stateful_enc.m2m_dev, &dev->stateful_enc.vfd, MEDIA_ENT_F_PROC_VIDEO_ENCODER); if (ret) { v4l2_err(&dev->v4l2_dev, "Failed to init mem2mem media controller for enc\n"); goto unreg_m2m; } ret = v4l2_m2m_register_media_controller(dev->stateful_dec.m2m_dev, &dev->stateful_dec.vfd, MEDIA_ENT_F_PROC_VIDEO_DECODER); if (ret) { v4l2_err(&dev->v4l2_dev, "Failed to init mem2mem media controller for dec\n"); goto unreg_m2m_sf_enc_mc; } ret = v4l2_m2m_register_media_controller(dev->stateless_dec.m2m_dev, &dev->stateless_dec.vfd, MEDIA_ENT_F_PROC_VIDEO_DECODER); if (ret) { v4l2_err(&dev->v4l2_dev, "Failed to init mem2mem media controller for stateless dec\n"); goto unreg_m2m_sf_dec_mc; } ret = media_device_register(&dev->mdev); if (ret) { v4l2_err(&dev->v4l2_dev, "Failed to register mem2mem media device\n"); goto unreg_m2m_sl_dec_mc; } #endif return 0; #ifdef CONFIG_MEDIA_CONTROLLER unreg_m2m_sl_dec_mc: v4l2_m2m_unregister_media_controller(dev->stateless_dec.m2m_dev); unreg_m2m_sf_dec_mc: v4l2_m2m_unregister_media_controller(dev->stateful_dec.m2m_dev); unreg_m2m_sf_enc_mc: v4l2_m2m_unregister_media_controller(dev->stateful_enc.m2m_dev); unreg_m2m: video_unregister_device(&dev->stateless_dec.vfd); v4l2_m2m_release(dev->stateless_dec.m2m_dev); #endif unreg_sf_dec: video_unregister_device(&dev->stateful_dec.vfd); v4l2_m2m_release(dev->stateful_dec.m2m_dev); unreg_sf_enc: video_unregister_device(&dev->stateful_enc.vfd); v4l2_m2m_release(dev->stateful_enc.m2m_dev); unreg_dev: v4l2_device_unregister(&dev->v4l2_dev); free_dev: kfree(dev); return ret; } static void vicodec_remove(struct platform_device *pdev) { struct vicodec_dev *dev = platform_get_drvdata(pdev); v4l2_info(&dev->v4l2_dev, "Removing " VICODEC_NAME); #ifdef CONFIG_MEDIA_CONTROLLER media_device_unregister(&dev->mdev); v4l2_m2m_unregister_media_controller(dev->stateful_enc.m2m_dev); v4l2_m2m_unregister_media_controller(dev->stateful_dec.m2m_dev); v4l2_m2m_unregister_media_controller(dev->stateless_dec.m2m_dev); #endif video_unregister_device(&dev->stateful_enc.vfd); video_unregister_device(&dev->stateful_dec.vfd); video_unregister_device(&dev->stateless_dec.vfd); v4l2_device_put(&dev->v4l2_dev); } static struct platform_driver vicodec_pdrv = { .probe = vicodec_probe, .remove = vicodec_remove, .driver = { .name = VICODEC_NAME, }, }; static void __exit vicodec_exit(void) { platform_driver_unregister(&vicodec_pdrv); platform_device_unregister(&vicodec_pdev); } static int __init vicodec_init(void) { int ret; ret = platform_device_register(&vicodec_pdev); if (ret) return ret; ret = platform_driver_register(&vicodec_pdrv); if (ret) platform_device_unregister(&vicodec_pdev); return ret; } module_init(vicodec_init); module_exit(vicodec_exit);
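For reference, the buffer sizing done by total_frame_size() above reduces to simple plane arithmetic. The sketch below is illustrative only and not part of the driver; the helper name and the 1280x720 8-bit 4:2:0 layout (three components, width_div = height_div = 2, so chroma_div = 4) are assumptions used to show the three-component branch:

/*
 * Illustrative sketch, not driver code: mirrors the three-component branch
 * of total_frame_size() for a hypothetical 1280x720 8-bit 4:2:0 format.
 */
static unsigned int example_yuv420_frame_size(unsigned int width,
					      unsigned int height)
{
	unsigned int luma_size = width * height;	/* 1280 * 720 = 921600 */
	unsigned int chroma_div = 2 * 2;		/* width_div * height_div */

	/* One luma plane plus two chroma planes at a quarter of its size. */
	return luma_size + 2 * (luma_size / chroma_div);	/* 921600 + 2 * 230400 = 1382400 */
}

With four components the same arithmetic adds one more luma-sized (alpha) plane on top, matching the components_num == 4 branch.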
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011, 2012 STRATO. All rights reserved.
 */

#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "discard.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "raid56.h"
#include "block-group.h"
#include "zoned.h"
#include "fs.h"
#include "accessors.h"
#include "file-item.h"
#include "scrub.h"
#include "raid-stripe-tree.h"

/*
 * This is only the first step towards a full-featured scrub. It reads all
 * extents and super blocks and verifies the checksums. In case a bad checksum
 * is found or the extent cannot be read, good data will be written back if
 * any can be found.
 *
 * Future enhancements:
 *  - In case an unrepairable extent is encountered, track which files are
 *    affected and report them
 *  - track and record media errors, throw out bad devices
 *  - add a mode to also read unallocated space
 */

struct scrub_ctx;

/*
 * The following value only influences the performance.
 *
 * This determines how many stripes would be submitted in one go,
 * which is 512KiB (BTRFS_STRIPE_LEN * SCRUB_STRIPES_PER_GROUP).
 */
#define SCRUB_STRIPES_PER_GROUP		8

/*
 * How many groups we have for each sctx.
 *
 * This would be 8M per device, the same value as the old scrub in-flight bios
 * size limit.
 */
#define SCRUB_GROUPS_PER_SCTX		16

#define SCRUB_TOTAL_STRIPES	(SCRUB_GROUPS_PER_SCTX * SCRUB_STRIPES_PER_GROUP)

/*
 * The following value times PAGE_SIZE needs to be large enough to match the
 * largest node/leaf/sector size that shall be supported.
 */
#define SCRUB_MAX_SECTORS_PER_BLOCK	(BTRFS_MAX_METADATA_BLOCKSIZE / SZ_4K)

/* Represent one sector and its needed info to verify the content. */
struct scrub_sector_verification {
	bool is_metadata;

	union {
		/*
		 * Csum pointer for data csum verification. Should point to a
		 * sector csum inside scrub_stripe::csums.
		 *
		 * NULL if this data sector has no csum.
		 */
		u8 *csum;

		/*
		 * Extra info for metadata verification. All sectors inside a
		 * tree block share the same generation.
		 */
		u64 generation;
	};
};

enum scrub_stripe_flags {
	/* Set when @mirror_num, @dev, @physical and @logical are set. */
	SCRUB_STRIPE_FLAG_INITIALIZED,

	/* Set when the read-repair is finished. */
	SCRUB_STRIPE_FLAG_REPAIR_DONE,

	/*
	 * Set for data stripes if it's triggered from P/Q stripe.
	 * During such scrub, we should not report errors in data stripes, nor
	 * update the accounting.
*/ SCRUB_STRIPE_FLAG_NO_REPORT, }; #define SCRUB_STRIPE_PAGES (BTRFS_STRIPE_LEN / PAGE_SIZE) /* * Represent one contiguous range with a length of BTRFS_STRIPE_LEN. */ struct scrub_stripe { struct scrub_ctx *sctx; struct btrfs_block_group *bg; struct page *pages[SCRUB_STRIPE_PAGES]; struct scrub_sector_verification *sectors; struct btrfs_device *dev; u64 logical; u64 physical; u16 mirror_num; /* Should be BTRFS_STRIPE_LEN / sectorsize. */ u16 nr_sectors; /* * How many data/meta extents are in this stripe. Only for scrub status * reporting purposes. */ u16 nr_data_extents; u16 nr_meta_extents; atomic_t pending_io; wait_queue_head_t io_wait; wait_queue_head_t repair_wait; /* * Indicate the states of the stripe. Bits are defined in * scrub_stripe_flags enum. */ unsigned long state; /* Indicate which sectors are covered by extent items. */ unsigned long extent_sector_bitmap; /* * The errors hit during the initial read of the stripe. * * Would be utilized for error reporting and repair. * * The remaining init_nr_* records the number of errors hit, only used * by error reporting. */ unsigned long init_error_bitmap; unsigned int init_nr_io_errors; unsigned int init_nr_csum_errors; unsigned int init_nr_meta_errors; /* * The following error bitmaps are all for the current status. * Every time we submit a new read, these bitmaps may be updated. * * error_bitmap = io_error_bitmap | csum_error_bitmap | meta_error_bitmap; * * IO and csum errors can happen for both metadata and data. */ unsigned long error_bitmap; unsigned long io_error_bitmap; unsigned long csum_error_bitmap; unsigned long meta_error_bitmap; /* For writeback (repair or replace) error reporting. */ unsigned long write_error_bitmap; /* Writeback can be concurrent, thus we need to protect the bitmap. */ spinlock_t write_error_lock; /* * Checksum for the whole stripe if this stripe is inside a data block * group. */ u8 *csums; struct work_struct work; }; struct scrub_ctx { struct scrub_stripe stripes[SCRUB_TOTAL_STRIPES]; struct scrub_stripe *raid56_data_stripes; struct btrfs_fs_info *fs_info; struct btrfs_path extent_path; struct btrfs_path csum_path; int first_free; int cur_stripe; atomic_t cancel_req; int readonly; /* State of IO submission throttling affecting the associated device */ ktime_t throttle_deadline; u64 throttle_sent; int is_dev_replace; u64 write_pointer; struct mutex wr_lock; struct btrfs_device *wr_tgtdev; /* * statistics */ struct btrfs_scrub_progress stat; spinlock_t stat_lock; /* * Use a ref counter to avoid use-after-free issues. Scrub workers * decrement bios_in_flight and workers_pending and then do a wakeup * on the list_wait wait queue. We must ensure the main scrub task * doesn't free the scrub context before or while the workers are * doing the wakeup() call. 
*/ refcount_t refs; }; struct scrub_warning { struct btrfs_path *path; u64 extent_item_size; const char *errstr; u64 physical; u64 logical; struct btrfs_device *dev; }; static void release_scrub_stripe(struct scrub_stripe *stripe) { if (!stripe) return; for (int i = 0; i < SCRUB_STRIPE_PAGES; i++) { if (stripe->pages[i]) __free_page(stripe->pages[i]); stripe->pages[i] = NULL; } kfree(stripe->sectors); kfree(stripe->csums); stripe->sectors = NULL; stripe->csums = NULL; stripe->sctx = NULL; stripe->state = 0; } static int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe) { int ret; memset(stripe, 0, sizeof(*stripe)); stripe->nr_sectors = BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits; stripe->state = 0; init_waitqueue_head(&stripe->io_wait); init_waitqueue_head(&stripe->repair_wait); atomic_set(&stripe->pending_io, 0); spin_lock_init(&stripe->write_error_lock); ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages, false); if (ret < 0) goto error; stripe->sectors = kcalloc(stripe->nr_sectors, sizeof(struct scrub_sector_verification), GFP_KERNEL); if (!stripe->sectors) goto error; stripe->csums = kcalloc(BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits, fs_info->csum_size, GFP_KERNEL); if (!stripe->csums) goto error; return 0; error: release_scrub_stripe(stripe); return -ENOMEM; } static void wait_scrub_stripe_io(struct scrub_stripe *stripe) { wait_event(stripe->io_wait, atomic_read(&stripe->pending_io) == 0); } static void scrub_put_ctx(struct scrub_ctx *sctx); static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) { while (atomic_read(&fs_info->scrub_pause_req)) { mutex_unlock(&fs_info->scrub_lock); wait_event(fs_info->scrub_pause_wait, atomic_read(&fs_info->scrub_pause_req) == 0); mutex_lock(&fs_info->scrub_lock); } } static void scrub_pause_on(struct btrfs_fs_info *fs_info) { atomic_inc(&fs_info->scrubs_paused); wake_up(&fs_info->scrub_pause_wait); } static void scrub_pause_off(struct btrfs_fs_info *fs_info) { mutex_lock(&fs_info->scrub_lock); __scrub_blocked_if_needed(fs_info); atomic_dec(&fs_info->scrubs_paused); mutex_unlock(&fs_info->scrub_lock); wake_up(&fs_info->scrub_pause_wait); } static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) { scrub_pause_on(fs_info); scrub_pause_off(fs_info); } static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) { int i; if (!sctx) return; for (i = 0; i < SCRUB_TOTAL_STRIPES; i++) release_scrub_stripe(&sctx->stripes[i]); kvfree(sctx); } static void scrub_put_ctx(struct scrub_ctx *sctx) { if (refcount_dec_and_test(&sctx->refs)) scrub_free_ctx(sctx); } static noinline_for_stack struct scrub_ctx *scrub_setup_ctx( struct btrfs_fs_info *fs_info, int is_dev_replace) { struct scrub_ctx *sctx; int i; /* Since sctx has inline 128 stripes, it can go beyond 64K easily. Use * kvzalloc(). 
*/ sctx = kvzalloc(sizeof(*sctx), GFP_KERNEL); if (!sctx) goto nomem; refcount_set(&sctx->refs, 1); sctx->is_dev_replace = is_dev_replace; sctx->fs_info = fs_info; sctx->extent_path.search_commit_root = 1; sctx->extent_path.skip_locking = 1; sctx->csum_path.search_commit_root = 1; sctx->csum_path.skip_locking = 1; for (i = 0; i < SCRUB_TOTAL_STRIPES; i++) { int ret; ret = init_scrub_stripe(fs_info, &sctx->stripes[i]); if (ret < 0) goto nomem; sctx->stripes[i].sctx = sctx; } sctx->first_free = 0; atomic_set(&sctx->cancel_req, 0); spin_lock_init(&sctx->stat_lock); sctx->throttle_deadline = 0; mutex_init(&sctx->wr_lock); if (is_dev_replace) { WARN_ON(!fs_info->dev_replace.tgtdev); sctx->wr_tgtdev = fs_info->dev_replace.tgtdev; } return sctx; nomem: scrub_free_ctx(sctx); return ERR_PTR(-ENOMEM); } static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, u64 root, void *warn_ctx) { u32 nlink; int ret; int i; unsigned nofs_flag; struct extent_buffer *eb; struct btrfs_inode_item *inode_item; struct scrub_warning *swarn = warn_ctx; struct btrfs_fs_info *fs_info = swarn->dev->fs_info; struct inode_fs_paths *ipath = NULL; struct btrfs_root *local_root; struct btrfs_key key; local_root = btrfs_get_fs_root(fs_info, root, true); if (IS_ERR(local_root)) { ret = PTR_ERR(local_root); goto err; } /* * this makes the path point to (inum INODE_ITEM ioff) */ key.objectid = inum; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0); if (ret) { btrfs_put_root(local_root); btrfs_release_path(swarn->path); goto err; } eb = swarn->path->nodes[0]; inode_item = btrfs_item_ptr(eb, swarn->path->slots[0], struct btrfs_inode_item); nlink = btrfs_inode_nlink(eb, inode_item); btrfs_release_path(swarn->path); /* * init_path might indirectly call vmalloc, or use GFP_KERNEL. Scrub * uses GFP_NOFS in this context, so we keep it consistent but it does * not seem to be strictly necessary. */ nofs_flag = memalloc_nofs_save(); ipath = init_ipath(4096, local_root, swarn->path); memalloc_nofs_restore(nofs_flag); if (IS_ERR(ipath)) { btrfs_put_root(local_root); ret = PTR_ERR(ipath); ipath = NULL; goto err; } ret = paths_from_inode(inum, ipath); if (ret < 0) goto err; /* * we deliberately ignore the bit ipath might have been too small to * hold all of the paths here */ for (i = 0; i < ipath->fspath->elem_cnt; ++i) btrfs_warn_in_rcu(fs_info, "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)", swarn->errstr, swarn->logical, btrfs_dev_name(swarn->dev), swarn->physical, root, inum, offset, fs_info->sectorsize, nlink, (char *)(unsigned long)ipath->fspath->val[i]); btrfs_put_root(local_root); free_ipath(ipath); return 0; err: btrfs_warn_in_rcu(fs_info, "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d", swarn->errstr, swarn->logical, btrfs_dev_name(swarn->dev), swarn->physical, root, inum, offset, ret); free_ipath(ipath); return 0; } static void scrub_print_common_warning(const char *errstr, struct btrfs_device *dev, bool is_super, u64 logical, u64 physical) { struct btrfs_fs_info *fs_info = dev->fs_info; struct btrfs_path *path; struct btrfs_key found_key; struct extent_buffer *eb; struct btrfs_extent_item *ei; struct scrub_warning swarn; u64 flags = 0; u32 item_size; int ret; /* Super block error, no need to search extent tree. 
*/ if (is_super) { btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu", errstr, btrfs_dev_name(dev), physical); return; } path = btrfs_alloc_path(); if (!path) return; swarn.physical = physical; swarn.logical = logical; swarn.errstr = errstr; swarn.dev = NULL; ret = extent_from_logical(fs_info, swarn.logical, path, &found_key, &flags); if (ret < 0) goto out; swarn.extent_item_size = found_key.offset; eb = path->nodes[0]; ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); item_size = btrfs_item_size(eb, path->slots[0]); if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { unsigned long ptr = 0; u8 ref_level; u64 ref_root; while (true) { ret = tree_backref_for_extent(&ptr, eb, &found_key, ei, item_size, &ref_root, &ref_level); if (ret < 0) { btrfs_warn(fs_info, "failed to resolve tree backref for logical %llu: %d", swarn.logical, ret); break; } if (ret > 0) break; btrfs_warn_in_rcu(fs_info, "%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu", errstr, swarn.logical, btrfs_dev_name(dev), swarn.physical, (ref_level ? "node" : "leaf"), ref_level, ref_root); } btrfs_release_path(path); } else { struct btrfs_backref_walk_ctx ctx = { 0 }; btrfs_release_path(path); ctx.bytenr = found_key.objectid; ctx.extent_item_pos = swarn.logical - found_key.objectid; ctx.fs_info = fs_info; swarn.path = path; swarn.dev = dev; iterate_extent_inodes(&ctx, true, scrub_print_warning_inode, &swarn); } out: btrfs_free_path(path); } static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical) { int ret = 0; u64 length; if (!btrfs_is_zoned(sctx->fs_info)) return 0; if (!btrfs_dev_is_sequential(sctx->wr_tgtdev, physical)) return 0; if (sctx->write_pointer < physical) { length = physical - sctx->write_pointer; ret = btrfs_zoned_issue_zeroout(sctx->wr_tgtdev, sctx->write_pointer, length); if (!ret) sctx->write_pointer = physical; } return ret; } static struct page *scrub_stripe_get_page(struct scrub_stripe *stripe, int sector_nr) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; int page_index = (sector_nr << fs_info->sectorsize_bits) >> PAGE_SHIFT; return stripe->pages[page_index]; } static unsigned int scrub_stripe_get_page_offset(struct scrub_stripe *stripe, int sector_nr) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; return offset_in_page(sector_nr << fs_info->sectorsize_bits); } static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits; const u64 logical = stripe->logical + (sector_nr << fs_info->sectorsize_bits); const struct page *first_page = scrub_stripe_get_page(stripe, sector_nr); const unsigned int first_off = scrub_stripe_get_page_offset(stripe, sector_nr); SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); u8 on_disk_csum[BTRFS_CSUM_SIZE]; u8 calculated_csum[BTRFS_CSUM_SIZE]; struct btrfs_header *header; /* * Here we don't have a good way to attach the pages (and subpages) * to a dummy extent buffer, thus we have to directly grab the members * from pages. 
*/ header = (struct btrfs_header *)(page_address(first_page) + first_off); memcpy(on_disk_csum, header->csum, fs_info->csum_size); if (logical != btrfs_stack_header_bytenr(header)) { bitmap_set(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree); bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, "tree block %llu mirror %u has bad bytenr, has %llu want %llu", logical, stripe->mirror_num, btrfs_stack_header_bytenr(header), logical); return; } if (memcmp(header->fsid, fs_info->fs_devices->metadata_uuid, BTRFS_FSID_SIZE) != 0) { bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, "tree block %llu mirror %u has bad fsid, has %pU want %pU", logical, stripe->mirror_num, header->fsid, fs_info->fs_devices->fsid); return; } if (memcmp(header->chunk_tree_uuid, fs_info->chunk_tree_uuid, BTRFS_UUID_SIZE) != 0) { bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, "tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU", logical, stripe->mirror_num, header->chunk_tree_uuid, fs_info->chunk_tree_uuid); return; } /* Now check tree block csum. */ shash->tfm = fs_info->csum_shash; crypto_shash_init(shash); crypto_shash_update(shash, page_address(first_page) + first_off + BTRFS_CSUM_SIZE, fs_info->sectorsize - BTRFS_CSUM_SIZE); for (int i = sector_nr + 1; i < sector_nr + sectors_per_tree; i++) { struct page *page = scrub_stripe_get_page(stripe, i); unsigned int page_off = scrub_stripe_get_page_offset(stripe, i); crypto_shash_update(shash, page_address(page) + page_off, fs_info->sectorsize); } crypto_shash_final(shash, calculated_csum); if (memcmp(calculated_csum, on_disk_csum, fs_info->csum_size) != 0) { bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, "tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT, logical, stripe->mirror_num, CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum), CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum)); return; } if (stripe->sectors[sector_nr].generation != btrfs_stack_header_generation(header)) { bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, "tree block %llu mirror %u has bad generation, has %llu want %llu", logical, stripe->mirror_num, btrfs_stack_header_generation(header), stripe->sectors[sector_nr].generation); return; } bitmap_clear(&stripe->error_bitmap, sector_nr, sectors_per_tree); bitmap_clear(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree); bitmap_clear(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); } static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct scrub_sector_verification *sector = &stripe->sectors[sector_nr]; const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits; struct page *page = scrub_stripe_get_page(stripe, sector_nr); unsigned int pgoff = scrub_stripe_get_page_offset(stripe, sector_nr); u8 csum_buf[BTRFS_CSUM_SIZE]; int ret; ASSERT(sector_nr >= 0 && sector_nr < stripe->nr_sectors); /* Sector not utilized, skip it. */ if (!test_bit(sector_nr, &stripe->extent_sector_bitmap)) return; /* IO error, no need to check. 
*/ if (test_bit(sector_nr, &stripe->io_error_bitmap)) return; /* Metadata, verify the full tree block. */ if (sector->is_metadata) { /* * Check if the tree block crosses the stripe boundary. If * crossed the boundary, we cannot verify it but only give a * warning. * * This can only happen on a very old filesystem where chunks * are not ensured to be stripe aligned. */ if (unlikely(sector_nr + sectors_per_tree > stripe->nr_sectors)) { btrfs_warn_rl(fs_info, "tree block at %llu crosses stripe boundary %llu", stripe->logical + (sector_nr << fs_info->sectorsize_bits), stripe->logical); return; } scrub_verify_one_metadata(stripe, sector_nr); return; } /* * Data is easier, we just verify the data csum (if we have it). For * cases without csum, we have no other choice but to trust it. */ if (!sector->csum) { clear_bit(sector_nr, &stripe->error_bitmap); return; } ret = btrfs_check_sector_csum(fs_info, page, pgoff, csum_buf, sector->csum); if (ret < 0) { set_bit(sector_nr, &stripe->csum_error_bitmap); set_bit(sector_nr, &stripe->error_bitmap); } else { clear_bit(sector_nr, &stripe->csum_error_bitmap); clear_bit(sector_nr, &stripe->error_bitmap); } } /* Verify specified sectors of a stripe. */ static void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits; int sector_nr; for_each_set_bit(sector_nr, &bitmap, stripe->nr_sectors) { scrub_verify_one_sector(stripe, sector_nr); if (stripe->sectors[sector_nr].is_metadata) sector_nr += sectors_per_tree - 1; } } static int calc_sector_number(struct scrub_stripe *stripe, struct bio_vec *first_bvec) { int i; for (i = 0; i < stripe->nr_sectors; i++) { if (scrub_stripe_get_page(stripe, i) == first_bvec->bv_page && scrub_stripe_get_page_offset(stripe, i) == first_bvec->bv_offset) break; } ASSERT(i < stripe->nr_sectors); return i; } /* * Repair read is different to the regular read: * * - Only reads the failed sectors * - May have extra blocksize limits */ static void scrub_repair_read_endio(struct btrfs_bio *bbio) { struct scrub_stripe *stripe = bbio->private; struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct bio_vec *bvec; int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio)); u32 bio_size = 0; int i; ASSERT(sector_nr < stripe->nr_sectors); bio_for_each_bvec_all(bvec, &bbio->bio, i) bio_size += bvec->bv_len; if (bbio->bio.bi_status) { bitmap_set(&stripe->io_error_bitmap, sector_nr, bio_size >> fs_info->sectorsize_bits); bitmap_set(&stripe->error_bitmap, sector_nr, bio_size >> fs_info->sectorsize_bits); } else { bitmap_clear(&stripe->io_error_bitmap, sector_nr, bio_size >> fs_info->sectorsize_bits); } bio_put(&bbio->bio); if (atomic_dec_and_test(&stripe->pending_io)) wake_up(&stripe->io_wait); } static int calc_next_mirror(int mirror, int num_copies) { ASSERT(mirror <= num_copies); return (mirror + 1 > num_copies) ? 
1 : mirror + 1; } static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe, int mirror, int blocksize, bool wait) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct btrfs_bio *bbio = NULL; const unsigned long old_error_bitmap = stripe->error_bitmap; int i; ASSERT(stripe->mirror_num >= 1); ASSERT(atomic_read(&stripe->pending_io) == 0); for_each_set_bit(i, &old_error_bitmap, stripe->nr_sectors) { struct page *page; int pgoff; int ret; page = scrub_stripe_get_page(stripe, i); pgoff = scrub_stripe_get_page_offset(stripe, i); /* The current sector cannot be merged, submit the bio. */ if (bbio && ((i > 0 && !test_bit(i - 1, &stripe->error_bitmap)) || bbio->bio.bi_iter.bi_size >= blocksize)) { ASSERT(bbio->bio.bi_iter.bi_size); atomic_inc(&stripe->pending_io); btrfs_submit_bbio(bbio, mirror); if (wait) wait_scrub_stripe_io(stripe); bbio = NULL; } if (!bbio) { bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ, fs_info, scrub_repair_read_endio, stripe); bbio->bio.bi_iter.bi_sector = (stripe->logical + (i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT; } ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); ASSERT(ret == fs_info->sectorsize); } if (bbio) { ASSERT(bbio->bio.bi_iter.bi_size); atomic_inc(&stripe->pending_io); btrfs_submit_bbio(bbio, mirror); if (wait) wait_scrub_stripe_io(stripe); } } static void scrub_stripe_report_errors(struct scrub_ctx *sctx, struct scrub_stripe *stripe) { static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_device *dev = NULL; u64 physical = 0; int nr_data_sectors = 0; int nr_meta_sectors = 0; int nr_nodatacsum_sectors = 0; int nr_repaired_sectors = 0; int sector_nr; if (test_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state)) return; /* * Init needed infos for error reporting. * * Although our scrub_stripe infrastructure is mostly based on btrfs_submit_bio() * thus no need for dev/physical, error reporting still needs dev and physical. */ if (!bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) { u64 mapped_len = fs_info->sectorsize; struct btrfs_io_context *bioc = NULL; int stripe_index = stripe->mirror_num - 1; int ret; /* For scrub, our mirror_num should always start at 1. */ ASSERT(stripe->mirror_num >= 1); ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, stripe->logical, &mapped_len, &bioc, NULL, NULL); /* * If we failed, dev will be NULL, and later detailed reports * will just be skipped. */ if (ret < 0) goto skip; physical = bioc->stripes[stripe_index].physical; dev = bioc->stripes[stripe_index].dev; btrfs_put_bioc(bioc); } skip: for_each_set_bit(sector_nr, &stripe->extent_sector_bitmap, stripe->nr_sectors) { bool repaired = false; if (stripe->sectors[sector_nr].is_metadata) { nr_meta_sectors++; } else { nr_data_sectors++; if (!stripe->sectors[sector_nr].csum) nr_nodatacsum_sectors++; } if (test_bit(sector_nr, &stripe->init_error_bitmap) && !test_bit(sector_nr, &stripe->error_bitmap)) { nr_repaired_sectors++; repaired = true; } /* Good sector from the beginning, nothing need to be done. */ if (!test_bit(sector_nr, &stripe->init_error_bitmap)) continue; /* * Report error for the corrupted sectors. If repaired, just * output the message of repaired message. 
*/ if (repaired) { if (dev) { btrfs_err_rl_in_rcu(fs_info, "fixed up error at logical %llu on dev %s physical %llu", stripe->logical, btrfs_dev_name(dev), physical); } else { btrfs_err_rl_in_rcu(fs_info, "fixed up error at logical %llu on mirror %u", stripe->logical, stripe->mirror_num); } continue; } /* The remaining are all for unrepaired. */ if (dev) { btrfs_err_rl_in_rcu(fs_info, "unable to fixup (regular) error at logical %llu on dev %s physical %llu", stripe->logical, btrfs_dev_name(dev), physical); } else { btrfs_err_rl_in_rcu(fs_info, "unable to fixup (regular) error at logical %llu on mirror %u", stripe->logical, stripe->mirror_num); } if (test_bit(sector_nr, &stripe->io_error_bitmap)) if (__ratelimit(&rs) && dev) scrub_print_common_warning("i/o error", dev, false, stripe->logical, physical); if (test_bit(sector_nr, &stripe->csum_error_bitmap)) if (__ratelimit(&rs) && dev) scrub_print_common_warning("checksum error", dev, false, stripe->logical, physical); if (test_bit(sector_nr, &stripe->meta_error_bitmap)) if (__ratelimit(&rs) && dev) scrub_print_common_warning("header error", dev, false, stripe->logical, physical); } spin_lock(&sctx->stat_lock); sctx->stat.data_extents_scrubbed += stripe->nr_data_extents; sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents; sctx->stat.data_bytes_scrubbed += nr_data_sectors << fs_info->sectorsize_bits; sctx->stat.tree_bytes_scrubbed += nr_meta_sectors << fs_info->sectorsize_bits; sctx->stat.no_csum += nr_nodatacsum_sectors; sctx->stat.read_errors += stripe->init_nr_io_errors; sctx->stat.csum_errors += stripe->init_nr_csum_errors; sctx->stat.verify_errors += stripe->init_nr_meta_errors; sctx->stat.uncorrectable_errors += bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors); sctx->stat.corrected_errors += nr_repaired_sectors; spin_unlock(&sctx->stat_lock); } static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *stripe, unsigned long write_bitmap, bool dev_replace); /* * The main entrance for all read related scrub work, including: * * - Wait for the initial read to finish * - Verify and locate any bad sectors * - Go through the remaining mirrors and try to read as large blocksize as * possible * - Go through all mirrors (including the failed mirror) sector-by-sector * - Submit writeback for repaired sectors * * Writeback for dev-replace does not happen here, it needs extra * synchronization for zoned devices. */ static void scrub_stripe_read_repair_worker(struct work_struct *work) { struct scrub_stripe *stripe = container_of(work, struct scrub_stripe, work); struct scrub_ctx *sctx = stripe->sctx; struct btrfs_fs_info *fs_info = sctx->fs_info; int num_copies = btrfs_num_copies(fs_info, stripe->bg->start, stripe->bg->length); unsigned long repaired; int mirror; int i; ASSERT(stripe->mirror_num > 0); wait_scrub_stripe_io(stripe); scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap); /* Save the initial failed bitmap for later repair and report usage. */ stripe->init_error_bitmap = stripe->error_bitmap; stripe->init_nr_io_errors = bitmap_weight(&stripe->io_error_bitmap, stripe->nr_sectors); stripe->init_nr_csum_errors = bitmap_weight(&stripe->csum_error_bitmap, stripe->nr_sectors); stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap, stripe->nr_sectors); if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) goto out; /* * Try all remaining mirrors. * * Here we still try to read as large block as possible, as this is * faster and we have extra safety nets to rely on. 
*/ for (mirror = calc_next_mirror(stripe->mirror_num, num_copies); mirror != stripe->mirror_num; mirror = calc_next_mirror(mirror, num_copies)) { const unsigned long old_error_bitmap = stripe->error_bitmap; scrub_stripe_submit_repair_read(stripe, mirror, BTRFS_STRIPE_LEN, false); wait_scrub_stripe_io(stripe); scrub_verify_one_stripe(stripe, old_error_bitmap); if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) goto out; } /* * Last safety net, try re-checking all mirrors, including the failed * one, sector-by-sector. * * As if one sector failed the drive's internal csum, the whole read * containing the offending sector would be marked as error. * Thus here we do sector-by-sector read. * * This can be slow, thus we only try it as the last resort. */ for (i = 0, mirror = stripe->mirror_num; i < num_copies; i++, mirror = calc_next_mirror(mirror, num_copies)) { const unsigned long old_error_bitmap = stripe->error_bitmap; scrub_stripe_submit_repair_read(stripe, mirror, fs_info->sectorsize, true); wait_scrub_stripe_io(stripe); scrub_verify_one_stripe(stripe, old_error_bitmap); if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) goto out; } out: /* * Submit the repaired sectors. For zoned case, we cannot do repair * in-place, but queue the bg to be relocated. */ bitmap_andnot(&repaired, &stripe->init_error_bitmap, &stripe->error_bitmap, stripe->nr_sectors); if (!sctx->readonly && !bitmap_empty(&repaired, stripe->nr_sectors)) { if (btrfs_is_zoned(fs_info)) { btrfs_repair_one_zone(fs_info, sctx->stripes[0].bg->start); } else { scrub_write_sectors(sctx, stripe, repaired, false); wait_scrub_stripe_io(stripe); } } scrub_stripe_report_errors(sctx, stripe); set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state); wake_up(&stripe->repair_wait); } static void scrub_read_endio(struct btrfs_bio *bbio) { struct scrub_stripe *stripe = bbio->private; struct bio_vec *bvec; int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio)); int num_sectors; u32 bio_size = 0; int i; ASSERT(sector_nr < stripe->nr_sectors); bio_for_each_bvec_all(bvec, &bbio->bio, i) bio_size += bvec->bv_len; num_sectors = bio_size >> stripe->bg->fs_info->sectorsize_bits; if (bbio->bio.bi_status) { bitmap_set(&stripe->io_error_bitmap, sector_nr, num_sectors); bitmap_set(&stripe->error_bitmap, sector_nr, num_sectors); } else { bitmap_clear(&stripe->io_error_bitmap, sector_nr, num_sectors); } bio_put(&bbio->bio); if (atomic_dec_and_test(&stripe->pending_io)) { wake_up(&stripe->io_wait); INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker); queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work); } } static void scrub_write_endio(struct btrfs_bio *bbio) { struct scrub_stripe *stripe = bbio->private; struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct bio_vec *bvec; int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio)); u32 bio_size = 0; int i; bio_for_each_bvec_all(bvec, &bbio->bio, i) bio_size += bvec->bv_len; if (bbio->bio.bi_status) { unsigned long flags; spin_lock_irqsave(&stripe->write_error_lock, flags); bitmap_set(&stripe->write_error_bitmap, sector_nr, bio_size >> fs_info->sectorsize_bits); spin_unlock_irqrestore(&stripe->write_error_lock, flags); } bio_put(&bbio->bio); if (atomic_dec_and_test(&stripe->pending_io)) wake_up(&stripe->io_wait); } static void scrub_submit_write_bio(struct scrub_ctx *sctx, struct scrub_stripe *stripe, struct btrfs_bio *bbio, bool dev_replace) { struct btrfs_fs_info *fs_info = sctx->fs_info; u32 bio_len = bbio->bio.bi_iter.bi_size; u32 
bio_off = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT) - stripe->logical; fill_writer_pointer_gap(sctx, stripe->physical + bio_off); atomic_inc(&stripe->pending_io); btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace); if (!btrfs_is_zoned(fs_info)) return; /* * For zoned writeback, queue depth must be 1, thus we must wait for * the write to finish before the next write. */ wait_scrub_stripe_io(stripe); /* * And also need to update the write pointer if write finished * successfully. */ if (!test_bit(bio_off >> fs_info->sectorsize_bits, &stripe->write_error_bitmap)) sctx->write_pointer += bio_len; } /* * Submit the write bio(s) for the sectors specified by @write_bitmap. * * Here we utilize btrfs_submit_repair_write(), which has some extra benefits: * * - Only needs logical bytenr and mirror_num * Just like the scrub read path * * - Would only result in writes to the specified mirror * Unlike the regular writeback path, which would write back to all stripes * * - Handle dev-replace and read-repair writeback differently */ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *stripe, unsigned long write_bitmap, bool dev_replace) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct btrfs_bio *bbio = NULL; int sector_nr; for_each_set_bit(sector_nr, &write_bitmap, stripe->nr_sectors) { struct page *page = scrub_stripe_get_page(stripe, sector_nr); unsigned int pgoff = scrub_stripe_get_page_offset(stripe, sector_nr); int ret; /* We should only writeback sectors covered by an extent. */ ASSERT(test_bit(sector_nr, &stripe->extent_sector_bitmap)); /* Cannot merge with previous sector, submit the current one. */ if (bbio && sector_nr && !test_bit(sector_nr - 1, &write_bitmap)) { scrub_submit_write_bio(sctx, stripe, bbio, dev_replace); bbio = NULL; } if (!bbio) { bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_WRITE, fs_info, scrub_write_endio, stripe); bbio->bio.bi_iter.bi_sector = (stripe->logical + (sector_nr << fs_info->sectorsize_bits)) >> SECTOR_SHIFT; } ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); ASSERT(ret == fs_info->sectorsize); } if (bbio) scrub_submit_write_bio(sctx, stripe, bbio, dev_replace); } /* * Throttling of IO submission, bandwidth-limit based, the timeslice is 1 * second. Limit can be set via /sys/fs/UUID/devinfo/devid/scrub_speed_max. */ static void scrub_throttle_dev_io(struct scrub_ctx *sctx, struct btrfs_device *device, unsigned int bio_size) { const int time_slice = 1000; s64 delta; ktime_t now; u32 div; u64 bwlimit; bwlimit = READ_ONCE(device->scrub_speed_max); if (bwlimit == 0) return; /* * Slice is divided into intervals when the IO is submitted, adjust by * bwlimit and maximum of 64 intervals. */ div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024))); div = min_t(u32, 64, div); /* Start new epoch, set deadline */ now = ktime_get(); if (sctx->throttle_deadline == 0) { sctx->throttle_deadline = ktime_add_ms(now, time_slice / div); sctx->throttle_sent = 0; } /* Still in the time to send? 
*/ if (ktime_before(now, sctx->throttle_deadline)) { /* If current bio is within the limit, send it */ sctx->throttle_sent += bio_size; if (sctx->throttle_sent <= div_u64(bwlimit, div)) return; /* We're over the limit, sleep until the rest of the slice */ delta = ktime_ms_delta(sctx->throttle_deadline, now); } else { /* New request after deadline, start new epoch */ delta = 0; } if (delta) { long timeout; timeout = div_u64(delta * HZ, 1000); schedule_timeout_interruptible(timeout); } /* Next call will start the deadline period */ sctx->throttle_deadline = 0; } /* * Given a physical address, this will calculate it's * logical offset. if this is a parity stripe, it will return * the most left data stripe's logical offset. * * return 0 if it is a data stripe, 1 means parity stripe. */ static int get_raid56_logic_offset(u64 physical, int num, struct btrfs_chunk_map *map, u64 *offset, u64 *stripe_start) { int i; int j = 0; u64 last_offset; const int data_stripes = nr_data_stripes(map); last_offset = (physical - map->stripes[num].physical) * data_stripes; if (stripe_start) *stripe_start = last_offset; *offset = last_offset; for (i = 0; i < data_stripes; i++) { u32 stripe_nr; u32 stripe_index; u32 rot; *offset = last_offset + btrfs_stripe_nr_to_offset(i); stripe_nr = (u32)(*offset >> BTRFS_STRIPE_LEN_SHIFT) / data_stripes; /* Work out the disk rotation on this stripe-set */ rot = stripe_nr % map->num_stripes; /* calculate which stripe this data locates */ rot += i; stripe_index = rot % map->num_stripes; if (stripe_index == num) return 0; if (stripe_index < num) j++; } *offset = last_offset + btrfs_stripe_nr_to_offset(j); return 1; } /* * Return 0 if the extent item range covers any byte of the range. * Return <0 if the extent item is before @search_start. * Return >0 if the extent item is after @start_start + @search_len. */ static int compare_extent_item_range(struct btrfs_path *path, u64 search_start, u64 search_len) { struct btrfs_fs_info *fs_info = path->nodes[0]->fs_info; u64 len; struct btrfs_key key; btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); ASSERT(key.type == BTRFS_EXTENT_ITEM_KEY || key.type == BTRFS_METADATA_ITEM_KEY); if (key.type == BTRFS_METADATA_ITEM_KEY) len = fs_info->nodesize; else len = key.offset; if (key.objectid + len <= search_start) return -1; if (key.objectid >= search_start + search_len) return 1; return 0; } /* * Locate one extent item which covers any byte in range * [@search_start, @search_start + @search_length) * * If the path is not initialized, we will initialize the search by doing * a btrfs_search_slot(). * If the path is already initialized, we will use the path as the initial * slot, to avoid duplicated btrfs_search_slot() calls. * * NOTE: If an extent item starts before @search_start, we will still * return the extent item. This is for data extent crossing stripe boundary. * * Return 0 if we found such extent item, and @path will point to the extent item. * Return >0 if no such extent item can be found, and @path will be released. * Return <0 if hit fatal error, and @path will be released. 
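 *
 * A typical caller walks a logical range with a loop roughly like the
 * following (illustrative sketch only; scrub_find_fill_first_stripe()
 * below is a real user):
 *
 *	while (cur_logical < logical_end) {
 *		ret = find_first_extent_item(extent_root, path, cur_logical,
 *					     logical_end - cur_logical);
 *		if (ret)
 *			break;
 *		get_extent_info(path, &extent_start, &extent_len,
 *				&extent_flags, &extent_gen);
 *		cur_logical = extent_start + extent_len;
 *	}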
*/ static int find_first_extent_item(struct btrfs_root *extent_root, struct btrfs_path *path, u64 search_start, u64 search_len) { struct btrfs_fs_info *fs_info = extent_root->fs_info; struct btrfs_key key; int ret; /* Continue using the existing path */ if (path->nodes[0]) goto search_forward; if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) key.type = BTRFS_METADATA_ITEM_KEY; else key.type = BTRFS_EXTENT_ITEM_KEY; key.objectid = search_start; key.offset = (u64)-1; ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) return ret; if (ret == 0) { /* * Key with offset -1 found, there would have to exist an extent * item with such offset, but this is out of the valid range. */ btrfs_release_path(path); return -EUCLEAN; } /* * Here we intentionally pass 0 as @min_objectid, as there could be * an extent item starting before @search_start. */ ret = btrfs_previous_extent_item(extent_root, path, 0); if (ret < 0) return ret; /* * No matter whether we have found an extent item, the next loop will * properly do every check on the key. */ search_forward: while (true) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid >= search_start + search_len) break; if (key.type != BTRFS_METADATA_ITEM_KEY && key.type != BTRFS_EXTENT_ITEM_KEY) goto next; ret = compare_extent_item_range(path, search_start, search_len); if (ret == 0) return ret; if (ret > 0) break; next: ret = btrfs_next_item(extent_root, path); if (ret) { /* Either no more items or a fatal error. */ btrfs_release_path(path); return ret; } } btrfs_release_path(path); return 1; } static void get_extent_info(struct btrfs_path *path, u64 *extent_start_ret, u64 *size_ret, u64 *flags_ret, u64 *generation_ret) { struct btrfs_key key; struct btrfs_extent_item *ei; btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); ASSERT(key.type == BTRFS_METADATA_ITEM_KEY || key.type == BTRFS_EXTENT_ITEM_KEY); *extent_start_ret = key.objectid; if (key.type == BTRFS_METADATA_ITEM_KEY) *size_ret = path->nodes[0]->fs_info->nodesize; else *size_ret = key.offset; ei = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_item); *flags_ret = btrfs_extent_flags(path->nodes[0], ei); *generation_ret = btrfs_extent_generation(path->nodes[0], ei); } static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical, u64 physical, u64 physical_end) { struct btrfs_fs_info *fs_info = sctx->fs_info; int ret = 0; if (!btrfs_is_zoned(fs_info)) return 0; mutex_lock(&sctx->wr_lock); if (sctx->write_pointer < physical_end) { ret = btrfs_sync_zone_write_pointer(sctx->wr_tgtdev, logical, physical, sctx->write_pointer); if (ret) btrfs_err(fs_info, "zoned: failed to recover write pointer"); } mutex_unlock(&sctx->wr_lock); btrfs_dev_clear_zone_empty(sctx->wr_tgtdev, physical); return ret; } static void fill_one_extent_info(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe, u64 extent_start, u64 extent_len, u64 extent_flags, u64 extent_gen) { for (u64 cur_logical = max(stripe->logical, extent_start); cur_logical < min(stripe->logical + BTRFS_STRIPE_LEN, extent_start + extent_len); cur_logical += fs_info->sectorsize) { const int nr_sector = (cur_logical - stripe->logical) >> fs_info->sectorsize_bits; struct scrub_sector_verification *sector = &stripe->sectors[nr_sector]; set_bit(nr_sector, &stripe->extent_sector_bitmap); if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { sector->is_metadata = true; sector->generation = extent_gen; } } } static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe) { 
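	/*
	 * Each of these per-sector bitmaps is a single unsigned long with
	 * one bit per sector of the stripe, so a plain assignment is enough
	 * to clear it.
	 */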
stripe->extent_sector_bitmap = 0; stripe->init_error_bitmap = 0; stripe->init_nr_io_errors = 0; stripe->init_nr_csum_errors = 0; stripe->init_nr_meta_errors = 0; stripe->error_bitmap = 0; stripe->io_error_bitmap = 0; stripe->csum_error_bitmap = 0; stripe->meta_error_bitmap = 0; } /* * Locate one stripe which has at least one extent in its range. * * Return 0 if found such stripe, and store its info into @stripe. * Return >0 if there is no such stripe in the specified range. * Return <0 for error. */ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, struct btrfs_path *extent_path, struct btrfs_path *csum_path, struct btrfs_device *dev, u64 physical, int mirror_num, u64 logical_start, u32 logical_len, struct scrub_stripe *stripe) { struct btrfs_fs_info *fs_info = bg->fs_info; struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bg->start); struct btrfs_root *csum_root = btrfs_csum_root(fs_info, bg->start); const u64 logical_end = logical_start + logical_len; u64 cur_logical = logical_start; u64 stripe_end; u64 extent_start; u64 extent_len; u64 extent_flags; u64 extent_gen; int ret; if (unlikely(!extent_root)) { btrfs_err(fs_info, "no valid extent root for scrub"); return -EUCLEAN; } memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) * stripe->nr_sectors); scrub_stripe_reset_bitmaps(stripe); /* The range must be inside the bg. */ ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length); ret = find_first_extent_item(extent_root, extent_path, logical_start, logical_len); /* Either error or not found. */ if (ret) goto out; get_extent_info(extent_path, &extent_start, &extent_len, &extent_flags, &extent_gen); if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) stripe->nr_meta_extents++; if (extent_flags & BTRFS_EXTENT_FLAG_DATA) stripe->nr_data_extents++; cur_logical = max(extent_start, cur_logical); /* * Round down to stripe boundary. * * The extra calculation against bg->start is to handle block groups * whose logical bytenr is not BTRFS_STRIPE_LEN aligned. */ stripe->logical = round_down(cur_logical - bg->start, BTRFS_STRIPE_LEN) + bg->start; stripe->physical = physical + stripe->logical - logical_start; stripe->dev = dev; stripe->bg = bg; stripe->mirror_num = mirror_num; stripe_end = stripe->logical + BTRFS_STRIPE_LEN - 1; /* Fill the first extent info into stripe->sectors[] array. */ fill_one_extent_info(fs_info, stripe, extent_start, extent_len, extent_flags, extent_gen); cur_logical = extent_start + extent_len; /* Fill the extent info for the remaining sectors. */ while (cur_logical <= stripe_end) { ret = find_first_extent_item(extent_root, extent_path, cur_logical, stripe_end - cur_logical + 1); if (ret < 0) goto out; if (ret > 0) { ret = 0; break; } get_extent_info(extent_path, &extent_start, &extent_len, &extent_flags, &extent_gen); if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) stripe->nr_meta_extents++; if (extent_flags & BTRFS_EXTENT_FLAG_DATA) stripe->nr_data_extents++; fill_one_extent_info(fs_info, stripe, extent_start, extent_len, extent_flags, extent_gen); cur_logical = extent_start + extent_len; } /* Now fill the data csum. */ if (bg->flags & BTRFS_BLOCK_GROUP_DATA) { int sector_nr; unsigned long csum_bitmap = 0; /* Csum space should have already been allocated. */ ASSERT(stripe->csums); /* * Our csum bitmap should be large enough, as BTRFS_STRIPE_LEN * should contain at most 16 sectors. 
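		 * (BTRFS_STRIPE_LEN is 64K and the smallest sector size is 4K,
		 *  i.e. at most 16 bits, which always fits in an unsigned long.)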
*/ ASSERT(BITS_PER_LONG >= BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits); ret = btrfs_lookup_csums_bitmap(csum_root, csum_path, stripe->logical, stripe_end, stripe->csums, &csum_bitmap); if (ret < 0) goto out; if (ret > 0) ret = 0; for_each_set_bit(sector_nr, &csum_bitmap, stripe->nr_sectors) { stripe->sectors[sector_nr].csum = stripe->csums + sector_nr * fs_info->csum_size; } } set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state); out: return ret; } static void scrub_reset_stripe(struct scrub_stripe *stripe) { scrub_stripe_reset_bitmaps(stripe); stripe->nr_meta_extents = 0; stripe->nr_data_extents = 0; stripe->state = 0; for (int i = 0; i < stripe->nr_sectors; i++) { stripe->sectors[i].is_metadata = false; stripe->sectors[i].csum = NULL; stripe->sectors[i].generation = 0; } } static u32 stripe_length(const struct scrub_stripe *stripe) { ASSERT(stripe->bg); return min(BTRFS_STRIPE_LEN, stripe->bg->start + stripe->bg->length - stripe->logical); } static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct btrfs_bio *bbio = NULL; unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; u64 stripe_len = BTRFS_STRIPE_LEN; int mirror = stripe->mirror_num; int i; atomic_inc(&stripe->pending_io); for_each_set_bit(i, &stripe->extent_sector_bitmap, stripe->nr_sectors) { struct page *page = scrub_stripe_get_page(stripe, i); unsigned int pgoff = scrub_stripe_get_page_offset(stripe, i); /* We're beyond the chunk boundary, no need to read anymore. */ if (i >= nr_sectors) break; /* The current sector cannot be merged, submit the bio. */ if (bbio && ((i > 0 && !test_bit(i - 1, &stripe->extent_sector_bitmap)) || bbio->bio.bi_iter.bi_size >= stripe_len)) { ASSERT(bbio->bio.bi_iter.bi_size); atomic_inc(&stripe->pending_io); btrfs_submit_bbio(bbio, mirror); bbio = NULL; } if (!bbio) { struct btrfs_io_stripe io_stripe = {}; struct btrfs_io_context *bioc = NULL; const u64 logical = stripe->logical + (i << fs_info->sectorsize_bits); int err; io_stripe.rst_search_commit_root = true; stripe_len = (nr_sectors - i) << fs_info->sectorsize_bits; /* * For RST cases, we need to manually split the bbio to * follow the RST boundary. */ err = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical, &stripe_len, &bioc, &io_stripe, &mirror); btrfs_put_bioc(bioc); if (err < 0) { if (err != -ENODATA) { /* * Earlier btrfs_get_raid_extent_offset() * returned -ENODATA, which means there's * no entry for the corresponding range * in the stripe tree. But if it's in * the extent tree, then it's a preallocated * extent and not an error. 
*/ set_bit(i, &stripe->io_error_bitmap); set_bit(i, &stripe->error_bitmap); } continue; } bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ, fs_info, scrub_read_endio, stripe); bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT; } __bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); } if (bbio) { ASSERT(bbio->bio.bi_iter.bi_size); atomic_inc(&stripe->pending_io); btrfs_submit_bbio(bbio, mirror); } if (atomic_dec_and_test(&stripe->pending_io)) { wake_up(&stripe->io_wait); INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker); queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work); } } static void scrub_submit_initial_read(struct scrub_ctx *sctx, struct scrub_stripe *stripe) { struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_bio *bbio; unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; int mirror = stripe->mirror_num; ASSERT(stripe->bg); ASSERT(stripe->mirror_num > 0); ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state)); if (btrfs_need_stripe_tree_update(fs_info, stripe->bg->flags)) { scrub_submit_extent_sector_read(stripe); return; } bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info, scrub_read_endio, stripe); bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT; /* Read the whole range inside the chunk boundary. */ for (unsigned int cur = 0; cur < nr_sectors; cur++) { struct page *page = scrub_stripe_get_page(stripe, cur); unsigned int pgoff = scrub_stripe_get_page_offset(stripe, cur); int ret; ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); /* We should have allocated enough bio vectors. */ ASSERT(ret == fs_info->sectorsize); } atomic_inc(&stripe->pending_io); /* * For dev-replace, either user asks to avoid the source dev, or * the device is missing, we try the next mirror instead. */ if (sctx->is_dev_replace && (fs_info->dev_replace.cont_reading_from_srcdev_mode == BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID || !stripe->dev->bdev)) { int num_copies = btrfs_num_copies(fs_info, stripe->bg->start, stripe->bg->length); mirror = calc_next_mirror(mirror, num_copies); } btrfs_submit_bbio(bbio, mirror); } static bool stripe_has_metadata_error(struct scrub_stripe *stripe) { int i; for_each_set_bit(i, &stripe->error_bitmap, stripe->nr_sectors) { if (stripe->sectors[i].is_metadata) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; btrfs_err(fs_info, "stripe %llu has unrepaired metadata sector at %llu", stripe->logical, stripe->logical + (i << fs_info->sectorsize_bits)); return true; } } return false; } static void submit_initial_group_read(struct scrub_ctx *sctx, unsigned int first_slot, unsigned int nr_stripes) { struct blk_plug plug; ASSERT(first_slot < SCRUB_TOTAL_STRIPES); ASSERT(first_slot + nr_stripes <= SCRUB_TOTAL_STRIPES); scrub_throttle_dev_io(sctx, sctx->stripes[0].dev, btrfs_stripe_nr_to_offset(nr_stripes)); blk_start_plug(&plug); for (int i = 0; i < nr_stripes; i++) { struct scrub_stripe *stripe = &sctx->stripes[first_slot + i]; /* Those stripes should be initialized. */ ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state)); scrub_submit_initial_read(sctx, stripe); } blk_finish_plug(&plug); } static int flush_scrub_stripes(struct scrub_ctx *sctx) { struct btrfs_fs_info *fs_info = sctx->fs_info; struct scrub_stripe *stripe; const int nr_stripes = sctx->cur_stripe; int ret = 0; if (!nr_stripes) return 0; ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &sctx->stripes[0].state)); /* Submit the stripes which are populated but not submitted. 
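	 * Full groups of SCRUB_STRIPES_PER_GROUP stripes have already been
	 * submitted by queue_scrub_stripe(); only a trailing partial group
	 * can be left over here.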
*/ if (nr_stripes % SCRUB_STRIPES_PER_GROUP) { const int first_slot = round_down(nr_stripes, SCRUB_STRIPES_PER_GROUP); submit_initial_group_read(sctx, first_slot, nr_stripes - first_slot); } for (int i = 0; i < nr_stripes; i++) { stripe = &sctx->stripes[i]; wait_event(stripe->repair_wait, test_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state)); } /* Submit for dev-replace. */ if (sctx->is_dev_replace) { /* * For dev-replace, if we know there is something wrong with * metadata, we should immediately abort. */ for (int i = 0; i < nr_stripes; i++) { if (stripe_has_metadata_error(&sctx->stripes[i])) { ret = -EIO; goto out; } } for (int i = 0; i < nr_stripes; i++) { unsigned long good; stripe = &sctx->stripes[i]; ASSERT(stripe->dev == fs_info->dev_replace.srcdev); bitmap_andnot(&good, &stripe->extent_sector_bitmap, &stripe->error_bitmap, stripe->nr_sectors); scrub_write_sectors(sctx, stripe, good, true); } } /* Wait for the above writebacks to finish. */ for (int i = 0; i < nr_stripes; i++) { stripe = &sctx->stripes[i]; wait_scrub_stripe_io(stripe); spin_lock(&sctx->stat_lock); sctx->stat.last_physical = stripe->physical + stripe_length(stripe); spin_unlock(&sctx->stat_lock); scrub_reset_stripe(stripe); } out: sctx->cur_stripe = 0; return ret; } static void raid56_scrub_wait_endio(struct bio *bio) { complete(bio->bi_private); } static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *bg, struct btrfs_device *dev, int mirror_num, u64 logical, u32 length, u64 physical, u64 *found_logical_ret) { struct scrub_stripe *stripe; int ret; /* * There should always be one slot left, as caller filling the last * slot should flush them all. */ ASSERT(sctx->cur_stripe < SCRUB_TOTAL_STRIPES); /* @found_logical_ret must be specified. */ ASSERT(found_logical_ret); stripe = &sctx->stripes[sctx->cur_stripe]; scrub_reset_stripe(stripe); ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path, &sctx->csum_path, dev, physical, mirror_num, logical, length, stripe); /* Either >0 as no more extents or <0 for error. */ if (ret) return ret; *found_logical_ret = stripe->logical; sctx->cur_stripe++; /* We filled one group, submit it. */ if (sctx->cur_stripe % SCRUB_STRIPES_PER_GROUP == 0) { const int first_slot = sctx->cur_stripe - SCRUB_STRIPES_PER_GROUP; submit_initial_group_read(sctx, first_slot, SCRUB_STRIPES_PER_GROUP); } /* Last slot used, flush them all. */ if (sctx->cur_stripe == SCRUB_TOTAL_STRIPES) return flush_scrub_stripes(sctx); return 0; } static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, struct btrfs_device *scrub_dev, struct btrfs_block_group *bg, struct btrfs_chunk_map *map, u64 full_stripe_start) { DECLARE_COMPLETION_ONSTACK(io_done); struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_raid_bio *rbio; struct btrfs_io_context *bioc = NULL; struct btrfs_path extent_path = { 0 }; struct btrfs_path csum_path = { 0 }; struct bio *bio; struct scrub_stripe *stripe; bool all_empty = true; const int data_stripes = nr_data_stripes(map); unsigned long extent_bitmap = 0; u64 length = btrfs_stripe_nr_to_offset(data_stripes); int ret; ASSERT(sctx->raid56_data_stripes); /* * For data stripe search, we cannot reuse the same extent/csum paths, * as the data stripe bytenr may be smaller than previous extent. Thus * we have to use our own extent/csum paths. 
*/ extent_path.search_commit_root = 1; extent_path.skip_locking = 1; csum_path.search_commit_root = 1; csum_path.skip_locking = 1; for (int i = 0; i < data_stripes; i++) { int stripe_index; int rot; u64 physical; stripe = &sctx->raid56_data_stripes[i]; rot = div_u64(full_stripe_start - bg->start, data_stripes) >> BTRFS_STRIPE_LEN_SHIFT; stripe_index = (i + rot) % map->num_stripes; physical = map->stripes[stripe_index].physical + btrfs_stripe_nr_to_offset(rot); scrub_reset_stripe(stripe); set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state); ret = scrub_find_fill_first_stripe(bg, &extent_path, &csum_path, map->stripes[stripe_index].dev, physical, 1, full_stripe_start + btrfs_stripe_nr_to_offset(i), BTRFS_STRIPE_LEN, stripe); if (ret < 0) goto out; /* * No extent in this data stripe, need to manually mark them * initialized to make later read submission happy. */ if (ret > 0) { stripe->logical = full_stripe_start + btrfs_stripe_nr_to_offset(i); stripe->dev = map->stripes[stripe_index].dev; stripe->mirror_num = 1; set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state); } } /* Check if all data stripes are empty. */ for (int i = 0; i < data_stripes; i++) { stripe = &sctx->raid56_data_stripes[i]; if (!bitmap_empty(&stripe->extent_sector_bitmap, stripe->nr_sectors)) { all_empty = false; break; } } if (all_empty) { ret = 0; goto out; } for (int i = 0; i < data_stripes; i++) { stripe = &sctx->raid56_data_stripes[i]; scrub_submit_initial_read(sctx, stripe); } for (int i = 0; i < data_stripes; i++) { stripe = &sctx->raid56_data_stripes[i]; wait_event(stripe->repair_wait, test_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state)); } /* For now, no zoned support for RAID56. */ ASSERT(!btrfs_is_zoned(sctx->fs_info)); /* * Now all data stripes are properly verified. Check if we have any * unrepaired, if so abort immediately or we could further corrupt the * P/Q stripes. * * During the loop, also populate extent_bitmap. */ for (int i = 0; i < data_stripes; i++) { unsigned long error; stripe = &sctx->raid56_data_stripes[i]; /* * We should only check the errors where there is an extent. * As we may hit an empty data stripe while it's missing. */ bitmap_and(&error, &stripe->error_bitmap, &stripe->extent_sector_bitmap, stripe->nr_sectors); if (!bitmap_empty(&error, stripe->nr_sectors)) { btrfs_err(fs_info, "unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl", full_stripe_start, i, stripe->nr_sectors, &error); ret = -EIO; goto out; } bitmap_or(&extent_bitmap, &extent_bitmap, &stripe->extent_sector_bitmap, stripe->nr_sectors); } /* Now we can check and regenerate the P/Q stripe. */ bio = bio_alloc(NULL, 1, REQ_OP_READ, GFP_NOFS); bio->bi_iter.bi_sector = full_stripe_start >> SECTOR_SHIFT; bio->bi_private = &io_done; bio->bi_end_io = raid56_scrub_wait_endio; btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start, &length, &bioc, NULL, NULL); if (ret < 0) { btrfs_put_bioc(bioc); btrfs_bio_counter_dec(fs_info); goto out; } rbio = raid56_parity_alloc_scrub_rbio(bio, bioc, scrub_dev, &extent_bitmap, BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits); btrfs_put_bioc(bioc); if (!rbio) { ret = -ENOMEM; btrfs_bio_counter_dec(fs_info); goto out; } /* Use the recovered stripes as cache to avoid read them from disk again. 
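	 * The data stripes were read and verified just above, so their pages
	 * already hold up-to-date content that the parity scrub can reuse.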
*/ for (int i = 0; i < data_stripes; i++) { stripe = &sctx->raid56_data_stripes[i]; raid56_parity_cache_data_pages(rbio, stripe->pages, full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT)); } raid56_parity_submit_scrub_rbio(rbio); wait_for_completion_io(&io_done); ret = blk_status_to_errno(bio->bi_status); bio_put(bio); btrfs_bio_counter_dec(fs_info); btrfs_release_path(&extent_path); btrfs_release_path(&csum_path); out: return ret; } /* * Scrub one range which can only has simple mirror based profile. * (Including all range in SINGLE/DUP/RAID1/RAID1C*, and each stripe in * RAID0/RAID10). * * Since we may need to handle a subset of block group, we need @logical_start * and @logical_length parameter. */ static int scrub_simple_mirror(struct scrub_ctx *sctx, struct btrfs_block_group *bg, u64 logical_start, u64 logical_length, struct btrfs_device *device, u64 physical, int mirror_num) { struct btrfs_fs_info *fs_info = sctx->fs_info; const u64 logical_end = logical_start + logical_length; u64 cur_logical = logical_start; int ret = 0; /* The range must be inside the bg */ ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length); /* Go through each extent items inside the logical range */ while (cur_logical < logical_end) { u64 found_logical = U64_MAX; u64 cur_physical = physical + cur_logical - logical_start; /* Canceled? */ if (atomic_read(&fs_info->scrub_cancel_req) || atomic_read(&sctx->cancel_req)) { ret = -ECANCELED; break; } /* Paused? */ if (atomic_read(&fs_info->scrub_pause_req)) { /* Push queued extents */ scrub_blocked_if_needed(fs_info); } /* Block group removed? */ spin_lock(&bg->lock); if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags)) { spin_unlock(&bg->lock); ret = 0; break; } spin_unlock(&bg->lock); ret = queue_scrub_stripe(sctx, bg, device, mirror_num, cur_logical, logical_end - cur_logical, cur_physical, &found_logical); if (ret > 0) { /* No more extent, just update the accounting */ spin_lock(&sctx->stat_lock); sctx->stat.last_physical = physical + logical_length; spin_unlock(&sctx->stat_lock); ret = 0; break; } if (ret < 0) break; /* queue_scrub_stripe() returned 0, @found_logical must be updated. */ ASSERT(found_logical != U64_MAX); cur_logical = found_logical + BTRFS_STRIPE_LEN; /* Don't hold CPU for too long time */ cond_resched(); } return ret; } /* Calculate the full stripe length for simple stripe based profiles */ static u64 simple_stripe_full_stripe_len(const struct btrfs_chunk_map *map) { ASSERT(map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)); return btrfs_stripe_nr_to_offset(map->num_stripes / map->sub_stripes); } /* Get the logical bytenr for the stripe */ static u64 simple_stripe_get_logical(struct btrfs_chunk_map *map, struct btrfs_block_group *bg, int stripe_index) { ASSERT(map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)); ASSERT(stripe_index < map->num_stripes); /* * (stripe_index / sub_stripes) gives how many data stripes we need to * skip. */ return btrfs_stripe_nr_to_offset(stripe_index / map->sub_stripes) + bg->start; } /* Get the mirror number for the stripe */ static int simple_stripe_mirror_num(struct btrfs_chunk_map *map, int stripe_index) { ASSERT(map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)); ASSERT(stripe_index < map->num_stripes); /* For RAID0, it's fixed to 1, for RAID10 it's 0,1,0,1... 
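 * (stripe_index % sub_stripes yields that 0,1,0,1... sequence, and btrfs
 *  mirror numbers are 1-based, hence the +1 below.)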
*/ return stripe_index % map->sub_stripes + 1; } static int scrub_simple_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct btrfs_device *device, int stripe_index) { const u64 logical_increment = simple_stripe_full_stripe_len(map); const u64 orig_logical = simple_stripe_get_logical(map, bg, stripe_index); const u64 orig_physical = map->stripes[stripe_index].physical; const int mirror_num = simple_stripe_mirror_num(map, stripe_index); u64 cur_logical = orig_logical; u64 cur_physical = orig_physical; int ret = 0; while (cur_logical < bg->start + bg->length) { /* * Inside each stripe, RAID0 is just SINGLE, and RAID10 is * just RAID1, so we can reuse scrub_simple_mirror() to scrub * this stripe. */ ret = scrub_simple_mirror(sctx, bg, cur_logical, BTRFS_STRIPE_LEN, device, cur_physical, mirror_num); if (ret) return ret; /* Skip to next stripe which belongs to the target device */ cur_logical += logical_increment; /* For physical offset, we just go to next stripe */ cur_physical += BTRFS_STRIPE_LEN; } return ret; } static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct btrfs_device *scrub_dev, int stripe_index) { struct btrfs_fs_info *fs_info = sctx->fs_info; const u64 profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK; const u64 chunk_logical = bg->start; int ret; int ret2; u64 physical = map->stripes[stripe_index].physical; const u64 dev_stripe_len = btrfs_calc_stripe_length(map); const u64 physical_end = physical + dev_stripe_len; u64 logical; u64 logic_end; /* The logical increment after finishing one stripe */ u64 increment; /* Offset inside the chunk */ u64 offset; u64 stripe_logical; /* Extent_path should be released by now. */ ASSERT(sctx->extent_path.nodes[0] == NULL); scrub_blocked_if_needed(fs_info); if (sctx->is_dev_replace && btrfs_dev_is_sequential(sctx->wr_tgtdev, physical)) { mutex_lock(&sctx->wr_lock); sctx->write_pointer = physical; mutex_unlock(&sctx->wr_lock); } /* Prepare the extra data stripes used by RAID56. */ if (profile & BTRFS_BLOCK_GROUP_RAID56_MASK) { ASSERT(sctx->raid56_data_stripes == NULL); sctx->raid56_data_stripes = kcalloc(nr_data_stripes(map), sizeof(struct scrub_stripe), GFP_KERNEL); if (!sctx->raid56_data_stripes) { ret = -ENOMEM; goto out; } for (int i = 0; i < nr_data_stripes(map); i++) { ret = init_scrub_stripe(fs_info, &sctx->raid56_data_stripes[i]); if (ret < 0) goto out; sctx->raid56_data_stripes[i].bg = bg; sctx->raid56_data_stripes[i].sctx = sctx; } } /* * There used to be a big double loop to handle all profiles using the * same routine, which grows larger and more gross over time. * * So here we handle each profile differently, so simpler profiles * have simpler scrubbing function. */ if (!(profile & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_RAID56_MASK))) { /* * Above check rules out all complex profile, the remaining * profiles are SINGLE|DUP|RAID1|RAID1C*, which is simple * mirrored duplication without stripe. * * Only @physical and @mirror_num needs to calculated using * @stripe_index. 
*/ ret = scrub_simple_mirror(sctx, bg, bg->start, bg->length, scrub_dev, map->stripes[stripe_index].physical, stripe_index + 1); offset = 0; goto out; } if (profile & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { ret = scrub_simple_stripe(sctx, bg, map, scrub_dev, stripe_index); offset = btrfs_stripe_nr_to_offset(stripe_index / map->sub_stripes); goto out; } /* Only RAID56 goes through the old code */ ASSERT(map->type & BTRFS_BLOCK_GROUP_RAID56_MASK); ret = 0; /* Calculate the logical end of the stripe */ get_raid56_logic_offset(physical_end, stripe_index, map, &logic_end, NULL); logic_end += chunk_logical; /* Initialize @offset in case we need to go to out: label */ get_raid56_logic_offset(physical, stripe_index, map, &offset, NULL); increment = btrfs_stripe_nr_to_offset(nr_data_stripes(map)); /* * Due to the rotation, for RAID56 it's better to iterate each stripe * using their physical offset. */ while (physical < physical_end) { ret = get_raid56_logic_offset(physical, stripe_index, map, &logical, &stripe_logical); logical += chunk_logical; if (ret) { /* it is parity strip */ stripe_logical += chunk_logical; ret = scrub_raid56_parity_stripe(sctx, scrub_dev, bg, map, stripe_logical); spin_lock(&sctx->stat_lock); sctx->stat.last_physical = min(physical + BTRFS_STRIPE_LEN, physical_end); spin_unlock(&sctx->stat_lock); if (ret) goto out; goto next; } /* * Now we're at a data stripe, scrub each extents in the range. * * At this stage, if we ignore the repair part, inside each data * stripe it is no different than SINGLE profile. * We can reuse scrub_simple_mirror() here, as the repair part * is still based on @mirror_num. */ ret = scrub_simple_mirror(sctx, bg, logical, BTRFS_STRIPE_LEN, scrub_dev, physical, 1); if (ret < 0) goto out; next: logical += increment; physical += BTRFS_STRIPE_LEN; spin_lock(&sctx->stat_lock); sctx->stat.last_physical = physical; spin_unlock(&sctx->stat_lock); } out: ret2 = flush_scrub_stripes(sctx); if (!ret) ret = ret2; btrfs_release_path(&sctx->extent_path); btrfs_release_path(&sctx->csum_path); if (sctx->raid56_data_stripes) { for (int i = 0; i < nr_data_stripes(map); i++) release_scrub_stripe(&sctx->raid56_data_stripes[i]); kfree(sctx->raid56_data_stripes); sctx->raid56_data_stripes = NULL; } if (sctx->is_dev_replace && ret >= 0) { int ret2; ret2 = sync_write_pointer_for_zoned(sctx, chunk_logical + offset, map->stripes[stripe_index].physical, physical_end); if (ret2) ret = ret2; } return ret < 0 ? ret : 0; } static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, struct btrfs_block_group *bg, struct btrfs_device *scrub_dev, u64 dev_offset, u64 dev_extent_len) { struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_chunk_map *map; int i; int ret = 0; map = btrfs_find_chunk_map(fs_info, bg->start, bg->length); if (!map) { /* * Might have been an unused block group deleted by the cleaner * kthread or relocation. 
*/ spin_lock(&bg->lock); if (!test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags)) ret = -EINVAL; spin_unlock(&bg->lock); return ret; } if (map->start != bg->start) goto out; if (map->chunk_len < dev_extent_len) goto out; for (i = 0; i < map->num_stripes; ++i) { if (map->stripes[i].dev->bdev == scrub_dev->bdev && map->stripes[i].physical == dev_offset) { ret = scrub_stripe(sctx, bg, map, scrub_dev, i); if (ret) goto out; } } out: btrfs_free_chunk_map(map); return ret; } static int finish_extent_writes_for_zoned(struct btrfs_root *root, struct btrfs_block_group *cache) { struct btrfs_fs_info *fs_info = cache->fs_info; if (!btrfs_is_zoned(fs_info)) return 0; btrfs_wait_block_group_reservations(cache); btrfs_wait_nocow_writers(cache); btrfs_wait_ordered_roots(fs_info, U64_MAX, cache); return btrfs_commit_current_transaction(root); } static noinline_for_stack int scrub_enumerate_chunks(struct scrub_ctx *sctx, struct btrfs_device *scrub_dev, u64 start, u64 end) { struct btrfs_dev_extent *dev_extent = NULL; struct btrfs_path *path; struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_root *root = fs_info->dev_root; u64 chunk_offset; int ret = 0; int ro_set; int slot; struct extent_buffer *l; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_block_group *cache; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; path = btrfs_alloc_path(); if (!path) return -ENOMEM; path->reada = READA_FORWARD; path->search_commit_root = 1; path->skip_locking = 1; key.objectid = scrub_dev->devid; key.offset = 0ull; key.type = BTRFS_DEV_EXTENT_KEY; while (1) { u64 dev_extent_len; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) break; if (ret > 0) { if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(root, path); if (ret < 0) break; if (ret > 0) { ret = 0; break; } } else { ret = 0; } } l = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(l, &found_key, slot); if (found_key.objectid != scrub_dev->devid) break; if (found_key.type != BTRFS_DEV_EXTENT_KEY) break; if (found_key.offset >= end) break; if (found_key.offset < key.offset) break; dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); dev_extent_len = btrfs_dev_extent_length(l, dev_extent); if (found_key.offset + dev_extent_len <= start) goto skip; chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); /* * get a reference on the corresponding block group to prevent * the chunk from going away while we scrub it */ cache = btrfs_lookup_block_group(fs_info, chunk_offset); /* some chunks are removed but not committed to disk yet, * continue scrubbing */ if (!cache) goto skip; ASSERT(cache->start <= chunk_offset); /* * We are using the commit root to search for device extents, so * that means we could have found a device extent item from a * block group that was deleted in the current transaction. The * logical start offset of the deleted block group, stored at * @chunk_offset, might be part of the logical address range of * a new block group (which uses different physical extents). * In this case btrfs_lookup_block_group() has returned the new * block group, and its start address is less than @chunk_offset. * * We skip such new block groups, because it's pointless to * process them, as we won't find their extents because we search * for them using the commit root of the extent tree. 
For a device * replace it's also fine to skip it, we won't miss copying them * to the target device because we have the write duplication * setup through the regular write path (by btrfs_map_block()), * and we have committed a transaction when we started the device * replace, right after setting up the device replace state. */ if (cache->start < chunk_offset) { btrfs_put_block_group(cache); goto skip; } if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) { if (!test_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags)) { btrfs_put_block_group(cache); goto skip; } } /* * Make sure that while we are scrubbing the corresponding block * group doesn't get its logical address and its device extents * reused for another block group, which can possibly be of a * different type and different profile. We do this to prevent * false error detections and crashes due to bogus attempts to * repair extents. */ spin_lock(&cache->lock); if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &cache->runtime_flags)) { spin_unlock(&cache->lock); btrfs_put_block_group(cache); goto skip; } btrfs_freeze_block_group(cache); spin_unlock(&cache->lock); /* * we need call btrfs_inc_block_group_ro() with scrubs_paused, * to avoid deadlock caused by: * btrfs_inc_block_group_ro() * -> btrfs_wait_for_commit() * -> btrfs_commit_transaction() * -> btrfs_scrub_pause() */ scrub_pause_on(fs_info); /* * Don't do chunk preallocation for scrub. * * This is especially important for SYSTEM bgs, or we can hit * -EFBIG from btrfs_finish_chunk_alloc() like: * 1. The only SYSTEM bg is marked RO. * Since SYSTEM bg is small, that's pretty common. * 2. New SYSTEM bg will be allocated * Due to regular version will allocate new chunk. * 3. New SYSTEM bg is empty and will get cleaned up * Before cleanup really happens, it's marked RO again. * 4. Empty SYSTEM bg get scrubbed * We go back to 2. * * This can easily boost the amount of SYSTEM chunks if cleaner * thread can't be triggered fast enough, and use up all space * of btrfs_super_block::sys_chunk_array * * While for dev replace, we need to try our best to mark block * group RO, to prevent race between: * - Write duplication * Contains latest data * - Scrub copy * Contains data from commit tree * * If target block group is not marked RO, nocow writes can * be overwritten by scrub copy, causing data corruption. * So for dev-replace, it's not allowed to continue if a block * group is not RO. */ ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace); if (!ret && sctx->is_dev_replace) { ret = finish_extent_writes_for_zoned(root, cache); if (ret) { btrfs_dec_block_group_ro(cache); scrub_pause_off(fs_info); btrfs_put_block_group(cache); break; } } if (ret == 0) { ro_set = 1; } else if (ret == -ENOSPC && !sctx->is_dev_replace && !(cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) { /* * btrfs_inc_block_group_ro return -ENOSPC when it * failed in creating new chunk for metadata. * It is not a problem for scrub, because * metadata are always cowed, and our scrub paused * commit_transactions. * * For RAID56 chunks, we have to mark them read-only * for scrub, as later we would use our own cache * out of RAID56 realm. * Thus we want the RAID56 bg to be marked RO to * prevent RMW from screwing up out cache. 
*/ ro_set = 0; } else if (ret == -ETXTBSY) { btrfs_warn(fs_info, "skipping scrub of block group %llu due to active swapfile", cache->start); scrub_pause_off(fs_info); ret = 0; goto skip_unfreeze; } else { btrfs_warn(fs_info, "failed setting block group ro: %d", ret); btrfs_unfreeze_block_group(cache); btrfs_put_block_group(cache); scrub_pause_off(fs_info); break; } /* * Now the target block is marked RO, wait for nocow writes to * finish before dev-replace. * COW is fine, as COW never overwrites extents in commit tree. */ if (sctx->is_dev_replace) { btrfs_wait_nocow_writers(cache); btrfs_wait_ordered_roots(fs_info, U64_MAX, cache); } scrub_pause_off(fs_info); down_write(&dev_replace->rwsem); dev_replace->cursor_right = found_key.offset + dev_extent_len; dev_replace->cursor_left = found_key.offset; dev_replace->item_needs_writeback = 1; up_write(&dev_replace->rwsem); ret = scrub_chunk(sctx, cache, scrub_dev, found_key.offset, dev_extent_len); if (sctx->is_dev_replace && !btrfs_finish_block_group_to_copy(dev_replace->srcdev, cache, found_key.offset)) ro_set = 0; down_write(&dev_replace->rwsem); dev_replace->cursor_left = dev_replace->cursor_right; dev_replace->item_needs_writeback = 1; up_write(&dev_replace->rwsem); if (ro_set) btrfs_dec_block_group_ro(cache); /* * We might have prevented the cleaner kthread from deleting * this block group if it was already unused because we raced * and set it to RO mode first. So add it back to the unused * list, otherwise it might not ever be deleted unless a manual * balance is triggered or it becomes used and unused again. */ spin_lock(&cache->lock); if (!test_bit(BLOCK_GROUP_FLAG_REMOVED, &cache->runtime_flags) && !cache->ro && cache->reserved == 0 && cache->used == 0) { spin_unlock(&cache->lock); if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) btrfs_discard_queue_work(&fs_info->discard_ctl, cache); else btrfs_mark_bg_unused(cache); } else { spin_unlock(&cache->lock); } skip_unfreeze: btrfs_unfreeze_block_group(cache); btrfs_put_block_group(cache); if (ret) break; if (sctx->is_dev_replace && atomic64_read(&dev_replace->num_write_errors) > 0) { ret = -EIO; break; } if (sctx->stat.malloc_errors > 0) { ret = -ENOMEM; break; } skip: key.offset = found_key.offset + dev_extent_len; btrfs_release_path(path); } btrfs_free_path(path); return ret; } static int scrub_one_super(struct scrub_ctx *sctx, struct btrfs_device *dev, struct page *page, u64 physical, u64 generation) { struct btrfs_fs_info *fs_info = sctx->fs_info; struct bio_vec bvec; struct bio bio; struct btrfs_super_block *sb = page_address(page); int ret; bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_READ); bio.bi_iter.bi_sector = physical >> SECTOR_SHIFT; __bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE, 0); ret = submit_bio_wait(&bio); bio_uninit(&bio); if (ret < 0) return ret; ret = btrfs_check_super_csum(fs_info, sb); if (ret != 0) { btrfs_err_rl(fs_info, "super block at physical %llu devid %llu has bad csum", physical, dev->devid); return -EIO; } if (btrfs_super_generation(sb) != generation) { btrfs_err_rl(fs_info, "super block at physical %llu devid %llu has bad generation %llu expect %llu", physical, dev->devid, btrfs_super_generation(sb), generation); return -EUCLEAN; } return btrfs_validate_super(fs_info, sb, -1); } static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, struct btrfs_device *scrub_dev) { int i; u64 bytenr; u64 gen; int ret = 0; struct page *page; struct btrfs_fs_info *fs_info = sctx->fs_info; if (BTRFS_FS_ERROR(fs_info)) return -EROFS; page = alloc_page(GFP_KERNEL); if 
(!page) { spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; spin_unlock(&sctx->stat_lock); return -ENOMEM; } /* Seed devices of a new filesystem has their own generation. */ if (scrub_dev->fs_devices != fs_info->fs_devices) gen = scrub_dev->generation; else gen = btrfs_get_last_trans_committed(fs_info); for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr); if (ret == -ENOENT) break; if (ret) { spin_lock(&sctx->stat_lock); sctx->stat.super_errors++; spin_unlock(&sctx->stat_lock); continue; } if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->commit_total_bytes) break; if (!btrfs_check_super_location(scrub_dev, bytenr)) continue; ret = scrub_one_super(sctx, scrub_dev, page, bytenr, gen); if (ret) { spin_lock(&sctx->stat_lock); sctx->stat.super_errors++; spin_unlock(&sctx->stat_lock); } } __free_page(page); return 0; } static void scrub_workers_put(struct btrfs_fs_info *fs_info) { if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt, &fs_info->scrub_lock)) { struct workqueue_struct *scrub_workers = fs_info->scrub_workers; fs_info->scrub_workers = NULL; mutex_unlock(&fs_info->scrub_lock); if (scrub_workers) destroy_workqueue(scrub_workers); } } /* * get a reference count on fs_info->scrub_workers. start worker if necessary */ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info) { struct workqueue_struct *scrub_workers = NULL; unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND; int max_active = fs_info->thread_pool_size; int ret = -ENOMEM; if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt)) return 0; scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active); if (!scrub_workers) return -ENOMEM; mutex_lock(&fs_info->scrub_lock); if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) { ASSERT(fs_info->scrub_workers == NULL); fs_info->scrub_workers = scrub_workers; refcount_set(&fs_info->scrub_workers_refcnt, 1); mutex_unlock(&fs_info->scrub_lock); return 0; } /* Other thread raced in and created the workers for us */ refcount_inc(&fs_info->scrub_workers_refcnt); mutex_unlock(&fs_info->scrub_lock); ret = 0; destroy_workqueue(scrub_workers); return ret; } int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, u64 end, struct btrfs_scrub_progress *progress, int readonly, int is_dev_replace) { struct btrfs_dev_lookup_args args = { .devid = devid }; struct scrub_ctx *sctx; int ret; struct btrfs_device *dev; unsigned int nofs_flag; bool need_commit = false; if (btrfs_fs_closing(fs_info)) return -EAGAIN; /* At mount time we have ensured nodesize is in the range of [4K, 64K]. */ ASSERT(fs_info->nodesize <= BTRFS_STRIPE_LEN); /* * SCRUB_MAX_SECTORS_PER_BLOCK is calculated using the largest possible * value (max nodesize / min sectorsize), thus nodesize should always * be fine. 
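 * (E.g. with the smallest 4K sector size this works out to the 64K
 *  maximum nodesize checked above.)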
*/ ASSERT(fs_info->nodesize <= SCRUB_MAX_SECTORS_PER_BLOCK << fs_info->sectorsize_bits); /* Allocate outside of device_list_mutex */ sctx = scrub_setup_ctx(fs_info, is_dev_replace); if (IS_ERR(sctx)) return PTR_ERR(sctx); ret = scrub_workers_get(fs_info); if (ret) goto out_free_ctx; mutex_lock(&fs_info->fs_devices->device_list_mutex); dev = btrfs_find_device(fs_info->fs_devices, &args); if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) && !is_dev_replace)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); ret = -ENODEV; goto out; } if (!is_dev_replace && !readonly && !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); btrfs_err_in_rcu(fs_info, "scrub on devid %llu: filesystem on %s is not writable", devid, btrfs_dev_name(dev)); ret = -EROFS; goto out; } mutex_lock(&fs_info->scrub_lock); if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) { mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->fs_devices->device_list_mutex); ret = -EIO; goto out; } down_read(&fs_info->dev_replace.rwsem); if (dev->scrub_ctx || (!is_dev_replace && btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) { up_read(&fs_info->dev_replace.rwsem); mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->fs_devices->device_list_mutex); ret = -EINPROGRESS; goto out; } up_read(&fs_info->dev_replace.rwsem); sctx->readonly = readonly; dev->scrub_ctx = sctx; mutex_unlock(&fs_info->fs_devices->device_list_mutex); /* * checking @scrub_pause_req here, we can avoid * race between committing transaction and scrubbing. */ __scrub_blocked_if_needed(fs_info); atomic_inc(&fs_info->scrubs_running); mutex_unlock(&fs_info->scrub_lock); /* * In order to avoid deadlock with reclaim when there is a transaction * trying to pause scrub, make sure we use GFP_NOFS for all the * allocations done at btrfs_scrub_sectors() and scrub_sectors_for_parity() * invoked by our callees. The pausing request is done when the * transaction commit starts, and it blocks the transaction until scrub * is paused (done at specific points at scrub_stripe() or right above * before incrementing fs_info->scrubs_running). */ nofs_flag = memalloc_nofs_save(); if (!is_dev_replace) { u64 old_super_errors; spin_lock(&sctx->stat_lock); old_super_errors = sctx->stat.super_errors; spin_unlock(&sctx->stat_lock); btrfs_info(fs_info, "scrub: started on devid %llu", devid); /* * by holding device list mutex, we can * kick off writing super in log tree sync. */ mutex_lock(&fs_info->fs_devices->device_list_mutex); ret = scrub_supers(sctx, dev); mutex_unlock(&fs_info->fs_devices->device_list_mutex); spin_lock(&sctx->stat_lock); /* * Super block errors found, but we can not commit transaction * at current context, since btrfs_commit_transaction() needs * to pause the current running scrub (hold by ourselves). */ if (sctx->stat.super_errors > old_super_errors && !sctx->readonly) need_commit = true; spin_unlock(&sctx->stat_lock); } if (!ret) ret = scrub_enumerate_chunks(sctx, dev, start, end); memalloc_nofs_restore(nofs_flag); atomic_dec(&fs_info->scrubs_running); wake_up(&fs_info->scrub_pause_wait); if (progress) memcpy(progress, &sctx->stat, sizeof(*progress)); if (!is_dev_replace) btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d", ret ? 
"not finished" : "finished", devid, ret); mutex_lock(&fs_info->scrub_lock); dev->scrub_ctx = NULL; mutex_unlock(&fs_info->scrub_lock); scrub_workers_put(fs_info); scrub_put_ctx(sctx); /* * We found some super block errors before, now try to force a * transaction commit, as scrub has finished. */ if (need_commit) { struct btrfs_trans_handle *trans; trans = btrfs_start_transaction(fs_info->tree_root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); btrfs_err(fs_info, "scrub: failed to start transaction to fix super block errors: %d", ret); return ret; } ret = btrfs_commit_transaction(trans); if (ret < 0) btrfs_err(fs_info, "scrub: failed to commit transaction to fix super block errors: %d", ret); } return ret; out: scrub_workers_put(fs_info); out_free_ctx: scrub_free_ctx(sctx); return ret; } void btrfs_scrub_pause(struct btrfs_fs_info *fs_info) { mutex_lock(&fs_info->scrub_lock); atomic_inc(&fs_info->scrub_pause_req); while (atomic_read(&fs_info->scrubs_paused) != atomic_read(&fs_info->scrubs_running)) { mutex_unlock(&fs_info->scrub_lock); wait_event(fs_info->scrub_pause_wait, atomic_read(&fs_info->scrubs_paused) == atomic_read(&fs_info->scrubs_running)); mutex_lock(&fs_info->scrub_lock); } mutex_unlock(&fs_info->scrub_lock); } void btrfs_scrub_continue(struct btrfs_fs_info *fs_info) { atomic_dec(&fs_info->scrub_pause_req); wake_up(&fs_info->scrub_pause_wait); } int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) { mutex_lock(&fs_info->scrub_lock); if (!atomic_read(&fs_info->scrubs_running)) { mutex_unlock(&fs_info->scrub_lock); return -ENOTCONN; } atomic_inc(&fs_info->scrub_cancel_req); while (atomic_read(&fs_info->scrubs_running)) { mutex_unlock(&fs_info->scrub_lock); wait_event(fs_info->scrub_pause_wait, atomic_read(&fs_info->scrubs_running) == 0); mutex_lock(&fs_info->scrub_lock); } atomic_dec(&fs_info->scrub_cancel_req); mutex_unlock(&fs_info->scrub_lock); return 0; } int btrfs_scrub_cancel_dev(struct btrfs_device *dev) { struct btrfs_fs_info *fs_info = dev->fs_info; struct scrub_ctx *sctx; mutex_lock(&fs_info->scrub_lock); sctx = dev->scrub_ctx; if (!sctx) { mutex_unlock(&fs_info->scrub_lock); return -ENOTCONN; } atomic_inc(&sctx->cancel_req); while (dev->scrub_ctx) { mutex_unlock(&fs_info->scrub_lock); wait_event(fs_info->scrub_pause_wait, dev->scrub_ctx == NULL); mutex_lock(&fs_info->scrub_lock); } mutex_unlock(&fs_info->scrub_lock); return 0; } int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, struct btrfs_scrub_progress *progress) { struct btrfs_dev_lookup_args args = { .devid = devid }; struct btrfs_device *dev; struct scrub_ctx *sctx = NULL; mutex_lock(&fs_info->fs_devices->device_list_mutex); dev = btrfs_find_device(fs_info->fs_devices, &args); if (dev) sctx = dev->scrub_ctx; if (sctx) memcpy(progress, &sctx->stat, sizeof(*progress)); mutex_unlock(&fs_info->fs_devices->device_list_mutex); return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV; }
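/*
 * The bandwidth throttling done by scrub_throttle_dev_io() above can be
 * summarized with the following self-contained userspace sketch.  The
 * struct, the now_ms()/sleep_ms() helpers and the millisecond arithmetic
 * are illustrative stand-ins rather than kernel APIs; only the
 * slice/quota logic mirrors the function: a one-second timeslice is
 * split into up to 64 intervals scaled by the limit, submitted bytes are
 * accumulated, and once the per-interval quota is exceeded the caller
 * sleeps until the end of the slice.
 */
#include <stdint.h>
#include <time.h>

struct throttle_state {
	uint64_t bwlimit;	/* bytes per second, 0 == unlimited */
	int64_t deadline_ms;	/* end of the current timeslice, 0 == not started */
	uint64_t sent;		/* bytes submitted in the current interval */
};

static int64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

static void sleep_ms(int64_t ms)
{
	struct timespec ts = { .tv_sec = ms / 1000,
			       .tv_nsec = (ms % 1000) * 1000000 };

	nanosleep(&ts, NULL);
}

/* Call before submitting a bio of @bio_size bytes. */
static void throttle(struct throttle_state *t, uint32_t bio_size)
{
	const int64_t time_slice = 1000;	/* one second, in ms */
	uint64_t div;
	int64_t now, delta;

	if (t->bwlimit == 0)
		return;

	/* Scale the number of intervals with the limit, capped at 64. */
	div = t->bwlimit / (16 * 1024 * 1024);
	if (div < 1)
		div = 1;
	if (div > 64)
		div = 64;

	now = now_ms();
	if (t->deadline_ms == 0) {
		/* Start a new epoch and reset the byte counter. */
		t->deadline_ms = now + time_slice / div;
		t->sent = 0;
	}

	if (now < t->deadline_ms) {
		t->sent += bio_size;
		if (t->sent <= t->bwlimit / div)
			return;			/* still within the quota */
		delta = t->deadline_ms - now;	/* sleep out the slice */
	} else {
		delta = 0;			/* past the deadline, restart */
	}

	if (delta > 0)
		sleep_ms(delta);

	/* The next call starts a new deadline period. */
	t->deadline_ms = 0;
}

int main(void)
{
	struct throttle_state st = { .bwlimit = 32 * 1024 * 1024 };

	/* Pretend to submit 64 bios of 1 MiB each: roughly two seconds. */
	for (int i = 0; i < 64; i++)
		throttle(&st, 1024 * 1024);
	return 0;
}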
913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 // SPDX-License-Identifier: GPL-2.0-or-later /* * MIDI 2.0 support */ #include <linux/bitops.h> #include <linux/string.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/wait.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/usb/audio.h> #include <linux/usb/midi.h> #include <linux/usb/midi-v2.h> #include <sound/core.h> #include <sound/control.h> #include <sound/ump.h> #include "usbaudio.h" #include "midi.h" #include "midi2.h" #include "helper.h" static bool midi2_enable = true; module_param(midi2_enable, bool, 0444); MODULE_PARM_DESC(midi2_enable, "Enable MIDI 2.0 support."); static bool midi2_ump_probe = true; module_param(midi2_ump_probe, bool, 0444); MODULE_PARM_DESC(midi2_ump_probe, "Probe UMP v1.1 support at first."); /* stream direction; just shorter names */ enum { STR_OUT = SNDRV_RAWMIDI_STREAM_OUTPUT, STR_IN = SNDRV_RAWMIDI_STREAM_INPUT }; #define NUM_URBS 8 struct snd_usb_midi2_urb; struct snd_usb_midi2_endpoint; struct snd_usb_midi2_ump; struct snd_usb_midi2_interface; /* URB context */ struct snd_usb_midi2_urb { struct urb *urb; struct snd_usb_midi2_endpoint *ep; unsigned int index; /* array index */ }; /* A USB MIDI input/output endpoint */ struct snd_usb_midi2_endpoint { struct usb_device *dev; const struct usb_ms20_endpoint_descriptor *ms_ep; /* reference to EP descriptor */ struct snd_usb_midi2_endpoint *pair; /* bidirectional pair EP */ struct snd_usb_midi2_ump *rmidi; /* assigned UMP EP pair */ struct snd_ump_endpoint *ump; /* assigned UMP EP */ int direction; /* direction (STR_IN/OUT) */ unsigned int endpoint; /* EP number */ unsigned int pipe; /* URB pipe */ unsigned int packets; /* packet buffer size in bytes */ unsigned int interval; /* interval for INT EP */ wait_queue_head_t wait; /* URB waiter */ spinlock_t lock; /* URB locking */ struct snd_rawmidi_substream *substream; /* NULL when closed */ unsigned int num_urbs; /* number of allocated URBs */ unsigned long urb_free; /* 
bitmap for free URBs */ unsigned long urb_free_mask; /* bitmask for free URBs */ atomic_t running; /* running status */ atomic_t suspended; /* saved running status for suspend */ bool disconnected; /* shadow of umidi->disconnected */ struct list_head list; /* list to umidi->ep_list */ struct snd_usb_midi2_urb urbs[NUM_URBS]; }; /* A UMP endpoint - one or two USB MIDI endpoints are assigned */ struct snd_usb_midi2_ump { struct usb_device *dev; struct snd_usb_midi2_interface *umidi; /* reference to MIDI iface */ struct snd_ump_endpoint *ump; /* assigned UMP EP object */ struct snd_usb_midi2_endpoint *eps[2]; /* USB MIDI endpoints */ int index; /* rawmidi device index */ unsigned char usb_block_id; /* USB GTB id used for finding a pair */ bool ump_parsed; /* Parsed UMP 1.1 EP/FB info*/ struct list_head list; /* list to umidi->rawmidi_list */ }; /* top-level instance per USB MIDI interface */ struct snd_usb_midi2_interface { struct snd_usb_audio *chip; /* assigned USB-audio card */ struct usb_interface *iface; /* assigned USB interface */ struct usb_host_interface *hostif; const char *blk_descs; /* group terminal block descriptors */ unsigned int blk_desc_size; /* size of GTB descriptors */ bool disconnected; struct list_head ep_list; /* list of endpoints */ struct list_head rawmidi_list; /* list of UMP rawmidis */ struct list_head list; /* list to chip->midi_v2_list */ }; /* submit URBs as much as possible; used for both input and output */ static void do_submit_urbs_locked(struct snd_usb_midi2_endpoint *ep, int (*prepare)(struct snd_usb_midi2_endpoint *, struct urb *)) { struct snd_usb_midi2_urb *ctx; int index, err = 0; if (ep->disconnected) return; while (ep->urb_free) { index = find_first_bit(&ep->urb_free, ep->num_urbs); if (index >= ep->num_urbs) return; ctx = &ep->urbs[index]; err = prepare(ep, ctx->urb); if (err < 0) return; if (!ctx->urb->transfer_buffer_length) return; ctx->urb->dev = ep->dev; err = usb_submit_urb(ctx->urb, GFP_ATOMIC); if (err < 0) { dev_dbg(&ep->dev->dev, "usb_submit_urb error %d\n", err); return; } clear_bit(index, &ep->urb_free); } } /* prepare for output submission: copy from rawmidi buffer to urb packet */ static int prepare_output_urb(struct snd_usb_midi2_endpoint *ep, struct urb *urb) { int count; count = snd_ump_transmit(ep->ump, urb->transfer_buffer, ep->packets); if (count < 0) { dev_dbg(&ep->dev->dev, "rawmidi transmit error %d\n", count); return count; } cpu_to_le32_array((u32 *)urb->transfer_buffer, count >> 2); urb->transfer_buffer_length = count; return 0; } static void submit_output_urbs_locked(struct snd_usb_midi2_endpoint *ep) { do_submit_urbs_locked(ep, prepare_output_urb); } /* URB completion for output; re-filling and re-submit */ static void output_urb_complete(struct urb *urb) { struct snd_usb_midi2_urb *ctx = urb->context; struct snd_usb_midi2_endpoint *ep = ctx->ep; unsigned long flags; spin_lock_irqsave(&ep->lock, flags); set_bit(ctx->index, &ep->urb_free); if (urb->status >= 0 && atomic_read(&ep->running)) submit_output_urbs_locked(ep); if (ep->urb_free == ep->urb_free_mask) wake_up(&ep->wait); spin_unlock_irqrestore(&ep->lock, flags); } /* prepare for input submission: just set the buffer length */ static int prepare_input_urb(struct snd_usb_midi2_endpoint *ep, struct urb *urb) { urb->transfer_buffer_length = ep->packets; return 0; } static void submit_input_urbs_locked(struct snd_usb_midi2_endpoint *ep) { do_submit_urbs_locked(ep, prepare_input_urb); } /* URB completion for input; copy into rawmidi buffer and resubmit */ static void 
input_urb_complete(struct urb *urb) { struct snd_usb_midi2_urb *ctx = urb->context; struct snd_usb_midi2_endpoint *ep = ctx->ep; unsigned long flags; int len; spin_lock_irqsave(&ep->lock, flags); if (ep->disconnected || urb->status < 0) goto dequeue; len = urb->actual_length; len &= ~3; /* align UMP */ if (len > ep->packets) len = ep->packets; if (len > 0) { le32_to_cpu_array((u32 *)urb->transfer_buffer, len >> 2); snd_ump_receive(ep->ump, (u32 *)urb->transfer_buffer, len); } dequeue: set_bit(ctx->index, &ep->urb_free); submit_input_urbs_locked(ep); if (ep->urb_free == ep->urb_free_mask) wake_up(&ep->wait); spin_unlock_irqrestore(&ep->lock, flags); } /* URB submission helper; for both direction */ static void submit_io_urbs(struct snd_usb_midi2_endpoint *ep) { unsigned long flags; if (!ep) return; spin_lock_irqsave(&ep->lock, flags); if (ep->direction == STR_IN) submit_input_urbs_locked(ep); else submit_output_urbs_locked(ep); spin_unlock_irqrestore(&ep->lock, flags); } /* kill URBs for close, suspend and disconnect */ static void kill_midi_urbs(struct snd_usb_midi2_endpoint *ep, bool suspending) { int i; if (!ep) return; if (suspending) ep->suspended = ep->running; atomic_set(&ep->running, 0); for (i = 0; i < ep->num_urbs; i++) { if (!ep->urbs[i].urb) break; usb_kill_urb(ep->urbs[i].urb); } } /* wait until all URBs get freed */ static void drain_urb_queue(struct snd_usb_midi2_endpoint *ep) { if (!ep) return; spin_lock_irq(&ep->lock); atomic_set(&ep->running, 0); wait_event_lock_irq_timeout(ep->wait, ep->disconnected || ep->urb_free == ep->urb_free_mask, ep->lock, msecs_to_jiffies(500)); spin_unlock_irq(&ep->lock); } /* release URBs for an EP */ static void free_midi_urbs(struct snd_usb_midi2_endpoint *ep) { struct snd_usb_midi2_urb *ctx; int i; if (!ep) return; for (i = 0; i < NUM_URBS; ++i) { ctx = &ep->urbs[i]; if (!ctx->urb) break; usb_free_coherent(ep->dev, ep->packets, ctx->urb->transfer_buffer, ctx->urb->transfer_dma); usb_free_urb(ctx->urb); ctx->urb = NULL; } ep->num_urbs = 0; } /* allocate URBs for an EP */ /* the callers should handle allocation errors via free_midi_urbs() */ static int alloc_midi_urbs(struct snd_usb_midi2_endpoint *ep) { struct snd_usb_midi2_urb *ctx; void (*comp)(struct urb *urb); void *buffer; int i, err; int endpoint, len; endpoint = ep->endpoint; len = ep->packets; if (ep->direction == STR_IN) comp = input_urb_complete; else comp = output_urb_complete; ep->num_urbs = 0; ep->urb_free = ep->urb_free_mask = 0; for (i = 0; i < NUM_URBS; i++) { ctx = &ep->urbs[i]; ctx->index = i; ctx->urb = usb_alloc_urb(0, GFP_KERNEL); if (!ctx->urb) { dev_err(&ep->dev->dev, "URB alloc failed\n"); return -ENOMEM; } ctx->ep = ep; buffer = usb_alloc_coherent(ep->dev, len, GFP_KERNEL, &ctx->urb->transfer_dma); if (!buffer) { dev_err(&ep->dev->dev, "URB buffer alloc failed (size %d)\n", len); return -ENOMEM; } if (ep->interval) usb_fill_int_urb(ctx->urb, ep->dev, ep->pipe, buffer, len, comp, ctx, ep->interval); else usb_fill_bulk_urb(ctx->urb, ep->dev, ep->pipe, buffer, len, comp, ctx); err = usb_urb_ep_type_check(ctx->urb); if (err < 0) { dev_err(&ep->dev->dev, "invalid MIDI EP %x\n", endpoint); return err; } ctx->urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; ep->num_urbs++; } ep->urb_free = ep->urb_free_mask = GENMASK(ep->num_urbs - 1, 0); return 0; } static struct snd_usb_midi2_endpoint * ump_to_endpoint(struct snd_ump_endpoint *ump, int dir) { struct snd_usb_midi2_ump *rmidi = ump->private_data; return rmidi->eps[dir]; } /* ump open callback */ static int 
snd_usb_midi_v2_open(struct snd_ump_endpoint *ump, int dir) { struct snd_usb_midi2_endpoint *ep = ump_to_endpoint(ump, dir); int err = 0; if (!ep || !ep->endpoint) return -ENODEV; if (ep->disconnected) return -EIO; if (ep->direction == STR_OUT) { err = alloc_midi_urbs(ep); if (err) { free_midi_urbs(ep); return err; } } return 0; } /* ump close callback */ static void snd_usb_midi_v2_close(struct snd_ump_endpoint *ump, int dir) { struct snd_usb_midi2_endpoint *ep = ump_to_endpoint(ump, dir); if (ep->direction == STR_OUT) { kill_midi_urbs(ep, false); drain_urb_queue(ep); free_midi_urbs(ep); } } /* ump trigger callback */ static void snd_usb_midi_v2_trigger(struct snd_ump_endpoint *ump, int dir, int up) { struct snd_usb_midi2_endpoint *ep = ump_to_endpoint(ump, dir); atomic_set(&ep->running, up); if (up && ep->direction == STR_OUT && !ep->disconnected) submit_io_urbs(ep); } /* ump drain callback */ static void snd_usb_midi_v2_drain(struct snd_ump_endpoint *ump, int dir) { struct snd_usb_midi2_endpoint *ep = ump_to_endpoint(ump, dir); drain_urb_queue(ep); } /* allocate and start all input streams */ static int start_input_streams(struct snd_usb_midi2_interface *umidi) { struct snd_usb_midi2_endpoint *ep; int err; list_for_each_entry(ep, &umidi->ep_list, list) { if (ep->direction == STR_IN) { err = alloc_midi_urbs(ep); if (err < 0) goto error; } } list_for_each_entry(ep, &umidi->ep_list, list) { if (ep->direction == STR_IN) submit_io_urbs(ep); } return 0; error: list_for_each_entry(ep, &umidi->ep_list, list) { if (ep->direction == STR_IN) free_midi_urbs(ep); } return err; } static const struct snd_ump_ops snd_usb_midi_v2_ump_ops = { .open = snd_usb_midi_v2_open, .close = snd_usb_midi_v2_close, .trigger = snd_usb_midi_v2_trigger, .drain = snd_usb_midi_v2_drain, }; /* create a USB MIDI 2.0 endpoint object */ static int create_midi2_endpoint(struct snd_usb_midi2_interface *umidi, struct usb_host_endpoint *hostep, const struct usb_ms20_endpoint_descriptor *ms_ep) { struct snd_usb_midi2_endpoint *ep; int endpoint, dir; usb_audio_dbg(umidi->chip, "Creating an EP 0x%02x, #GTB=%d\n", hostep->desc.bEndpointAddress, ms_ep->bNumGrpTrmBlock); ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (!ep) return -ENOMEM; spin_lock_init(&ep->lock); init_waitqueue_head(&ep->wait); ep->dev = umidi->chip->dev; endpoint = hostep->desc.bEndpointAddress; dir = (endpoint & USB_DIR_IN) ? 
STR_IN : STR_OUT; ep->endpoint = endpoint; ep->direction = dir; ep->ms_ep = ms_ep; if (usb_endpoint_xfer_int(&hostep->desc)) ep->interval = hostep->desc.bInterval; else ep->interval = 0; if (dir == STR_IN) { if (ep->interval) ep->pipe = usb_rcvintpipe(ep->dev, endpoint); else ep->pipe = usb_rcvbulkpipe(ep->dev, endpoint); } else { if (ep->interval) ep->pipe = usb_sndintpipe(ep->dev, endpoint); else ep->pipe = usb_sndbulkpipe(ep->dev, endpoint); } ep->packets = usb_maxpacket(ep->dev, ep->pipe); list_add_tail(&ep->list, &umidi->ep_list); return 0; } /* destructor for endpoint; from snd_usb_midi_v2_free() */ static void free_midi2_endpoint(struct snd_usb_midi2_endpoint *ep) { list_del(&ep->list); free_midi_urbs(ep); kfree(ep); } /* call all endpoint destructors */ static void free_all_midi2_endpoints(struct snd_usb_midi2_interface *umidi) { struct snd_usb_midi2_endpoint *ep; while (!list_empty(&umidi->ep_list)) { ep = list_first_entry(&umidi->ep_list, struct snd_usb_midi2_endpoint, list); free_midi2_endpoint(ep); } } /* find a MIDI STREAMING descriptor with a given subtype */ static void *find_usb_ms_endpoint_descriptor(struct usb_host_endpoint *hostep, unsigned char subtype) { unsigned char *extra = hostep->extra; int extralen = hostep->extralen; while (extralen > 3) { struct usb_ms_endpoint_descriptor *ms_ep = (struct usb_ms_endpoint_descriptor *)extra; if (ms_ep->bLength > 3 && ms_ep->bDescriptorType == USB_DT_CS_ENDPOINT && ms_ep->bDescriptorSubtype == subtype) return ms_ep; if (!extra[0]) break; extralen -= extra[0]; extra += extra[0]; } return NULL; } /* get the full group terminal block descriptors and return the size */ static int get_group_terminal_block_descs(struct snd_usb_midi2_interface *umidi) { struct usb_host_interface *hostif = umidi->hostif; struct usb_device *dev = umidi->chip->dev; struct usb_ms20_gr_trm_block_header_descriptor header = { 0 }; unsigned char *data; int err, size; err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), USB_REQ_GET_DESCRIPTOR, USB_RECIP_INTERFACE | USB_TYPE_STANDARD | USB_DIR_IN, USB_DT_CS_GR_TRM_BLOCK << 8 | hostif->desc.bAlternateSetting, hostif->desc.bInterfaceNumber, &header, sizeof(header)); if (err < 0) return err; size = __le16_to_cpu(header.wTotalLength); if (!size) { dev_err(&dev->dev, "Failed to get GTB descriptors for %d:%d\n", hostif->desc.bInterfaceNumber, hostif->desc.bAlternateSetting); return -EINVAL; } data = kzalloc(size, GFP_KERNEL); if (!data) return -ENOMEM; err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), USB_REQ_GET_DESCRIPTOR, USB_RECIP_INTERFACE | USB_TYPE_STANDARD | USB_DIR_IN, USB_DT_CS_GR_TRM_BLOCK << 8 | hostif->desc.bAlternateSetting, hostif->desc.bInterfaceNumber, data, size); if (err < 0) { kfree(data); return err; } umidi->blk_descs = data; umidi->blk_desc_size = size; return 0; } /* find the corresponding group terminal block descriptor */ static const struct usb_ms20_gr_trm_block_descriptor * find_group_terminal_block(struct snd_usb_midi2_interface *umidi, int id) { const unsigned char *data = umidi->blk_descs; int size = umidi->blk_desc_size; const struct usb_ms20_gr_trm_block_descriptor *desc; size -= sizeof(struct usb_ms20_gr_trm_block_header_descriptor); data += sizeof(struct usb_ms20_gr_trm_block_header_descriptor); while (size > 0 && *data && *data <= size) { desc = (const struct usb_ms20_gr_trm_block_descriptor *)data; if (desc->bLength >= sizeof(*desc) && desc->bDescriptorType == USB_DT_CS_GR_TRM_BLOCK && desc->bDescriptorSubtype == USB_MS_GR_TRM_BLOCK && desc->bGrpTrmBlkID == id) return desc; size -= 
*data; data += *data; } return NULL; } /* fill up the information from GTB */ static int parse_group_terminal_block(struct snd_usb_midi2_ump *rmidi, const struct usb_ms20_gr_trm_block_descriptor *desc) { struct snd_ump_endpoint *ump = rmidi->ump; unsigned int protocol, protocol_caps; /* set default protocol */ switch (desc->bMIDIProtocol) { case USB_MS_MIDI_PROTO_1_0_64: case USB_MS_MIDI_PROTO_1_0_64_JRTS: case USB_MS_MIDI_PROTO_1_0_128: case USB_MS_MIDI_PROTO_1_0_128_JRTS: protocol = SNDRV_UMP_EP_INFO_PROTO_MIDI1; break; case USB_MS_MIDI_PROTO_2_0: case USB_MS_MIDI_PROTO_2_0_JRTS: protocol = SNDRV_UMP_EP_INFO_PROTO_MIDI2; break; default: return 0; } if (!ump->info.protocol) ump->info.protocol = protocol; protocol_caps = protocol; switch (desc->bMIDIProtocol) { case USB_MS_MIDI_PROTO_1_0_64_JRTS: case USB_MS_MIDI_PROTO_1_0_128_JRTS: case USB_MS_MIDI_PROTO_2_0_JRTS: protocol_caps |= SNDRV_UMP_EP_INFO_PROTO_JRTS_TX | SNDRV_UMP_EP_INFO_PROTO_JRTS_RX; break; } ump->info.protocol_caps |= protocol_caps; return 0; } /* allocate and parse for each assigned group terminal block */ static int parse_group_terminal_blocks(struct snd_usb_midi2_interface *umidi) { struct snd_usb_midi2_ump *rmidi; const struct usb_ms20_gr_trm_block_descriptor *desc; int err; err = get_group_terminal_block_descs(umidi); if (err < 0) return err; if (!umidi->blk_descs) return 0; list_for_each_entry(rmidi, &umidi->rawmidi_list, list) { desc = find_group_terminal_block(umidi, rmidi->usb_block_id); if (!desc) continue; err = parse_group_terminal_block(rmidi, desc); if (err < 0) return err; } return 0; } /* parse endpoints included in the given interface and create objects */ static int parse_midi_2_0_endpoints(struct snd_usb_midi2_interface *umidi) { struct usb_host_interface *hostif = umidi->hostif; struct usb_host_endpoint *hostep; struct usb_ms20_endpoint_descriptor *ms_ep; int i, err; for (i = 0; i < hostif->desc.bNumEndpoints; i++) { hostep = &hostif->endpoint[i]; if (!usb_endpoint_xfer_bulk(&hostep->desc) && !usb_endpoint_xfer_int(&hostep->desc)) continue; ms_ep = find_usb_ms_endpoint_descriptor(hostep, USB_MS_GENERAL_2_0); if (!ms_ep) continue; if (ms_ep->bLength <= sizeof(*ms_ep)) continue; if (!ms_ep->bNumGrpTrmBlock) continue; if (ms_ep->bLength < sizeof(*ms_ep) + ms_ep->bNumGrpTrmBlock) continue; err = create_midi2_endpoint(umidi, hostep, ms_ep); if (err < 0) return err; } return 0; } static void free_all_midi2_umps(struct snd_usb_midi2_interface *umidi) { struct snd_usb_midi2_ump *rmidi; while (!list_empty(&umidi->rawmidi_list)) { rmidi = list_first_entry(&umidi->rawmidi_list, struct snd_usb_midi2_ump, list); list_del(&rmidi->list); kfree(rmidi); } } static int create_midi2_ump(struct snd_usb_midi2_interface *umidi, struct snd_usb_midi2_endpoint *ep_in, struct snd_usb_midi2_endpoint *ep_out, int blk_id) { struct snd_usb_midi2_ump *rmidi; struct snd_ump_endpoint *ump; int input, output; char idstr[16]; int err; rmidi = kzalloc(sizeof(*rmidi), GFP_KERNEL); if (!rmidi) return -ENOMEM; INIT_LIST_HEAD(&rmidi->list); rmidi->dev = umidi->chip->dev; rmidi->umidi = umidi; rmidi->usb_block_id = blk_id; rmidi->index = umidi->chip->num_rawmidis; snprintf(idstr, sizeof(idstr), "UMP %d", rmidi->index); input = ep_in ? 1 : 0; output = ep_out ? 
1 : 0; err = snd_ump_endpoint_new(umidi->chip->card, idstr, rmidi->index, output, input, &ump); if (err < 0) { usb_audio_dbg(umidi->chip, "Failed to create a UMP object\n"); kfree(rmidi); return err; } rmidi->ump = ump; umidi->chip->num_rawmidis++; ump->private_data = rmidi; ump->ops = &snd_usb_midi_v2_ump_ops; rmidi->eps[STR_IN] = ep_in; rmidi->eps[STR_OUT] = ep_out; if (ep_in) { ep_in->pair = ep_out; ep_in->rmidi = rmidi; ep_in->ump = ump; } if (ep_out) { ep_out->pair = ep_in; ep_out->rmidi = rmidi; ep_out->ump = ump; } list_add_tail(&rmidi->list, &umidi->rawmidi_list); return 0; } /* find the UMP EP with the given USB block id */ static struct snd_usb_midi2_ump * find_midi2_ump(struct snd_usb_midi2_interface *umidi, int blk_id) { struct snd_usb_midi2_ump *rmidi; list_for_each_entry(rmidi, &umidi->rawmidi_list, list) { if (rmidi->usb_block_id == blk_id) return rmidi; } return NULL; } /* look for the matching output endpoint and create UMP object if found */ static int find_matching_ep_partner(struct snd_usb_midi2_interface *umidi, struct snd_usb_midi2_endpoint *ep, int blk_id) { struct snd_usb_midi2_endpoint *pair_ep; int blk; usb_audio_dbg(umidi->chip, "Looking for a pair for EP-in 0x%02x\n", ep->endpoint); list_for_each_entry(pair_ep, &umidi->ep_list, list) { if (pair_ep->direction != STR_OUT) continue; if (pair_ep->pair) continue; /* already paired */ for (blk = 0; blk < pair_ep->ms_ep->bNumGrpTrmBlock; blk++) { if (pair_ep->ms_ep->baAssoGrpTrmBlkID[blk] == blk_id) { usb_audio_dbg(umidi->chip, "Found a match with EP-out 0x%02x blk %d\n", pair_ep->endpoint, blk); return create_midi2_ump(umidi, ep, pair_ep, blk_id); } } } return 0; } /* Call UMP helper to parse UMP endpoints; * this needs to be called after starting the input streams for bi-directional * communications */ static int parse_ump_endpoints(struct snd_usb_midi2_interface *umidi) { struct snd_usb_midi2_ump *rmidi; int err; list_for_each_entry(rmidi, &umidi->rawmidi_list, list) { if (!rmidi->ump || !(rmidi->ump->core.info_flags & SNDRV_RAWMIDI_INFO_DUPLEX)) continue; err = snd_ump_parse_endpoint(rmidi->ump); if (!err) { rmidi->ump_parsed = true; } else { if (err == -ENOMEM) return err; /* fall back to GTB later */ } } return 0; } /* create a UMP block from a GTB entry */ static int create_gtb_block(struct snd_usb_midi2_ump *rmidi, int dir, int blk) { struct snd_usb_midi2_interface *umidi = rmidi->umidi; const struct usb_ms20_gr_trm_block_descriptor *desc; struct snd_ump_block *fb; int type, err; desc = find_group_terminal_block(umidi, blk); if (!desc) return 0; usb_audio_dbg(umidi->chip, "GTB %d: type=%d, group=%d/%d, protocol=%d, in bw=%d, out bw=%d\n", blk, desc->bGrpTrmBlkType, desc->nGroupTrm, desc->nNumGroupTrm, desc->bMIDIProtocol, __le16_to_cpu(desc->wMaxInputBandwidth), __le16_to_cpu(desc->wMaxOutputBandwidth)); /* assign the direction */ switch (desc->bGrpTrmBlkType) { case USB_MS_GR_TRM_BLOCK_TYPE_BIDIRECTIONAL: type = SNDRV_UMP_DIR_BIDIRECTION; break; case USB_MS_GR_TRM_BLOCK_TYPE_INPUT_ONLY: type = SNDRV_UMP_DIR_INPUT; break; case USB_MS_GR_TRM_BLOCK_TYPE_OUTPUT_ONLY: type = SNDRV_UMP_DIR_OUTPUT; break; default: usb_audio_dbg(umidi->chip, "Unsupported GTB type %d\n", desc->bGrpTrmBlkType); return 0; /* unsupported */ } /* guess work: set blk-1 as the (0-based) block ID */ err = snd_ump_block_new(rmidi->ump, blk - 1, type, desc->nGroupTrm, desc->nNumGroupTrm, &fb); if (err == -EBUSY) return 0; /* already present */ else if (err) return err; if (desc->iBlockItem) usb_string(rmidi->dev, desc->iBlockItem, 
fb->info.name, sizeof(fb->info.name)); if (__le16_to_cpu(desc->wMaxInputBandwidth) == 1 || __le16_to_cpu(desc->wMaxOutputBandwidth) == 1) fb->info.flags |= SNDRV_UMP_BLOCK_IS_MIDI1 | SNDRV_UMP_BLOCK_IS_LOWSPEED; /* if MIDI 2.0 protocol is supported and yet the GTB shows MIDI 1.0, * treat it as a MIDI 1.0-specific block */ if (rmidi->ump->info.protocol_caps & SNDRV_UMP_EP_INFO_PROTO_MIDI2) { switch (desc->bMIDIProtocol) { case USB_MS_MIDI_PROTO_1_0_64: case USB_MS_MIDI_PROTO_1_0_64_JRTS: case USB_MS_MIDI_PROTO_1_0_128: case USB_MS_MIDI_PROTO_1_0_128_JRTS: fb->info.flags |= SNDRV_UMP_BLOCK_IS_MIDI1; break; } } snd_ump_update_group_attrs(rmidi->ump); usb_audio_dbg(umidi->chip, "Created a UMP block %d from GTB, name=%s, flags=0x%x\n", blk, fb->info.name, fb->info.flags); return 0; } /* Create UMP blocks for each UMP EP */ static int create_blocks_from_gtb(struct snd_usb_midi2_interface *umidi) { struct snd_usb_midi2_ump *rmidi; int i, blk, err, dir; list_for_each_entry(rmidi, &umidi->rawmidi_list, list) { if (!rmidi->ump) continue; /* Blocks have been already created? */ if (rmidi->ump_parsed || rmidi->ump->info.num_blocks) continue; /* GTB is static-only */ rmidi->ump->info.flags |= SNDRV_UMP_EP_INFO_STATIC_BLOCKS; /* loop over GTBs */ for (dir = 0; dir < 2; dir++) { if (!rmidi->eps[dir]) continue; for (i = 0; i < rmidi->eps[dir]->ms_ep->bNumGrpTrmBlock; i++) { blk = rmidi->eps[dir]->ms_ep->baAssoGrpTrmBlkID[i]; err = create_gtb_block(rmidi, dir, blk); if (err < 0) return err; } } } return 0; } /* attach legacy rawmidis */ static int attach_legacy_rawmidi(struct snd_usb_midi2_interface *umidi) { #if IS_ENABLED(CONFIG_SND_UMP_LEGACY_RAWMIDI) struct snd_usb_midi2_ump *rmidi; int err; list_for_each_entry(rmidi, &umidi->rawmidi_list, list) { err = snd_ump_attach_legacy_rawmidi(rmidi->ump, "Legacy MIDI", umidi->chip->num_rawmidis); if (err < 0) return err; umidi->chip->num_rawmidis++; } #endif return 0; } static void snd_usb_midi_v2_free(struct snd_usb_midi2_interface *umidi) { free_all_midi2_endpoints(umidi); free_all_midi2_umps(umidi); list_del(&umidi->list); kfree(umidi->blk_descs); kfree(umidi); } /* parse the interface for MIDI 2.0 */ static int parse_midi_2_0(struct snd_usb_midi2_interface *umidi) { struct snd_usb_midi2_endpoint *ep; int blk, id, err; /* First, create an object for each USB MIDI Endpoint */ err = parse_midi_2_0_endpoints(umidi); if (err < 0) return err; if (list_empty(&umidi->ep_list)) { usb_audio_warn(umidi->chip, "No MIDI endpoints found\n"); return -ENODEV; } /* * Next, look for EP I/O pairs that are found in group terminal blocks * A UMP object is created for each EP I/O pair as bidirecitonal * UMP EP */ list_for_each_entry(ep, &umidi->ep_list, list) { /* only input in this loop; output is matched in find_midi_ump() */ if (ep->direction != STR_IN) continue; for (blk = 0; blk < ep->ms_ep->bNumGrpTrmBlock; blk++) { id = ep->ms_ep->baAssoGrpTrmBlkID[blk]; err = find_matching_ep_partner(umidi, ep, id); if (err < 0) return err; } } /* * For the remaining EPs, treat as singles, create a UMP object with * unidirectional EP */ list_for_each_entry(ep, &umidi->ep_list, list) { if (ep->rmidi) continue; /* already paired */ for (blk = 0; blk < ep->ms_ep->bNumGrpTrmBlock; blk++) { id = ep->ms_ep->baAssoGrpTrmBlkID[blk]; if (find_midi2_ump(umidi, id)) continue; usb_audio_dbg(umidi->chip, "Creating a unidirection UMP for EP=0x%02x, blk=%d\n", ep->endpoint, id); if (ep->direction == STR_IN) err = create_midi2_ump(umidi, ep, NULL, id); else err = create_midi2_ump(umidi, NULL, ep, id); if 
(err < 0) return err; break; } } return 0; } /* is the given interface for MIDI 2.0? */ static bool is_midi2_altset(struct usb_host_interface *hostif) { struct usb_ms_header_descriptor *ms_header = (struct usb_ms_header_descriptor *)hostif->extra; if (hostif->extralen < 7 || ms_header->bLength < 7 || ms_header->bDescriptorType != USB_DT_CS_INTERFACE || ms_header->bDescriptorSubtype != UAC_HEADER) return false; return le16_to_cpu(ms_header->bcdMSC) == USB_MS_REV_MIDI_2_0; } /* change the altsetting */ static int set_altset(struct snd_usb_midi2_interface *umidi) { usb_audio_dbg(umidi->chip, "Setting host iface %d:%d\n", umidi->hostif->desc.bInterfaceNumber, umidi->hostif->desc.bAlternateSetting); return usb_set_interface(umidi->chip->dev, umidi->hostif->desc.bInterfaceNumber, umidi->hostif->desc.bAlternateSetting); } /* fill UMP Endpoint name string from USB descriptor */ static void fill_ump_ep_name(struct snd_ump_endpoint *ump, struct usb_device *dev, int id) { int len; usb_string(dev, id, ump->info.name, sizeof(ump->info.name)); /* trim superfluous "MIDI" suffix */ len = strlen(ump->info.name); if (len > 5 && !strcmp(ump->info.name + len - 5, " MIDI")) ump->info.name[len - 5] = 0; } /* fill the fallback name string for each rawmidi instance */ static void set_fallback_rawmidi_names(struct snd_usb_midi2_interface *umidi) { struct usb_device *dev = umidi->chip->dev; struct snd_usb_midi2_ump *rmidi; struct snd_ump_endpoint *ump; list_for_each_entry(rmidi, &umidi->rawmidi_list, list) { ump = rmidi->ump; /* fill UMP EP name from USB descriptors */ if (!*ump->info.name && umidi->hostif->desc.iInterface) fill_ump_ep_name(ump, dev, umidi->hostif->desc.iInterface); else if (!*ump->info.name && dev->descriptor.iProduct) fill_ump_ep_name(ump, dev, dev->descriptor.iProduct); /* fill fallback name */ if (!*ump->info.name) sprintf(ump->info.name, "USB MIDI %d", rmidi->index); /* copy as rawmidi name if not set */ if (!*ump->core.name) strscpy(ump->core.name, ump->info.name, sizeof(ump->core.name)); /* use serial number string as unique UMP product id */ if (!*ump->info.product_id && dev->descriptor.iSerialNumber) usb_string(dev, dev->descriptor.iSerialNumber, ump->info.product_id, sizeof(ump->info.product_id)); } } /* create MIDI interface; fallback to MIDI 1.0 if needed */ int snd_usb_midi_v2_create(struct snd_usb_audio *chip, struct usb_interface *iface, const struct snd_usb_audio_quirk *quirk, unsigned int usb_id) { struct snd_usb_midi2_interface *umidi; struct usb_host_interface *hostif; int err; usb_audio_dbg(chip, "Parsing interface %d...\n", iface->altsetting[0].desc.bInterfaceNumber); /* fallback to MIDI 1.0? 
*/ if (!midi2_enable) { usb_audio_info(chip, "Falling back to MIDI 1.0 by module option\n"); goto fallback_to_midi1; } if ((quirk && quirk->type != QUIRK_MIDI_STANDARD_INTERFACE) || iface->num_altsetting < 2) { usb_audio_info(chip, "Quirk or no altset; falling back to MIDI 1.0\n"); goto fallback_to_midi1; } hostif = &iface->altsetting[1]; if (!is_midi2_altset(hostif)) { usb_audio_info(chip, "No MIDI 2.0 at altset 1, falling back to MIDI 1.0\n"); goto fallback_to_midi1; } if (!hostif->desc.bNumEndpoints) { usb_audio_info(chip, "No endpoint at altset 1, falling back to MIDI 1.0\n"); goto fallback_to_midi1; } usb_audio_dbg(chip, "Creating a MIDI 2.0 instance for %d:%d\n", hostif->desc.bInterfaceNumber, hostif->desc.bAlternateSetting); umidi = kzalloc(sizeof(*umidi), GFP_KERNEL); if (!umidi) return -ENOMEM; umidi->chip = chip; umidi->iface = iface; umidi->hostif = hostif; INIT_LIST_HEAD(&umidi->rawmidi_list); INIT_LIST_HEAD(&umidi->ep_list); list_add_tail(&umidi->list, &chip->midi_v2_list); err = set_altset(umidi); if (err < 0) { usb_audio_err(chip, "Failed to set altset\n"); goto error; } /* assume only altset 1 corresponding to MIDI 2.0 interface */ err = parse_midi_2_0(umidi); if (err < 0) { usb_audio_err(chip, "Failed to parse MIDI 2.0 interface\n"); goto error; } /* parse USB group terminal blocks */ err = parse_group_terminal_blocks(umidi); if (err < 0) { usb_audio_err(chip, "Failed to parse GTB\n"); goto error; } err = start_input_streams(umidi); if (err < 0) { usb_audio_err(chip, "Failed to start input streams\n"); goto error; } if (midi2_ump_probe) { err = parse_ump_endpoints(umidi); if (err < 0) { usb_audio_err(chip, "Failed to parse UMP endpoint\n"); goto error; } } err = create_blocks_from_gtb(umidi); if (err < 0) { usb_audio_err(chip, "Failed to create GTB blocks\n"); goto error; } set_fallback_rawmidi_names(umidi); err = attach_legacy_rawmidi(umidi); if (err < 0) { usb_audio_err(chip, "Failed to create legacy rawmidi\n"); goto error; } return 0; error: snd_usb_midi_v2_free(umidi); return err; fallback_to_midi1: return __snd_usbmidi_create(chip->card, iface, &chip->midi_list, quirk, usb_id, &chip->num_rawmidis); } static void suspend_midi2_endpoint(struct snd_usb_midi2_endpoint *ep) { kill_midi_urbs(ep, true); drain_urb_queue(ep); } void snd_usb_midi_v2_suspend_all(struct snd_usb_audio *chip) { struct snd_usb_midi2_interface *umidi; struct snd_usb_midi2_endpoint *ep; list_for_each_entry(umidi, &chip->midi_v2_list, list) { list_for_each_entry(ep, &umidi->ep_list, list) suspend_midi2_endpoint(ep); } } static void resume_midi2_endpoint(struct snd_usb_midi2_endpoint *ep) { ep->running = ep->suspended; if (ep->direction == STR_IN) submit_io_urbs(ep); /* FIXME: does it all? 
*/ } void snd_usb_midi_v2_resume_all(struct snd_usb_audio *chip) { struct snd_usb_midi2_interface *umidi; struct snd_usb_midi2_endpoint *ep; list_for_each_entry(umidi, &chip->midi_v2_list, list) { set_altset(umidi); list_for_each_entry(ep, &umidi->ep_list, list) resume_midi2_endpoint(ep); } } void snd_usb_midi_v2_disconnect_all(struct snd_usb_audio *chip) { struct snd_usb_midi2_interface *umidi; struct snd_usb_midi2_endpoint *ep; list_for_each_entry(umidi, &chip->midi_v2_list, list) { umidi->disconnected = 1; list_for_each_entry(ep, &umidi->ep_list, list) { ep->disconnected = 1; kill_midi_urbs(ep, false); drain_urb_queue(ep); } } } /* release the MIDI instance */ void snd_usb_midi_v2_free_all(struct snd_usb_audio *chip) { struct snd_usb_midi2_interface *umidi, *next; list_for_each_entry_safe(umidi, next, &chip->midi_v2_list, list) snd_usb_midi_v2_free(umidi); }
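/*
 * Illustrative sketch (not part of the driver above): a minimal userspace
 * model of the free-URB bitmap bookkeeping shared by do_submit_urbs_locked()
 * and the URB completion handlers.  Each set bit of urb_free marks an idle
 * URB slot; alloc_midi_urbs() seeds the mask with GENMASK(), submission
 * clears a bit, completion sets it back, and "all bits set again" is the
 * drain/wake-up condition.  The names below (model_submit_one,
 * model_complete_one, first_set_bit, MODEL_NUM_URBS) are invented for this
 * example only.
 */
#include <assert.h>
#include <stdio.h>

#define MODEL_NUM_URBS	8	/* mirrors NUM_URBS in the driver */

static unsigned long model_urb_free;	  /* 1 = slot idle, 0 = in flight */
static unsigned long model_urb_free_mask; /* value when every slot is idle */

static int first_set_bit(unsigned long v, int nbits)
{
	for (int i = 0; i < nbits; i++)
		if (v & (1UL << i))
			return i;
	return nbits;	/* like find_first_bit(): nbits means "none set" */
}

/* take the next idle slot, as do_submit_urbs_locked() does before submit */
static int model_submit_one(void)
{
	int idx = first_set_bit(model_urb_free, MODEL_NUM_URBS);

	if (idx >= MODEL_NUM_URBS)
		return -1;			/* no free URB */
	model_urb_free &= ~(1UL << idx);	/* clear_bit(): now in flight */
	return idx;
}

/* completion path: slot becomes idle again, possibly waking a drain waiter */
static void model_complete_one(int idx)
{
	model_urb_free |= 1UL << idx;		/* set_bit() */
	if (model_urb_free == model_urb_free_mask)
		printf("all URBs idle -> wake_up(&ep->wait) in the driver\n");
}

int main(void)
{
	/* alloc_midi_urbs(): urb_free = urb_free_mask = GENMASK(n - 1, 0) */
	model_urb_free = model_urb_free_mask = (1UL << MODEL_NUM_URBS) - 1;

	int a = model_submit_one();
	int b = model_submit_one();

	assert(a == 0 && b == 1);
	model_complete_one(a);
	model_complete_one(b);	/* prints the "all idle" message */
	return 0;
}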
/* SPDX-License-Identifier: GPL-2.0 */
/* rwsem.h: R/W semaphores, public interface
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from asm-i386/semaphore.h
 */

#ifndef _LINUX_RWSEM_H
#define _LINUX_RWSEM_H

#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/err.h>
#include <linux/cleanup.h>

#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __RWSEM_DEP_MAP_INIT(lockname)			\
	.dep_map = {					\
		.name = #lockname,			\
		.wait_type_inner = LD_WAIT_SLEEP,	\
	},
#else
# define __RWSEM_DEP_MAP_INIT(lockname)
#endif

#ifndef CONFIG_PREEMPT_RT

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
#include <linux/osq_lock.h>
#endif

/*
 * For an uncontended rwsem, count and owner are the only fields a task
 * needs to touch when acquiring the rwsem. So they are put next to each
 * other to increase the chance that they will share the same cacheline.
 *
 * In a contended rwsem, the owner is likely the most frequently accessed
 * field in the structure as the optimistic waiter that holds the osq lock
 * will spin on owner. For an embedded rwsem, other hot fields in the
 * containing structure should be moved further away from the rwsem to
 * reduce the chance that they will share the same cacheline causing
 * cacheline bouncing problem.
 */
struct rw_semaphore {
	atomic_long_t count;
	/*
	 * Write owner or one of the read owners as well flags regarding
	 * the current state of the rwsem. Can be used as a speculative
	 * check to see if the write owner is running on the cpu.
*/ atomic_long_t owner; #ifdef CONFIG_RWSEM_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif raw_spinlock_t wait_lock; struct list_head wait_list; #ifdef CONFIG_DEBUG_RWSEMS void *magic; #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif }; #define RWSEM_UNLOCKED_VALUE 0UL #define RWSEM_WRITER_LOCKED (1UL << 0) #define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) static inline int rwsem_is_locked(struct rw_semaphore *sem) { return atomic_long_read(&sem->count) != RWSEM_UNLOCKED_VALUE; } static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) { WARN_ON(atomic_long_read(&sem->count) == RWSEM_UNLOCKED_VALUE); } static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) { WARN_ON(!(atomic_long_read(&sem->count) & RWSEM_WRITER_LOCKED)); } /* Common initializer macros and functions */ #ifdef CONFIG_DEBUG_RWSEMS # define __RWSEM_DEBUG_INIT(lockname) .magic = &lockname, #else # define __RWSEM_DEBUG_INIT(lockname) #endif #ifdef CONFIG_RWSEM_SPIN_ON_OWNER #define __RWSEM_OPT_INIT(lockname) .osq = OSQ_LOCK_UNLOCKED, #else #define __RWSEM_OPT_INIT(lockname) #endif #define __RWSEM_INITIALIZER(name) \ { __RWSEM_COUNT_INIT(name), \ .owner = ATOMIC_LONG_INIT(0), \ __RWSEM_OPT_INIT(name) \ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\ .wait_list = LIST_HEAD_INIT((name).wait_list), \ __RWSEM_DEBUG_INIT(name) \ __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) extern void __init_rwsem(struct rw_semaphore *sem, const char *name, struct lock_class_key *key); #define init_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ __init_rwsem((sem), #sem, &__key); \ } while (0) /* * This is the same regardless of which rwsem implementation that is being used. * It is just a heuristic meant to be called by somebody already holding the * rwsem to see if somebody from an incompatible type is wanting access to the * lock. */ static inline int rwsem_is_contended(struct rw_semaphore *sem) { return !list_empty(&sem->wait_list); } #else /* !CONFIG_PREEMPT_RT */ #include <linux/rwbase_rt.h> struct rw_semaphore { struct rwbase_rt rwbase; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif }; #define __RWSEM_INITIALIZER(name) \ { \ .rwbase = __RWBASE_INITIALIZER(name), \ __RWSEM_DEP_MAP_INIT(name) \ } #define DECLARE_RWSEM(lockname) \ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) extern void __init_rwsem(struct rw_semaphore *rwsem, const char *name, struct lock_class_key *key); #define init_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ __init_rwsem((sem), #sem, &__key); \ } while (0) static __always_inline int rwsem_is_locked(const struct rw_semaphore *sem) { return rw_base_is_locked(&sem->rwbase); } static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) { WARN_ON(!rwsem_is_locked(sem)); } static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) { WARN_ON(!rw_base_is_write_locked(&sem->rwbase)); } static __always_inline int rwsem_is_contended(struct rw_semaphore *sem) { return rw_base_is_contended(&sem->rwbase); } #endif /* CONFIG_PREEMPT_RT */ /* * The functions below are the same for all rwsem implementations including * the RT specific variant. 
*/ static inline void rwsem_assert_held(const struct rw_semaphore *sem) { if (IS_ENABLED(CONFIG_LOCKDEP)) lockdep_assert_held(sem); else rwsem_assert_held_nolockdep(sem); } static inline void rwsem_assert_held_write(const struct rw_semaphore *sem) { if (IS_ENABLED(CONFIG_LOCKDEP)) lockdep_assert_held_write(sem); else rwsem_assert_held_write_nolockdep(sem); } /* * lock for reading */ extern void down_read(struct rw_semaphore *sem); extern int __must_check down_read_interruptible(struct rw_semaphore *sem); extern int __must_check down_read_killable(struct rw_semaphore *sem); /* * trylock for reading -- returns 1 if successful, 0 if contention */ extern int down_read_trylock(struct rw_semaphore *sem); /* * lock for writing */ extern void down_write(struct rw_semaphore *sem); extern int __must_check down_write_killable(struct rw_semaphore *sem); /* * trylock for writing -- returns 1 if successful, 0 if contention */ extern int down_write_trylock(struct rw_semaphore *sem); /* * release a read lock */ extern void up_read(struct rw_semaphore *sem); /* * release a write lock */ extern void up_write(struct rw_semaphore *sem); DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0) DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) /* * downgrade write lock to read lock */ extern void downgrade_write(struct rw_semaphore *sem); #ifdef CONFIG_DEBUG_LOCK_ALLOC /* * nested locking. NOTE: rwsems are not allowed to recurse * (which occurs if the same task tries to acquire the same * lock instance multiple times), but multiple locks of the * same lock class might be taken, if the order of the locks * is always the same. This ordering rule can be expressed * to lockdep via the _nested() APIs, but enumerating the * subclasses that are used. (If the nesting relationship is * static then another method for expressing nested locking is * the explicit definition of lock class keys and the use of * lockdep_set_class() at lock initialization time. * See Documentation/locking/lockdep-design.rst for more details.) */ extern void down_read_nested(struct rw_semaphore *sem, int subclass); extern int __must_check down_read_killable_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); extern int down_write_killable_nested(struct rw_semaphore *sem, int subclass); extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock); # define down_write_nest_lock(sem, nest_lock) \ do { \ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ _down_write_nest_lock(sem, &(nest_lock)->dep_map); \ } while (0) /* * Take/release a lock when not the owner will release it. * * [ This API should be avoided as much as possible - the * proper abstraction for this case is completions. 
] */ extern void down_read_non_owner(struct rw_semaphore *sem); extern void up_read_non_owner(struct rw_semaphore *sem); #else # define down_read_nested(sem, subclass) down_read(sem) # define down_read_killable_nested(sem, subclass) down_read_killable(sem) # define down_write_nest_lock(sem, nest_lock) down_write(sem) # define down_write_nested(sem, subclass) down_write(sem) # define down_write_killable_nested(sem, subclass) down_write_killable(sem) # define down_read_non_owner(sem) down_read(sem) # define up_read_non_owner(sem) up_read(sem) #endif #endif /* _LINUX_RWSEM_H */
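/*
 * Minimal usage sketch for the rwsem API declared above; this is not part of
 * the header itself.  A shared value is read under down_read()/up_read(),
 * updated under down_write()/up_write(), and the scope-based variant relies
 * on the DEFINE_GUARD(rwsem_read, ...) declaration together with
 * <linux/cleanup.h>.  The example_* names are invented for illustration.
 */
#include <linux/rwsem.h>
#include <linux/cleanup.h>

static DECLARE_RWSEM(example_sem);	/* statically initialized rwsem */
static int example_value;

static int example_read(void)
{
	int v;

	down_read(&example_sem);	/* shared: many readers may hold it */
	v = example_value;
	up_read(&example_sem);
	return v;
}

static void example_write(int v)
{
	down_write(&example_sem);	/* exclusive: excludes readers and writers */
	example_value = v;
	up_write(&example_sem);
}

static int example_read_scoped(void)
{
	guard(rwsem_read)(&example_sem);	/* up_read() runs automatically at '}' */
	return example_value;
}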
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/ext4/sysfs.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Theodore Ts'o (tytso@mit.edu)
 *
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/part_stat.h>

#include "ext4.h"
#include "ext4_jbd2.h"

typedef enum {
	attr_noop,
	attr_delayed_allocation_blocks,
	attr_session_write_kbytes,
	attr_lifetime_write_kbytes,
	attr_reserved_clusters,
	attr_sra_exceeded_retry_limit,
	attr_inode_readahead,
	attr_trigger_test_error,
	attr_first_error_time,
	attr_last_error_time,
	attr_clusters_in_group,
	attr_mb_order,
	attr_feature,
	attr_pointer_pi,
	attr_pointer_ui,
	attr_pointer_ul,
	attr_pointer_u64,
	attr_pointer_u8,
	attr_pointer_string,
	attr_pointer_atomic,
	attr_journal_task,
} attr_id_t;

typedef enum {
	ptr_explicit,
	ptr_ext4_sb_info_offset,
	ptr_ext4_super_block_offset,
} attr_ptr_t;

static const char proc_dirname[] = "fs/ext4";
static struct proc_dir_entry *ext4_proc_root;

struct ext4_attr {
	struct attribute
attr; short attr_id; short attr_ptr; unsigned short attr_size; union { int offset; void *explicit_ptr; } u; }; static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; return sysfs_emit(buf, "%lu\n", (part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - sbi->s_sectors_written_start) >> 1); } static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; return sysfs_emit(buf, "%llu\n", (unsigned long long)(sbi->s_kbytes_written + ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - EXT4_SB(sb)->s_sectors_written_start) >> 1))); } static ssize_t inode_readahead_blks_store(struct ext4_sb_info *sbi, const char *buf, size_t count) { unsigned long t; int ret; ret = kstrtoul(skip_spaces(buf), 0, &t); if (ret) return ret; if (t && (!is_power_of_2(t) || t > 0x40000000)) return -EINVAL; sbi->s_inode_readahead_blks = t; return count; } static ssize_t reserved_clusters_store(struct ext4_sb_info *sbi, const char *buf, size_t count) { unsigned long long val; ext4_fsblk_t clusters = (ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits); int ret; ret = kstrtoull(skip_spaces(buf), 0, &val); if (ret || val >= clusters || (s64)val < 0) return -EINVAL; atomic64_set(&sbi->s_resv_clusters, val); return count; } static ssize_t trigger_test_error(struct ext4_sb_info *sbi, const char *buf, size_t count) { int len = count; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (len && buf[len-1] == '\n') len--; if (len) ext4_error(sbi->s_sb, "%.*s", len, buf); return count; } static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf) { if (!sbi->s_journal) return sysfs_emit(buf, "<none>\n"); return sysfs_emit(buf, "%d\n", task_pid_vnr(sbi->s_journal->j_task)); } #define EXT4_ATTR(_name,_mode,_id) \ static struct ext4_attr ext4_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ .attr_id = attr_##_id, \ } #define EXT4_ATTR_FUNC(_name,_mode) EXT4_ATTR(_name,_mode,_name) #define EXT4_ATTR_FEATURE(_name) EXT4_ATTR(_name, 0444, feature) #define EXT4_ATTR_OFFSET(_name,_mode,_id,_struct,_elname) \ static struct ext4_attr ext4_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ .attr_id = attr_##_id, \ .attr_ptr = ptr_##_struct##_offset, \ .u = { \ .offset = offsetof(struct _struct, _elname),\ }, \ } #define EXT4_ATTR_STRING(_name,_mode,_size,_struct,_elname) \ static struct ext4_attr ext4_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ .attr_id = attr_pointer_string, \ .attr_size = _size, \ .attr_ptr = ptr_##_struct##_offset, \ .u = { \ .offset = offsetof(struct _struct, _elname),\ }, \ } #define EXT4_RO_ATTR_ES_UI(_name,_elname) \ EXT4_ATTR_OFFSET(_name, 0444, pointer_ui, ext4_super_block, _elname) #define EXT4_RO_ATTR_ES_U8(_name,_elname) \ EXT4_ATTR_OFFSET(_name, 0444, pointer_u8, ext4_super_block, _elname) #define EXT4_RO_ATTR_ES_U64(_name,_elname) \ EXT4_ATTR_OFFSET(_name, 0444, pointer_u64, ext4_super_block, _elname) #define EXT4_RO_ATTR_ES_STRING(_name,_elname,_size) \ EXT4_ATTR_STRING(_name, 0444, _size, ext4_super_block, _elname) #define EXT4_RW_ATTR_SBI_PI(_name,_elname) \ EXT4_ATTR_OFFSET(_name, 0644, pointer_pi, ext4_sb_info, _elname) #define EXT4_RW_ATTR_SBI_UI(_name,_elname) \ EXT4_ATTR_OFFSET(_name, 0644, pointer_ui, ext4_sb_info, _elname) #define EXT4_RW_ATTR_SBI_UL(_name,_elname) \ EXT4_ATTR_OFFSET(_name, 0644, pointer_ul, ext4_sb_info, _elname) #define EXT4_RO_ATTR_SBI_ATOMIC(_name,_elname) \ 
EXT4_ATTR_OFFSET(_name, 0444, pointer_atomic, ext4_sb_info, _elname) #define EXT4_ATTR_PTR(_name,_mode,_id,_ptr) \ static struct ext4_attr ext4_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ .attr_id = attr_##_id, \ .attr_ptr = ptr_explicit, \ .u = { \ .explicit_ptr = _ptr, \ }, \ } #define ATTR_LIST(name) &ext4_attr_##name.attr EXT4_ATTR_FUNC(delayed_allocation_blocks, 0444); EXT4_ATTR_FUNC(session_write_kbytes, 0444); EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444); EXT4_ATTR_FUNC(reserved_clusters, 0644); EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444); EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead, ext4_sb_info, s_inode_readahead_blks); EXT4_ATTR_OFFSET(mb_group_prealloc, 0644, clusters_in_group, ext4_sb_info, s_mb_group_prealloc); EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order, ext4_sb_info, s_mb_best_avail_max_trim_order); EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); EXT4_RW_ATTR_SBI_PI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); EXT4_RW_ATTR_SBI_PI(err_ratelimit_burst, s_err_ratelimit_state.burst); EXT4_RW_ATTR_SBI_PI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); EXT4_RW_ATTR_SBI_PI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); EXT4_RW_ATTR_SBI_PI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); EXT4_RW_ATTR_SBI_PI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); #ifdef CONFIG_EXT4_DEBUG EXT4_RW_ATTR_SBI_UL(simulate_fail, s_simulate_fail); #endif EXT4_RO_ATTR_SBI_ATOMIC(warning_count, s_warning_count); EXT4_RO_ATTR_SBI_ATOMIC(msg_count, s_msg_count); EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); EXT4_RO_ATTR_ES_U8(first_error_errcode, s_first_error_errcode); EXT4_RO_ATTR_ES_U8(last_error_errcode, s_last_error_errcode); EXT4_RO_ATTR_ES_UI(first_error_ino, s_first_error_ino); EXT4_RO_ATTR_ES_UI(last_error_ino, s_last_error_ino); EXT4_RO_ATTR_ES_U64(first_error_block, s_first_error_block); EXT4_RO_ATTR_ES_U64(last_error_block, s_last_error_block); EXT4_RO_ATTR_ES_UI(first_error_line, s_first_error_line); EXT4_RO_ATTR_ES_UI(last_error_line, s_last_error_line); EXT4_RO_ATTR_ES_STRING(first_error_func, s_first_error_func, 32); EXT4_RO_ATTR_ES_STRING(last_error_func, s_last_error_func, 32); EXT4_ATTR(first_error_time, 0444, first_error_time); EXT4_ATTR(last_error_time, 0444, last_error_time); EXT4_ATTR(journal_task, 0444, journal_task); EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch); EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit); EXT4_RW_ATTR_SBI_UL(last_trim_minblks, s_last_trim_minblks); static unsigned int old_bump_val = 128; EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val); static struct attribute *ext4_attrs[] = { ATTR_LIST(delayed_allocation_blocks), ATTR_LIST(session_write_kbytes), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(reserved_clusters), ATTR_LIST(sra_exceeded_retry_limit), ATTR_LIST(inode_readahead_blks), ATTR_LIST(inode_goal), ATTR_LIST(mb_stats), ATTR_LIST(mb_max_to_scan), ATTR_LIST(mb_min_to_scan), ATTR_LIST(mb_order2_req), ATTR_LIST(mb_stream_req), 
ATTR_LIST(mb_group_prealloc), ATTR_LIST(mb_max_linear_groups), ATTR_LIST(max_writeback_mb_bump), ATTR_LIST(extent_max_zeroout_kb), ATTR_LIST(trigger_fs_error), ATTR_LIST(err_ratelimit_interval_ms), ATTR_LIST(err_ratelimit_burst), ATTR_LIST(warning_ratelimit_interval_ms), ATTR_LIST(warning_ratelimit_burst), ATTR_LIST(msg_ratelimit_interval_ms), ATTR_LIST(msg_ratelimit_burst), ATTR_LIST(mb_best_avail_max_trim_order), ATTR_LIST(errors_count), ATTR_LIST(warning_count), ATTR_LIST(msg_count), ATTR_LIST(first_error_ino), ATTR_LIST(last_error_ino), ATTR_LIST(first_error_block), ATTR_LIST(last_error_block), ATTR_LIST(first_error_line), ATTR_LIST(last_error_line), ATTR_LIST(first_error_func), ATTR_LIST(last_error_func), ATTR_LIST(first_error_errcode), ATTR_LIST(last_error_errcode), ATTR_LIST(first_error_time), ATTR_LIST(last_error_time), ATTR_LIST(journal_task), #ifdef CONFIG_EXT4_DEBUG ATTR_LIST(simulate_fail), #endif ATTR_LIST(mb_prefetch), ATTR_LIST(mb_prefetch_limit), ATTR_LIST(last_trim_minblks), NULL, }; ATTRIBUTE_GROUPS(ext4); /* Features this copy of ext4 supports */ EXT4_ATTR_FEATURE(lazy_itable_init); EXT4_ATTR_FEATURE(batched_discard); EXT4_ATTR_FEATURE(meta_bg_resize); #ifdef CONFIG_FS_ENCRYPTION EXT4_ATTR_FEATURE(encryption); EXT4_ATTR_FEATURE(test_dummy_encryption_v2); #endif #if IS_ENABLED(CONFIG_UNICODE) EXT4_ATTR_FEATURE(casefold); #endif #ifdef CONFIG_FS_VERITY EXT4_ATTR_FEATURE(verity); #endif EXT4_ATTR_FEATURE(metadata_csum_seed); EXT4_ATTR_FEATURE(fast_commit); #if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) EXT4_ATTR_FEATURE(encrypted_casefold); #endif static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(lazy_itable_init), ATTR_LIST(batched_discard), ATTR_LIST(meta_bg_resize), #ifdef CONFIG_FS_ENCRYPTION ATTR_LIST(encryption), ATTR_LIST(test_dummy_encryption_v2), #endif #if IS_ENABLED(CONFIG_UNICODE) ATTR_LIST(casefold), #endif #ifdef CONFIG_FS_VERITY ATTR_LIST(verity), #endif ATTR_LIST(metadata_csum_seed), ATTR_LIST(fast_commit), #if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) ATTR_LIST(encrypted_casefold), #endif NULL, }; ATTRIBUTE_GROUPS(ext4_feat); static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi) { switch (a->attr_ptr) { case ptr_explicit: return a->u.explicit_ptr; case ptr_ext4_sb_info_offset: return (void *) (((char *) sbi) + a->u.offset); case ptr_ext4_super_block_offset: return (void *) (((char *) sbi->s_es) + a->u.offset); } return NULL; } static ssize_t __print_tstamp(char *buf, __le32 lo, __u8 hi) { return sysfs_emit(buf, "%lld\n", ((time64_t)hi << 32) + le32_to_cpu(lo)); } #define print_tstamp(buf, es, tstamp) \ __print_tstamp(buf, (es)->tstamp, (es)->tstamp ## _hi) static ssize_t ext4_generic_attr_show(struct ext4_attr *a, struct ext4_sb_info *sbi, char *buf) { void *ptr = calc_ptr(a, sbi); if (!ptr) return 0; switch (a->attr_id) { case attr_inode_readahead: case attr_clusters_in_group: case attr_mb_order: case attr_pointer_pi: case attr_pointer_ui: if (a->attr_ptr == ptr_ext4_super_block_offset) return sysfs_emit(buf, "%u\n", le32_to_cpup(ptr)); return sysfs_emit(buf, "%u\n", *((unsigned int *) ptr)); case attr_pointer_ul: return sysfs_emit(buf, "%lu\n", *((unsigned long *) ptr)); case attr_pointer_u8: return sysfs_emit(buf, "%u\n", *((unsigned char *) ptr)); case attr_pointer_u64: if (a->attr_ptr == ptr_ext4_super_block_offset) return sysfs_emit(buf, "%llu\n", le64_to_cpup(ptr)); return sysfs_emit(buf, "%llu\n", *((unsigned long long *) ptr)); case attr_pointer_string: return sysfs_emit(buf, "%.*s\n", 
a->attr_size, (char *) ptr); case attr_pointer_atomic: return sysfs_emit(buf, "%d\n", atomic_read((atomic_t *) ptr)); } return 0; } static ssize_t ext4_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, s_kobj); struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); switch (a->attr_id) { case attr_delayed_allocation_blocks: return sysfs_emit(buf, "%llu\n", (s64) EXT4_C2B(sbi, percpu_counter_sum(&sbi->s_dirtyclusters_counter))); case attr_session_write_kbytes: return session_write_kbytes_show(sbi, buf); case attr_lifetime_write_kbytes: return lifetime_write_kbytes_show(sbi, buf); case attr_reserved_clusters: return sysfs_emit(buf, "%llu\n", (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); case attr_sra_exceeded_retry_limit: return sysfs_emit(buf, "%llu\n", (unsigned long long) percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit)); case attr_feature: return sysfs_emit(buf, "supported\n"); case attr_first_error_time: return print_tstamp(buf, sbi->s_es, s_first_error_time); case attr_last_error_time: return print_tstamp(buf, sbi->s_es, s_last_error_time); case attr_journal_task: return journal_task_show(sbi, buf); default: return ext4_generic_attr_show(a, sbi, buf); } } static ssize_t ext4_generic_attr_store(struct ext4_attr *a, struct ext4_sb_info *sbi, const char *buf, size_t len) { int ret; unsigned int t; unsigned long lt; void *ptr = calc_ptr(a, sbi); if (!ptr) return 0; switch (a->attr_id) { case attr_pointer_pi: ret = kstrtouint(skip_spaces(buf), 0, &t); if (ret) return ret; if ((int)t < 0) return -EINVAL; *((unsigned int *) ptr) = t; return len; case attr_pointer_ui: ret = kstrtouint(skip_spaces(buf), 0, &t); if (ret) return ret; if (a->attr_ptr == ptr_ext4_super_block_offset) *((__le32 *) ptr) = cpu_to_le32(t); else *((unsigned int *) ptr) = t; return len; case attr_mb_order: ret = kstrtouint(skip_spaces(buf), 0, &t); if (ret) return ret; if (t > 64) return -EINVAL; *((unsigned int *) ptr) = t; return len; case attr_clusters_in_group: ret = kstrtouint(skip_spaces(buf), 0, &t); if (ret) return ret; if (t > sbi->s_clusters_per_group) return -EINVAL; *((unsigned int *) ptr) = t; return len; case attr_pointer_ul: ret = kstrtoul(skip_spaces(buf), 0, &lt); if (ret) return ret; *((unsigned long *) ptr) = lt; return len; } return 0; } static ssize_t ext4_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t len) { struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, s_kobj); struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); switch (a->attr_id) { case attr_reserved_clusters: return reserved_clusters_store(sbi, buf, len); case attr_inode_readahead: return inode_readahead_blks_store(sbi, buf, len); case attr_trigger_test_error: return trigger_test_error(sbi, buf, len); default: return ext4_generic_attr_store(a, sbi, buf, len); } } static void ext4_sb_release(struct kobject *kobj) { struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, s_kobj); complete(&sbi->s_kobj_unregister); } static void ext4_feat_release(struct kobject *kobj) { kfree(kobj); } static const struct sysfs_ops ext4_attr_ops = { .show = ext4_attr_show, .store = ext4_attr_store, }; static const struct kobj_type ext4_sb_ktype = { .default_groups = ext4_groups, .sysfs_ops = &ext4_attr_ops, .release = ext4_sb_release, }; static const struct kobj_type ext4_feat_ktype = { .default_groups = ext4_feat_groups, .sysfs_ops = &ext4_attr_ops, .release = 
ext4_feat_release, }; void ext4_notify_error_sysfs(struct ext4_sb_info *sbi) { sysfs_notify(&sbi->s_kobj, NULL, "errors_count"); } static struct kobject *ext4_root; static struct kobject *ext4_feat; int ext4_register_sysfs(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); int err; init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, ext4_root, "%s", sb->s_id); if (err) { kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); return err; } if (ext4_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); if (sbi->s_proc) { proc_create_single_data("options", S_IRUGO, sbi->s_proc, ext4_seq_options_show, sb); proc_create_single_data("es_shrinker_info", S_IRUGO, sbi->s_proc, ext4_seq_es_shrinker_info_show, sb); proc_create_single_data("fc_info", 0444, sbi->s_proc, ext4_fc_info_show, sb); proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc, &ext4_mb_seq_groups_ops, sb); proc_create_single_data("mb_stats", 0444, sbi->s_proc, ext4_seq_mb_stats_show, sb); proc_create_seq_data("mb_structs_summary", 0444, sbi->s_proc, &ext4_mb_seq_structs_summary_ops, sb); } return 0; } void ext4_unregister_sysfs(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); if (sbi->s_proc) remove_proc_subtree(sb->s_id, ext4_proc_root); kobject_del(&sbi->s_kobj); } int __init ext4_init_sysfs(void) { int ret; ext4_root = kobject_create_and_add("ext4", fs_kobj); if (!ext4_root) return -ENOMEM; ext4_feat = kzalloc(sizeof(*ext4_feat), GFP_KERNEL); if (!ext4_feat) { ret = -ENOMEM; goto root_err; } ret = kobject_init_and_add(ext4_feat, &ext4_feat_ktype, ext4_root, "features"); if (ret) goto feat_err; ext4_proc_root = proc_mkdir(proc_dirname, NULL); return ret; feat_err: kobject_put(ext4_feat); ext4_feat = NULL; root_err: kobject_put(ext4_root); ext4_root = NULL; return ret; } void ext4_exit_sysfs(void) { kobject_put(ext4_feat); ext4_feat = NULL; kobject_put(ext4_root); ext4_root = NULL; remove_proc_entry(proc_dirname, NULL); ext4_proc_root = NULL; }
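The sysfs plumbing above never hard-codes one accessor per field: each ext4_attr records either an explicit pointer or an offset into ext4_sb_info / ext4_super_block, and calc_ptr() plus the attr_id switch turn that into a read or write of the right width. Below is a minimal userspace-style sketch of that offset-table pattern; the demo_* names are invented for illustration and are not part of ext4.

/*
 * Illustration only: an attribute table that stores offsetof() values into a
 * state struct, and one generic "show" routine that recomputes the field
 * address from the recorded offset, mirroring calc_ptr() above.
 */
#include <stdio.h>
#include <stddef.h>

struct demo_state {
	unsigned int readahead_blks;
	unsigned int max_to_scan;
};

struct demo_attr {
	const char *name;
	size_t offset;			/* offsetof() into struct demo_state */
};

#define DEMO_ATTR(_name) { #_name, offsetof(struct demo_state, _name) }

static const struct demo_attr demo_attrs[] = {
	DEMO_ATTR(readahead_blks),
	DEMO_ATTR(max_to_scan),
};

/* Generic show: turn the recorded offset back into a pointer and read it. */
static unsigned int demo_show(const struct demo_state *s, const struct demo_attr *a)
{
	return *(const unsigned int *)((const char *)s + a->offset);
}

int main(void)
{
	struct demo_state s = { .readahead_blks = 32, .max_to_scan = 200 };

	for (size_t i = 0; i < sizeof(demo_attrs) / sizeof(demo_attrs[0]); i++)
		printf("%s = %u\n", demo_attrs[i].name, demo_show(&s, &demo_attrs[i]));
	return 0;
}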
// SPDX-License-Identifier: GPL-2.0 /* * fs/partitions/sun.c * * Code extracted from drivers/block/genhd.c * * Copyright (C) 1991-1998 Linus Torvalds * Re-organised Feb 1998 Russell King */ #include "check.h" #define SUN_LABEL_MAGIC 0xDABE #define SUN_VTOC_SANITY 0x600DDEEE enum { SUN_WHOLE_DISK = 5, LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ }; int sun_partition(struct parsed_partitions *state) { int i; __be16 csum; int slot = 1; __be16 *ush; Sector sect; struct sun_disklabel { unsigned char info[128]; /* Informative text string */ struct sun_vtoc { __be32 version; /* Layout version */ char volume[8]; /* Volume name */ __be16 nparts; /* Number of partitions */ struct sun_info { /* Partition hdrs, sec 2 */ __be16 id; __be16 flags; } infos[8]; __be16 padding; /* Alignment padding */ __be32 bootinfo[3]; /* Info needed by mboot */ __be32 sanity; /* To verify vtoc sanity */ __be32 reserved[10]; /* Free space */ __be32 timestamp[8]; /* Partition timestamp */ } vtoc; __be32 write_reinstruct; /* sectors to skip, writes */ __be32 read_reinstruct; /* sectors to skip, reads */ unsigned char spare[148]; /* Padding */ __be16 rspeed; /* Disk rotational speed */ __be16 pcylcount; /* Physical cylinder count */ __be16 sparecyl; /* extra sects per cylinder */ __be16 obs1; /* gap1 */ __be16 obs2; /* gap2 */ __be16 ilfact; /* Interleave factor */ __be16 ncyl; /* Data cylinder count */ __be16 nacyl; /* Alt. cylinder count */ __be16 ntrks; /* Tracks per cylinder */ __be16 nsect; /* Sectors per track */ __be16 obs3; /* bhead - Label head offset */ __be16 obs4; /* ppart - Physical Partition */ struct sun_partition { __be32 start_cylinder; __be32 num_sectors; } partitions[8]; __be16 magic; /* Magic number */ __be16 csum; /* Label xor'd checksum */ } * label; struct sun_partition *p; unsigned long spc; int use_vtoc; int nparts; label = read_part_sector(state, 0, &sect); if (!label) return -1; p = label->partitions; if (be16_to_cpu(label->magic) != SUN_LABEL_MAGIC) { /* printk(KERN_INFO "Dev %s Sun disklabel: bad magic %04x\n", state->disk->disk_name, be16_to_cpu(label->magic)); */ put_dev_sector(sect); return 0; } /* Look at the checksum */ ush = ((__be16 *) (label+1)) - 1; for (csum = 0; ush >= ((__be16 *) label);) csum ^= *ush--; if (csum) { printk("Dev %s Sun disklabel: Csum bad, label corrupted\n", state->disk->disk_name); put_dev_sector(sect); return 0; } /* Check to see if we can use the VTOC table */ use_vtoc = ((be32_to_cpu(label->vtoc.sanity) == SUN_VTOC_SANITY) && (be32_to_cpu(label->vtoc.version) == 1) && (be16_to_cpu(label->vtoc.nparts) <= 8)); /* Use 8 partition entries if not specified in validated VTOC */ nparts = (use_vtoc) ? be16_to_cpu(label->vtoc.nparts) : 8; /* * So that old Linux-Sun partitions continue to work, * allow the VTOC to be used under the additional condition ...
*/ use_vtoc = use_vtoc || !(label->vtoc.sanity || label->vtoc.version || label->vtoc.nparts); spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect); for (i = 0; i < nparts; i++, p++) { unsigned long st_sector; unsigned int num_sectors; st_sector = be32_to_cpu(p->start_cylinder) * spc; num_sectors = be32_to_cpu(p->num_sectors); if (num_sectors) { put_partition(state, slot, st_sector, num_sectors); state->parts[slot].flags = 0; if (use_vtoc) { if (be16_to_cpu(label->vtoc.infos[i].id) == LINUX_RAID_PARTITION) state->parts[slot].flags |= ADDPART_FLAG_RAID; else if (be16_to_cpu(label->vtoc.infos[i].id) == SUN_WHOLE_DISK) state->parts[slot].flags |= ADDPART_FLAG_WHOLEDISK; } } slot++; } strlcat(state->pp_buf, "\n", PAGE_SIZE); put_dev_sector(sect); return 1; }
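Two details of sun_partition() above are easy to miss in the flattened listing: the label is accepted only when XOR-ing all 256 16-bit words of the 512-byte sector yields zero (the stored csum field makes the total cancel out), and partition starts are stored in cylinders, so they are scaled by tracks-per-cylinder times sectors-per-track. Below is a small standalone sketch of both, using hypothetical helper names that are not part of the kernel code.

/*
 * Illustration only: the Sun-disklabel XOR checksum and the
 * cylinder-to-sector conversion performed by sun_partition() above.
 */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* XOR checksum over a 512-byte Sun disklabel sector; 0 means "label OK". */
uint16_t sun_label_xor(const uint8_t *sect)
{
	uint16_t csum = 0;

	for (size_t i = 0; i < 512; i += 2)
		csum ^= (uint16_t)((sect[i] << 8) | sect[i + 1]);	/* big-endian 16-bit words */
	return csum;
}

/* Convert a cylinder-based partition start to an absolute sector offset. */
unsigned long sun_start_sector(uint32_t start_cylinder, uint16_t ntrks, uint16_t nsect)
{
	unsigned long spc = (unsigned long)ntrks * nsect;	/* sectors per cylinder */

	return start_cylinder * spc;
}

int main(void)
{
	uint8_t sect[512] = { 0 };	/* an all-zero label XORs to 0, i.e. "valid" */

	printf("csum = 0x%04x\n", sun_label_xor(sect));
	printf("start = %lu sectors\n", sun_start_sector(10, 16, 63));	/* sample geometry */
	return 0;
}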
/* * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. */ /*-************************************* * Dependencies ***************************************/ #define ZSTD_DEPS_NEED_MALLOC #include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ #include "error_private.h" #include "zstd_internal.h" /*-**************************************** * Version ******************************************/ unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } /*-**************************************** * ZSTD Error Management ******************************************/ #undef ZSTD_isError /* defined within zstd_internal.h */ /*! ZSTD_isError() : * tells if a return value is an error code * symbol is required for external callers */ unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } /*! ZSTD_getErrorName() : * provides error code string from function result (useful for debugging) */ const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } /*! ZSTD_getError() : * convert a `size_t` function result into a proper ZSTD_errorCode enum */ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } /*! ZSTD_getErrorString() : * provides error code string from enum */ const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } /*=************************************************************** * Custom allocator ****************************************************************/ void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) { if (customMem.customAlloc) return customMem.customAlloc(customMem.opaque, size); return ZSTD_malloc(size); } void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) { if (customMem.customAlloc) { /* calloc implemented as malloc+memset; * not as efficient as calloc, but next best guess for custom malloc */ void* const ptr = customMem.customAlloc(customMem.opaque, size); ZSTD_memset(ptr, 0, size); return ptr; } return ZSTD_calloc(1, size); } void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) { if (ptr!=NULL) { if (customMem.customFree) customMem.customFree(customMem.opaque, ptr); else ZSTD_free(ptr); } }
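The custom-allocator helpers above simply route through the customAlloc/customFree callbacks in ZSTD_customMem when they are set, and fall back to ZSTD_malloc/ZSTD_calloc/ZSTD_free otherwise. The sketch below exercises them with an invented byte-counting allocator; the my_* names and custom_mem_demo() are hypothetical, and the prototypes are copied from the definitions above rather than taken from a header.

/*
 * Usage sketch only, assuming the same headers as zstd_common.c above.
 */
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h"		/* ZSTD_malloc, ZSTD_free, size_t */
#include "zstd_internal.h"	/* ZSTD_customMem */

/* Prototypes as defined above (normally provided by the zstd headers). */
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);

static void *my_alloc(void *opaque, size_t size)
{
	size_t *requested = opaque;	/* count how many bytes were asked for */

	*requested += size;
	return ZSTD_malloc(size);
}

static void my_free(void *opaque, void *address)
{
	(void)opaque;
	ZSTD_free(address);
}

void custom_mem_demo(void)
{
	size_t requested = 0;
	ZSTD_customMem cmem = { my_alloc, my_free, &requested };	/* customAlloc, customFree, opaque */

	void *buf = ZSTD_customCalloc(64, cmem);	/* routed through my_alloc, then memset to 0 */
	ZSTD_customFree(buf, cmem);			/* routed through my_free */
}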
// SPDX-License-Identifier: GPL-2.0-or-later /* * USB HID support for Linux * * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2007-2008 Oliver Neukum * Copyright (c) 2006-2010 Jiri Kosina */ /* */ #include <linux/module.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/mm.h> #include <linux/mutex.h> #include <linux/property.h> #include <linux/spinlock.h> #include <linux/unaligned.h> #include <asm/byteorder.h> #include <linux/input.h> #include <linux/wait.h> #include <linux/workqueue.h> #include <linux/string.h> #include <linux/usb.h> #include <linux/hid.h> #include <linux/hiddev.h> #include <linux/hid-debug.h> #include <linux/hidraw.h> #include "usbhid.h" /* * Version Information */ #define DRIVER_DESC "USB HID core driver" /* * Module parameters. */ static unsigned int hid_mousepoll_interval; module_param_named(mousepoll, hid_mousepoll_interval, uint, 0644); MODULE_PARM_DESC(mousepoll, "Polling interval of mice"); static unsigned int hid_jspoll_interval; module_param_named(jspoll, hid_jspoll_interval, uint, 0644); MODULE_PARM_DESC(jspoll, "Polling interval of joysticks"); static unsigned int hid_kbpoll_interval; module_param_named(kbpoll, hid_kbpoll_interval, uint, 0644); MODULE_PARM_DESC(kbpoll, "Polling interval of keyboards"); static unsigned int ignoreled; module_param_named(ignoreled, ignoreled, uint, 0644); MODULE_PARM_DESC(ignoreled, "Autosuspend with active leds"); /* Quirks specified at module load time */ static char *quirks_param[MAX_USBHID_BOOT_QUIRKS]; module_param_array_named(quirks, quirks_param, charp, NULL, 0444); MODULE_PARM_DESC(quirks, "Add/modify USB HID quirks by specifying " " quirks=vendorID:productID:quirks" " where vendorID, productID, and quirks are all in" " 0x-prefixed hex"); /* * Input submission and I/O error handler.
*/ static void hid_io_error(struct hid_device *hid); static int hid_submit_out(struct hid_device *hid); static int hid_submit_ctrl(struct hid_device *hid); static void hid_cancel_delayed_stuff(struct usbhid_device *usbhid); /* Start up the input URB */ static int hid_start_in(struct hid_device *hid) { unsigned long flags; int rc = 0; struct usbhid_device *usbhid = hid->driver_data; spin_lock_irqsave(&usbhid->lock, flags); if (test_bit(HID_IN_POLLING, &usbhid->iofl) && !test_bit(HID_DISCONNECTED, &usbhid->iofl) && !test_bit(HID_SUSPENDED, &usbhid->iofl) && !test_and_set_bit(HID_IN_RUNNING, &usbhid->iofl)) { rc = usb_submit_urb(usbhid->urbin, GFP_ATOMIC); if (rc != 0) { clear_bit(HID_IN_RUNNING, &usbhid->iofl); if (rc == -ENOSPC) set_bit(HID_NO_BANDWIDTH, &usbhid->iofl); } else { clear_bit(HID_NO_BANDWIDTH, &usbhid->iofl); } } spin_unlock_irqrestore(&usbhid->lock, flags); return rc; } /* I/O retry timer routine */ static void hid_retry_timeout(struct timer_list *t) { struct usbhid_device *usbhid = from_timer(usbhid, t, io_retry); struct hid_device *hid = usbhid->hid; dev_dbg(&usbhid->intf->dev, "retrying intr urb\n"); if (hid_start_in(hid)) hid_io_error(hid); } /* Workqueue routine to reset the device or clear a halt */ static void hid_reset(struct work_struct *work) { struct usbhid_device *usbhid = container_of(work, struct usbhid_device, reset_work); struct hid_device *hid = usbhid->hid; int rc; if (test_bit(HID_CLEAR_HALT, &usbhid->iofl)) { dev_dbg(&usbhid->intf->dev, "clear halt\n"); rc = usb_clear_halt(hid_to_usb_dev(hid), usbhid->urbin->pipe); clear_bit(HID_CLEAR_HALT, &usbhid->iofl); if (rc == 0) { hid_start_in(hid); } else { dev_dbg(&usbhid->intf->dev, "clear-halt failed: %d\n", rc); set_bit(HID_RESET_PENDING, &usbhid->iofl); } } if (test_bit(HID_RESET_PENDING, &usbhid->iofl)) { dev_dbg(&usbhid->intf->dev, "resetting device\n"); usb_queue_reset_device(usbhid->intf); } } /* Main I/O error handler */ static void hid_io_error(struct hid_device *hid) { unsigned long flags; struct usbhid_device *usbhid = hid->driver_data; spin_lock_irqsave(&usbhid->lock, flags); /* Stop when disconnected */ if (test_bit(HID_DISCONNECTED, &usbhid->iofl)) goto done; /* If it has been a while since the last error, we'll assume * this a brand new error and reset the retry timeout. */ if (time_after(jiffies, usbhid->stop_retry + HZ/2)) usbhid->retry_delay = 0; /* When an error occurs, retry at increasing intervals */ if (usbhid->retry_delay == 0) { usbhid->retry_delay = 13; /* Then 26, 52, 104, 104, ... 
*/ usbhid->stop_retry = jiffies + msecs_to_jiffies(1000); } else if (usbhid->retry_delay < 100) usbhid->retry_delay *= 2; if (time_after(jiffies, usbhid->stop_retry)) { /* Retries failed, so do a port reset unless we lack bandwidth*/ if (!test_bit(HID_NO_BANDWIDTH, &usbhid->iofl) && !test_and_set_bit(HID_RESET_PENDING, &usbhid->iofl)) { schedule_work(&usbhid->reset_work); goto done; } } mod_timer(&usbhid->io_retry, jiffies + msecs_to_jiffies(usbhid->retry_delay)); done: spin_unlock_irqrestore(&usbhid->lock, flags); } static void usbhid_mark_busy(struct usbhid_device *usbhid) { struct usb_interface *intf = usbhid->intf; usb_mark_last_busy(interface_to_usbdev(intf)); } static int usbhid_restart_out_queue(struct usbhid_device *usbhid) { struct hid_device *hid = usb_get_intfdata(usbhid->intf); int kicked; int r; if (!hid || test_bit(HID_RESET_PENDING, &usbhid->iofl) || test_bit(HID_SUSPENDED, &usbhid->iofl)) return 0; if ((kicked = (usbhid->outhead != usbhid->outtail))) { hid_dbg(hid, "Kicking head %d tail %d", usbhid->outhead, usbhid->outtail); /* Try to wake up from autosuspend... */ r = usb_autopm_get_interface_async(usbhid->intf); if (r < 0) return r; /* * If still suspended, don't submit. Submission will * occur if/when resume drains the queue. */ if (test_bit(HID_SUSPENDED, &usbhid->iofl)) { usb_autopm_put_interface_no_suspend(usbhid->intf); return r; } /* Asynchronously flush queue. */ set_bit(HID_OUT_RUNNING, &usbhid->iofl); if (hid_submit_out(hid)) { clear_bit(HID_OUT_RUNNING, &usbhid->iofl); usb_autopm_put_interface_async(usbhid->intf); } wake_up(&usbhid->wait); } return kicked; } static int usbhid_restart_ctrl_queue(struct usbhid_device *usbhid) { struct hid_device *hid = usb_get_intfdata(usbhid->intf); int kicked; int r; WARN_ON(hid == NULL); if (!hid || test_bit(HID_RESET_PENDING, &usbhid->iofl) || test_bit(HID_SUSPENDED, &usbhid->iofl)) return 0; if ((kicked = (usbhid->ctrlhead != usbhid->ctrltail))) { hid_dbg(hid, "Kicking head %d tail %d", usbhid->ctrlhead, usbhid->ctrltail); /* Try to wake up from autosuspend... */ r = usb_autopm_get_interface_async(usbhid->intf); if (r < 0) return r; /* * If still suspended, don't submit. Submission will * occur if/when resume drains the queue. */ if (test_bit(HID_SUSPENDED, &usbhid->iofl)) { usb_autopm_put_interface_no_suspend(usbhid->intf); return r; } /* Asynchronously flush queue. */ set_bit(HID_CTRL_RUNNING, &usbhid->iofl); if (hid_submit_ctrl(hid)) { clear_bit(HID_CTRL_RUNNING, &usbhid->iofl); usb_autopm_put_interface_async(usbhid->intf); } wake_up(&usbhid->wait); } return kicked; } /* * Input interrupt completion handler. 
*/ static void hid_irq_in(struct urb *urb) { struct hid_device *hid = urb->context; struct usbhid_device *usbhid = hid->driver_data; int status; switch (urb->status) { case 0: /* success */ usbhid->retry_delay = 0; if (!test_bit(HID_OPENED, &usbhid->iofl)) break; usbhid_mark_busy(usbhid); if (!test_bit(HID_RESUME_RUNNING, &usbhid->iofl)) { hid_input_report(urb->context, HID_INPUT_REPORT, urb->transfer_buffer, urb->actual_length, 1); /* * autosuspend refused while keys are pressed * because most keyboards don't wake up when * a key is released */ if (hid_check_keys_pressed(hid)) set_bit(HID_KEYS_PRESSED, &usbhid->iofl); else clear_bit(HID_KEYS_PRESSED, &usbhid->iofl); } break; case -EPIPE: /* stall */ usbhid_mark_busy(usbhid); clear_bit(HID_IN_RUNNING, &usbhid->iofl); set_bit(HID_CLEAR_HALT, &usbhid->iofl); schedule_work(&usbhid->reset_work); return; case -ECONNRESET: /* unlink */ case -ENOENT: case -ESHUTDOWN: /* unplug */ clear_bit(HID_IN_RUNNING, &usbhid->iofl); return; case -EILSEQ: /* protocol error or unplug */ case -EPROTO: /* protocol error or unplug */ case -ETIME: /* protocol error or unplug */ case -ETIMEDOUT: /* Should never happen, but... */ usbhid_mark_busy(usbhid); clear_bit(HID_IN_RUNNING, &usbhid->iofl); hid_io_error(hid); return; default: /* error */ hid_warn(urb->dev, "input irq status %d received\n", urb->status); } status = usb_submit_urb(urb, GFP_ATOMIC); if (status) { clear_bit(HID_IN_RUNNING, &usbhid->iofl); if (status != -EPERM) { hid_err(hid, "can't resubmit intr, %s-%s/input%d, status %d\n", hid_to_usb_dev(hid)->bus->bus_name, hid_to_usb_dev(hid)->devpath, usbhid->ifnum, status); hid_io_error(hid); } } } static int hid_submit_out(struct hid_device *hid) { struct hid_report *report; char *raw_report; struct usbhid_device *usbhid = hid->driver_data; int r; report = usbhid->out[usbhid->outtail].report; raw_report = usbhid->out[usbhid->outtail].raw_report; usbhid->urbout->transfer_buffer_length = hid_report_len(report); usbhid->urbout->dev = hid_to_usb_dev(hid); if (raw_report) { memcpy(usbhid->outbuf, raw_report, usbhid->urbout->transfer_buffer_length); kfree(raw_report); usbhid->out[usbhid->outtail].raw_report = NULL; } dbg_hid("submitting out urb\n"); r = usb_submit_urb(usbhid->urbout, GFP_ATOMIC); if (r < 0) { hid_err(hid, "usb_submit_urb(out) failed: %d\n", r); return r; } usbhid->last_out = jiffies; return 0; } static int hid_submit_ctrl(struct hid_device *hid) { struct hid_report *report; unsigned char dir; char *raw_report; int len, r; struct usbhid_device *usbhid = hid->driver_data; report = usbhid->ctrl[usbhid->ctrltail].report; raw_report = usbhid->ctrl[usbhid->ctrltail].raw_report; dir = usbhid->ctrl[usbhid->ctrltail].dir; len = hid_report_len(report); if (dir == USB_DIR_OUT) { usbhid->urbctrl->pipe = usb_sndctrlpipe(hid_to_usb_dev(hid), 0); if (raw_report) { memcpy(usbhid->ctrlbuf, raw_report, len); kfree(raw_report); usbhid->ctrl[usbhid->ctrltail].raw_report = NULL; } } else { int maxpacket; usbhid->urbctrl->pipe = usb_rcvctrlpipe(hid_to_usb_dev(hid), 0); maxpacket = usb_maxpacket(hid_to_usb_dev(hid), usbhid->urbctrl->pipe); len += (len == 0); /* Don't allow 0-length reports */ len = round_up(len, maxpacket); if (len > usbhid->bufsize) len = usbhid->bufsize; } usbhid->urbctrl->transfer_buffer_length = len; usbhid->urbctrl->dev = hid_to_usb_dev(hid); usbhid->cr->bRequestType = USB_TYPE_CLASS | USB_RECIP_INTERFACE | dir; usbhid->cr->bRequest = (dir == USB_DIR_OUT) ? 
HID_REQ_SET_REPORT : HID_REQ_GET_REPORT; usbhid->cr->wValue = cpu_to_le16(((report->type + 1) << 8) | report->id); usbhid->cr->wIndex = cpu_to_le16(usbhid->ifnum); usbhid->cr->wLength = cpu_to_le16(len); dbg_hid("submitting ctrl urb: %s wValue=0x%04x wIndex=0x%04x wLength=%u\n", usbhid->cr->bRequest == HID_REQ_SET_REPORT ? "Set_Report" : "Get_Report", usbhid->cr->wValue, usbhid->cr->wIndex, usbhid->cr->wLength); r = usb_submit_urb(usbhid->urbctrl, GFP_ATOMIC); if (r < 0) { hid_err(hid, "usb_submit_urb(ctrl) failed: %d\n", r); return r; } usbhid->last_ctrl = jiffies; return 0; } /* * Output interrupt completion handler. */ static void hid_irq_out(struct urb *urb) { struct hid_device *hid = urb->context; struct usbhid_device *usbhid = hid->driver_data; unsigned long flags; int unplug = 0; switch (urb->status) { case 0: /* success */ break; case -ESHUTDOWN: /* unplug */ unplug = 1; break; case -EILSEQ: /* protocol error or unplug */ case -EPROTO: /* protocol error or unplug */ case -ECONNRESET: /* unlink */ case -ENOENT: break; default: /* error */ hid_warn(urb->dev, "output irq status %d received\n", urb->status); } spin_lock_irqsave(&usbhid->lock, flags); if (unplug) { usbhid->outtail = usbhid->outhead; } else { usbhid->outtail = (usbhid->outtail + 1) & (HID_OUTPUT_FIFO_SIZE - 1); if (usbhid->outhead != usbhid->outtail && hid_submit_out(hid) == 0) { /* Successfully submitted next urb in queue */ spin_unlock_irqrestore(&usbhid->lock, flags); return; } } clear_bit(HID_OUT_RUNNING, &usbhid->iofl); spin_unlock_irqrestore(&usbhid->lock, flags); usb_autopm_put_interface_async(usbhid->intf); wake_up(&usbhid->wait); } /* * Control pipe completion handler. */ static void hid_ctrl(struct urb *urb) { struct hid_device *hid = urb->context; struct usbhid_device *usbhid = hid->driver_data; unsigned long flags; int unplug = 0, status = urb->status; switch (status) { case 0: /* success */ if (usbhid->ctrl[usbhid->ctrltail].dir == USB_DIR_IN) hid_input_report(urb->context, usbhid->ctrl[usbhid->ctrltail].report->type, urb->transfer_buffer, urb->actual_length, 0); break; case -ESHUTDOWN: /* unplug */ unplug = 1; break; case -EILSEQ: /* protocol error or unplug */ case -EPROTO: /* protocol error or unplug */ case -ECONNRESET: /* unlink */ case -ENOENT: case -EPIPE: /* report not available */ break; default: /* error */ hid_warn(urb->dev, "ctrl urb status %d received\n", status); } spin_lock_irqsave(&usbhid->lock, flags); if (unplug) { usbhid->ctrltail = usbhid->ctrlhead; } else if (usbhid->ctrlhead != usbhid->ctrltail) { usbhid->ctrltail = (usbhid->ctrltail + 1) & (HID_CONTROL_FIFO_SIZE - 1); if (usbhid->ctrlhead != usbhid->ctrltail && hid_submit_ctrl(hid) == 0) { /* Successfully submitted next urb in queue */ spin_unlock_irqrestore(&usbhid->lock, flags); return; } } clear_bit(HID_CTRL_RUNNING, &usbhid->iofl); spin_unlock_irqrestore(&usbhid->lock, flags); usb_autopm_put_interface_async(usbhid->intf); wake_up(&usbhid->wait); } static void __usbhid_submit_report(struct hid_device *hid, struct hid_report *report, unsigned char dir) { int head; struct usbhid_device *usbhid = hid->driver_data; if (((hid->quirks & HID_QUIRK_NOGET) && dir == USB_DIR_IN) || test_bit(HID_DISCONNECTED, &usbhid->iofl)) return; if (usbhid->urbout && dir == USB_DIR_OUT && report->type == HID_OUTPUT_REPORT) { if ((head = (usbhid->outhead + 1) & (HID_OUTPUT_FIFO_SIZE - 1)) == usbhid->outtail) { hid_warn(hid, "output queue full\n"); return; } usbhid->out[usbhid->outhead].raw_report = hid_alloc_report_buf(report, GFP_ATOMIC); if 
(!usbhid->out[usbhid->outhead].raw_report) { hid_warn(hid, "output queueing failed\n"); return; } hid_output_report(report, usbhid->out[usbhid->outhead].raw_report); usbhid->out[usbhid->outhead].report = report; usbhid->outhead = head; /* If the queue isn't running, restart it */ if (!test_bit(HID_OUT_RUNNING, &usbhid->iofl)) { usbhid_restart_out_queue(usbhid); /* Otherwise see if an earlier request has timed out */ } else if (time_after(jiffies, usbhid->last_out + HZ * 5)) { /* Prevent autosuspend following the unlink */ usb_autopm_get_interface_no_resume(usbhid->intf); /* * Prevent resubmission in case the URB completes * before we can unlink it. We don't want to cancel * the wrong transfer! */ usb_block_urb(usbhid->urbout); /* Drop lock to avoid deadlock if the callback runs */ spin_unlock(&usbhid->lock); usb_unlink_urb(usbhid->urbout); spin_lock(&usbhid->lock); usb_unblock_urb(usbhid->urbout); /* Unlink might have stopped the queue */ if (!test_bit(HID_OUT_RUNNING, &usbhid->iofl)) usbhid_restart_out_queue(usbhid); /* Now we can allow autosuspend again */ usb_autopm_put_interface_async(usbhid->intf); } return; } if ((head = (usbhid->ctrlhead + 1) & (HID_CONTROL_FIFO_SIZE - 1)) == usbhid->ctrltail) { hid_warn(hid, "control queue full\n"); return; } if (dir == USB_DIR_OUT) { usbhid->ctrl[usbhid->ctrlhead].raw_report = hid_alloc_report_buf(report, GFP_ATOMIC); if (!usbhid->ctrl[usbhid->ctrlhead].raw_report) { hid_warn(hid, "control queueing failed\n"); return; } hid_output_report(report, usbhid->ctrl[usbhid->ctrlhead].raw_report); } usbhid->ctrl[usbhid->ctrlhead].report = report; usbhid->ctrl[usbhid->ctrlhead].dir = dir; usbhid->ctrlhead = head; /* If the queue isn't running, restart it */ if (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl)) { usbhid_restart_ctrl_queue(usbhid); /* Otherwise see if an earlier request has timed out */ } else if (time_after(jiffies, usbhid->last_ctrl + HZ * 5)) { /* Prevent autosuspend following the unlink */ usb_autopm_get_interface_no_resume(usbhid->intf); /* * Prevent resubmission in case the URB completes * before we can unlink it. We don't want to cancel * the wrong transfer! 
*/ usb_block_urb(usbhid->urbctrl); /* Drop lock to avoid deadlock if the callback runs */ spin_unlock(&usbhid->lock); usb_unlink_urb(usbhid->urbctrl); spin_lock(&usbhid->lock); usb_unblock_urb(usbhid->urbctrl); /* Unlink might have stopped the queue */ if (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl)) usbhid_restart_ctrl_queue(usbhid); /* Now we can allow autosuspend again */ usb_autopm_put_interface_async(usbhid->intf); } } static void usbhid_submit_report(struct hid_device *hid, struct hid_report *report, unsigned char dir) { struct usbhid_device *usbhid = hid->driver_data; unsigned long flags; spin_lock_irqsave(&usbhid->lock, flags); __usbhid_submit_report(hid, report, dir); spin_unlock_irqrestore(&usbhid->lock, flags); } static int usbhid_wait_io(struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; if (!wait_event_timeout(usbhid->wait, (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl) && !test_bit(HID_OUT_RUNNING, &usbhid->iofl)), 10*HZ)) { dbg_hid("timeout waiting for ctrl or out queue to clear\n"); return -1; } return 0; } static int hid_set_idle(struct usb_device *dev, int ifnum, int report, int idle) { return usb_control_msg(dev, usb_sndctrlpipe(dev, 0), HID_REQ_SET_IDLE, USB_TYPE_CLASS | USB_RECIP_INTERFACE, (idle << 8) | report, ifnum, NULL, 0, USB_CTRL_SET_TIMEOUT); } static int hid_get_class_descriptor(struct usb_device *dev, int ifnum, unsigned char type, void *buf, int size) { int result, retries = 4; memset(buf, 0, size); do { result = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), USB_REQ_GET_DESCRIPTOR, USB_RECIP_INTERFACE | USB_DIR_IN, (type << 8), ifnum, buf, size, USB_CTRL_GET_TIMEOUT); retries--; } while (result < size && retries); return result; } static int usbhid_open(struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; int res; mutex_lock(&usbhid->mutex); set_bit(HID_OPENED, &usbhid->iofl); if (hid->quirks & HID_QUIRK_ALWAYS_POLL) { res = 0; goto Done; } res = usb_autopm_get_interface(usbhid->intf); /* the device must be awake to reliably request remote wakeup */ if (res < 0) { clear_bit(HID_OPENED, &usbhid->iofl); res = -EIO; goto Done; } usbhid->intf->needs_remote_wakeup = 1; set_bit(HID_RESUME_RUNNING, &usbhid->iofl); set_bit(HID_IN_POLLING, &usbhid->iofl); res = hid_start_in(hid); if (res) { if (res != -ENOSPC) { hid_io_error(hid); res = 0; } else { /* no use opening if resources are insufficient */ res = -EBUSY; clear_bit(HID_OPENED, &usbhid->iofl); clear_bit(HID_IN_POLLING, &usbhid->iofl); usbhid->intf->needs_remote_wakeup = 0; } } usb_autopm_put_interface(usbhid->intf); /* * In case events are generated while nobody was listening, * some are released when the device is re-opened. * Wait 50 msec for the queue to empty before allowing events * to go through hid. */ if (res == 0) msleep(50); clear_bit(HID_RESUME_RUNNING, &usbhid->iofl); Done: mutex_unlock(&usbhid->mutex); return res; } static void usbhid_close(struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; mutex_lock(&usbhid->mutex); /* * Make sure we don't restart data acquisition due to * a resumption we no longer care about by avoiding racing * with hid_start_in(). 
*/ spin_lock_irq(&usbhid->lock); clear_bit(HID_OPENED, &usbhid->iofl); if (!(hid->quirks & HID_QUIRK_ALWAYS_POLL)) clear_bit(HID_IN_POLLING, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); if (!(hid->quirks & HID_QUIRK_ALWAYS_POLL)) { hid_cancel_delayed_stuff(usbhid); usb_kill_urb(usbhid->urbin); usbhid->intf->needs_remote_wakeup = 0; } mutex_unlock(&usbhid->mutex); } /* * Initialize all reports */ void usbhid_init_reports(struct hid_device *hid) { struct hid_report *report; struct usbhid_device *usbhid = hid->driver_data; struct hid_report_enum *report_enum; int err, ret; report_enum = &hid->report_enum[HID_INPUT_REPORT]; list_for_each_entry(report, &report_enum->report_list, list) usbhid_submit_report(hid, report, USB_DIR_IN); report_enum = &hid->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(report, &report_enum->report_list, list) usbhid_submit_report(hid, report, USB_DIR_IN); err = 0; ret = usbhid_wait_io(hid); while (ret) { err |= ret; if (test_bit(HID_CTRL_RUNNING, &usbhid->iofl)) usb_kill_urb(usbhid->urbctrl); if (test_bit(HID_OUT_RUNNING, &usbhid->iofl)) usb_kill_urb(usbhid->urbout); ret = usbhid_wait_io(hid); } if (err) hid_warn(hid, "timeout initializing reports\n"); } /* * Reset LEDs which BIOS might have left on. For now, just NumLock (0x01). */ static int hid_find_field_early(struct hid_device *hid, unsigned int page, unsigned int hid_code, struct hid_field **pfield) { struct hid_report *report; struct hid_field *field; struct hid_usage *usage; int i, j; list_for_each_entry(report, &hid->report_enum[HID_OUTPUT_REPORT].report_list, list) { for (i = 0; i < report->maxfield; i++) { field = report->field[i]; for (j = 0; j < field->maxusage; j++) { usage = &field->usage[j]; if ((usage->hid & HID_USAGE_PAGE) == page && (usage->hid & 0xFFFF) == hid_code) { *pfield = field; return j; } } } } return -1; } static void usbhid_set_leds(struct hid_device *hid) { struct hid_field *field; int offset; if ((offset = hid_find_field_early(hid, HID_UP_LED, 0x01, &field)) != -1) { hid_set_field(field, offset, 0); usbhid_submit_report(hid, field->report, USB_DIR_OUT); } } /* * Traverse the supplied list of reports and find the longest */ static void hid_find_max_report(struct hid_device *hid, unsigned int type, unsigned int *max) { struct hid_report *report; unsigned int size; list_for_each_entry(report, &hid->report_enum[type].report_list, list) { size = ((report->size - 1) >> 3) + 1 + hid->report_enum[type].numbered; if (*max < size) *max = size; } } static int hid_alloc_buffers(struct usb_device *dev, struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; usbhid->inbuf = usb_alloc_coherent(dev, usbhid->bufsize, GFP_KERNEL, &usbhid->inbuf_dma); usbhid->outbuf = usb_alloc_coherent(dev, usbhid->bufsize, GFP_KERNEL, &usbhid->outbuf_dma); usbhid->cr = kmalloc(sizeof(*usbhid->cr), GFP_KERNEL); usbhid->ctrlbuf = usb_alloc_coherent(dev, usbhid->bufsize, GFP_KERNEL, &usbhid->ctrlbuf_dma); if (!usbhid->inbuf || !usbhid->outbuf || !usbhid->cr || !usbhid->ctrlbuf) return -1; return 0; } static int usbhid_get_raw_report(struct hid_device *hid, unsigned char report_number, __u8 *buf, size_t count, unsigned char report_type) { struct usbhid_device *usbhid = hid->driver_data; struct usb_device *dev = hid_to_usb_dev(hid); struct usb_interface *intf = usbhid->intf; struct usb_host_interface *interface = intf->cur_altsetting; int skipped_report_id = 0; int ret; /* Byte 0 is the report number. 
Report data starts at byte 1.*/ buf[0] = report_number; if (report_number == 0x0) { /* Offset the return buffer by 1, so that the report ID will remain in byte 0. */ buf++; count--; skipped_report_id = 1; } ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), HID_REQ_GET_REPORT, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, ((report_type + 1) << 8) | report_number, interface->desc.bInterfaceNumber, buf, count, USB_CTRL_SET_TIMEOUT); /* count also the report id */ if (ret > 0 && skipped_report_id) ret++; return ret; } static int usbhid_set_raw_report(struct hid_device *hid, unsigned int reportnum, __u8 *buf, size_t count, unsigned char rtype) { struct usbhid_device *usbhid = hid->driver_data; struct usb_device *dev = hid_to_usb_dev(hid); struct usb_interface *intf = usbhid->intf; struct usb_host_interface *interface = intf->cur_altsetting; int ret, skipped_report_id = 0; /* Byte 0 is the report number. Report data starts at byte 1.*/ if ((rtype == HID_OUTPUT_REPORT) && (hid->quirks & HID_QUIRK_SKIP_OUTPUT_REPORT_ID)) buf[0] = 0; else buf[0] = reportnum; if (buf[0] == 0x0) { /* Don't send the Report ID */ buf++; count--; skipped_report_id = 1; } ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), HID_REQ_SET_REPORT, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, ((rtype + 1) << 8) | reportnum, interface->desc.bInterfaceNumber, buf, count, USB_CTRL_SET_TIMEOUT); /* count also the report id, if this was a numbered report. */ if (ret > 0 && skipped_report_id) ret++; return ret; } static int usbhid_output_report(struct hid_device *hid, __u8 *buf, size_t count) { struct usbhid_device *usbhid = hid->driver_data; struct usb_device *dev = hid_to_usb_dev(hid); int actual_length, skipped_report_id = 0, ret; if (!usbhid->urbout) return -ENOSYS; if (buf[0] == 0x0) { /* Don't send the Report ID */ buf++; count--; skipped_report_id = 1; } ret = usb_interrupt_msg(dev, usbhid->urbout->pipe, buf, count, &actual_length, USB_CTRL_SET_TIMEOUT); /* return the number of bytes transferred */ if (ret == 0) { ret = actual_length; /* count also the report id */ if (skipped_report_id) ret++; } return ret; } static void hid_free_buffers(struct usb_device *dev, struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; usb_free_coherent(dev, usbhid->bufsize, usbhid->inbuf, usbhid->inbuf_dma); usb_free_coherent(dev, usbhid->bufsize, usbhid->outbuf, usbhid->outbuf_dma); kfree(usbhid->cr); usb_free_coherent(dev, usbhid->bufsize, usbhid->ctrlbuf, usbhid->ctrlbuf_dma); } static int usbhid_parse(struct hid_device *hid) { struct usb_interface *intf = to_usb_interface(hid->dev.parent); struct usb_host_interface *interface = intf->cur_altsetting; struct usb_device *dev = interface_to_usbdev (intf); struct hid_descriptor *hdesc; u32 quirks = 0; unsigned int rsize = 0; char *rdesc; int ret, n; int num_descriptors; size_t offset = offsetof(struct hid_descriptor, desc); quirks = hid_lookup_quirk(hid); if (quirks & HID_QUIRK_IGNORE) return -ENODEV; /* Many keyboards and mice don't like to be polled for reports, * so we will always set the HID_QUIRK_NOGET flag for them. 
*/ if (interface->desc.bInterfaceSubClass == USB_INTERFACE_SUBCLASS_BOOT) { if (interface->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_KEYBOARD || interface->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) quirks |= HID_QUIRK_NOGET; } if (usb_get_extra_descriptor(interface, HID_DT_HID, &hdesc) && (!interface->desc.bNumEndpoints || usb_get_extra_descriptor(&interface->endpoint[0], HID_DT_HID, &hdesc))) { dbg_hid("class descriptor not present\n"); return -ENODEV; } if (hdesc->bLength < sizeof(struct hid_descriptor)) { dbg_hid("hid descriptor is too short\n"); return -EINVAL; } hid->version = le16_to_cpu(hdesc->bcdHID); hid->country = hdesc->bCountryCode; num_descriptors = min_t(int, hdesc->bNumDescriptors, (hdesc->bLength - offset) / sizeof(struct hid_class_descriptor)); for (n = 0; n < num_descriptors; n++) if (hdesc->desc[n].bDescriptorType == HID_DT_REPORT) rsize = le16_to_cpu(hdesc->desc[n].wDescriptorLength); if (!rsize || rsize > HID_MAX_DESCRIPTOR_SIZE) { dbg_hid("weird size of report descriptor (%u)\n", rsize); return -EINVAL; } rdesc = kmalloc(rsize, GFP_KERNEL); if (!rdesc) return -ENOMEM; hid_set_idle(dev, interface->desc.bInterfaceNumber, 0, 0); ret = hid_get_class_descriptor(dev, interface->desc.bInterfaceNumber, HID_DT_REPORT, rdesc, rsize); if (ret < 0) { dbg_hid("reading report descriptor failed\n"); kfree(rdesc); goto err; } ret = hid_parse_report(hid, rdesc, rsize); kfree(rdesc); if (ret) { dbg_hid("parsing report descriptor failed\n"); goto err; } hid->quirks |= quirks; return 0; err: return ret; } static int usbhid_start(struct hid_device *hid) { struct usb_interface *intf = to_usb_interface(hid->dev.parent); struct usb_host_interface *interface = intf->cur_altsetting; struct usb_device *dev = interface_to_usbdev(intf); struct usbhid_device *usbhid = hid->driver_data; unsigned int n, insize = 0; int ret; mutex_lock(&usbhid->mutex); clear_bit(HID_DISCONNECTED, &usbhid->iofl); usbhid->bufsize = HID_MIN_BUFFER_SIZE; hid_find_max_report(hid, HID_INPUT_REPORT, &usbhid->bufsize); hid_find_max_report(hid, HID_OUTPUT_REPORT, &usbhid->bufsize); hid_find_max_report(hid, HID_FEATURE_REPORT, &usbhid->bufsize); if (usbhid->bufsize > HID_MAX_BUFFER_SIZE) usbhid->bufsize = HID_MAX_BUFFER_SIZE; hid_find_max_report(hid, HID_INPUT_REPORT, &insize); if (insize > HID_MAX_BUFFER_SIZE) insize = HID_MAX_BUFFER_SIZE; if (hid_alloc_buffers(dev, hid)) { ret = -ENOMEM; goto fail; } for (n = 0; n < interface->desc.bNumEndpoints; n++) { struct usb_endpoint_descriptor *endpoint; int pipe; int interval; endpoint = &interface->endpoint[n].desc; if (!usb_endpoint_xfer_int(endpoint)) continue; interval = endpoint->bInterval; /* Some vendors give fullspeed interval on highspeed devices */ if (hid->quirks & HID_QUIRK_FULLSPEED_INTERVAL && dev->speed == USB_SPEED_HIGH) { interval = fls(endpoint->bInterval*8); pr_info("%s: Fixing fullspeed to highspeed interval: %d -> %d\n", hid->name, endpoint->bInterval, interval); } /* Change the polling interval of mice, joysticks * and keyboards. 
*/ switch (hid->collection->usage) { case HID_GD_MOUSE: if (hid_mousepoll_interval > 0) interval = hid_mousepoll_interval; break; case HID_GD_JOYSTICK: if (hid_jspoll_interval > 0) interval = hid_jspoll_interval; break; case HID_GD_KEYBOARD: if (hid_kbpoll_interval > 0) interval = hid_kbpoll_interval; break; } ret = -ENOMEM; if (usb_endpoint_dir_in(endpoint)) { if (usbhid->urbin) continue; if (!(usbhid->urbin = usb_alloc_urb(0, GFP_KERNEL))) goto fail; pipe = usb_rcvintpipe(dev, endpoint->bEndpointAddress); usb_fill_int_urb(usbhid->urbin, dev, pipe, usbhid->inbuf, insize, hid_irq_in, hid, interval); usbhid->urbin->transfer_dma = usbhid->inbuf_dma; usbhid->urbin->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; } else { if (usbhid->urbout) continue; if (!(usbhid->urbout = usb_alloc_urb(0, GFP_KERNEL))) goto fail; pipe = usb_sndintpipe(dev, endpoint->bEndpointAddress); usb_fill_int_urb(usbhid->urbout, dev, pipe, usbhid->outbuf, 0, hid_irq_out, hid, interval); usbhid->urbout->transfer_dma = usbhid->outbuf_dma; usbhid->urbout->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; } } usbhid->urbctrl = usb_alloc_urb(0, GFP_KERNEL); if (!usbhid->urbctrl) { ret = -ENOMEM; goto fail; } usb_fill_control_urb(usbhid->urbctrl, dev, 0, (void *) usbhid->cr, usbhid->ctrlbuf, 1, hid_ctrl, hid); usbhid->urbctrl->transfer_dma = usbhid->ctrlbuf_dma; usbhid->urbctrl->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; set_bit(HID_STARTED, &usbhid->iofl); if (hid->quirks & HID_QUIRK_ALWAYS_POLL) { ret = usb_autopm_get_interface(usbhid->intf); if (ret) goto fail; set_bit(HID_IN_POLLING, &usbhid->iofl); usbhid->intf->needs_remote_wakeup = 1; ret = hid_start_in(hid); if (ret) { dev_err(&hid->dev, "failed to start in urb: %d\n", ret); } usb_autopm_put_interface(usbhid->intf); } /* Some keyboards don't work until their LEDs have been set. * Since BIOSes do set the LEDs, it must be safe for any device * that supports the keyboard boot protocol. * In addition, enable remote wakeup by default for all keyboard * devices supporting the boot protocol. 
*/ if (interface->desc.bInterfaceSubClass == USB_INTERFACE_SUBCLASS_BOOT && interface->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_KEYBOARD) { usbhid_set_leds(hid); device_set_wakeup_enable(&dev->dev, 1); } mutex_unlock(&usbhid->mutex); return 0; fail: usb_free_urb(usbhid->urbin); usb_free_urb(usbhid->urbout); usb_free_urb(usbhid->urbctrl); usbhid->urbin = NULL; usbhid->urbout = NULL; usbhid->urbctrl = NULL; hid_free_buffers(dev, hid); mutex_unlock(&usbhid->mutex); return ret; } static void usbhid_stop(struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; if (WARN_ON(!usbhid)) return; if (hid->quirks & HID_QUIRK_ALWAYS_POLL) { clear_bit(HID_IN_POLLING, &usbhid->iofl); usbhid->intf->needs_remote_wakeup = 0; } mutex_lock(&usbhid->mutex); clear_bit(HID_STARTED, &usbhid->iofl); spin_lock_irq(&usbhid->lock); /* Sync with error and led handlers */ set_bit(HID_DISCONNECTED, &usbhid->iofl); while (usbhid->ctrltail != usbhid->ctrlhead) { if (usbhid->ctrl[usbhid->ctrltail].dir == USB_DIR_OUT) { kfree(usbhid->ctrl[usbhid->ctrltail].raw_report); usbhid->ctrl[usbhid->ctrltail].raw_report = NULL; } usbhid->ctrltail = (usbhid->ctrltail + 1) & (HID_CONTROL_FIFO_SIZE - 1); } spin_unlock_irq(&usbhid->lock); usb_kill_urb(usbhid->urbin); usb_kill_urb(usbhid->urbout); usb_kill_urb(usbhid->urbctrl); hid_cancel_delayed_stuff(usbhid); hid->claimed = 0; usb_free_urb(usbhid->urbin); usb_free_urb(usbhid->urbctrl); usb_free_urb(usbhid->urbout); usbhid->urbin = NULL; /* don't mess up next start */ usbhid->urbctrl = NULL; usbhid->urbout = NULL; hid_free_buffers(hid_to_usb_dev(hid), hid); mutex_unlock(&usbhid->mutex); } static int usbhid_power(struct hid_device *hid, int lvl) { struct usbhid_device *usbhid = hid->driver_data; int r = 0; switch (lvl) { case PM_HINT_FULLON: r = usb_autopm_get_interface(usbhid->intf); break; case PM_HINT_NORMAL: usb_autopm_put_interface(usbhid->intf); break; } return r; } static void usbhid_request(struct hid_device *hid, struct hid_report *rep, int reqtype) { switch (reqtype) { case HID_REQ_GET_REPORT: usbhid_submit_report(hid, rep, USB_DIR_IN); break; case HID_REQ_SET_REPORT: usbhid_submit_report(hid, rep, USB_DIR_OUT); break; } } static int usbhid_raw_request(struct hid_device *hid, unsigned char reportnum, __u8 *buf, size_t len, unsigned char rtype, int reqtype) { switch (reqtype) { case HID_REQ_GET_REPORT: return usbhid_get_raw_report(hid, reportnum, buf, len, rtype); case HID_REQ_SET_REPORT: return usbhid_set_raw_report(hid, reportnum, buf, len, rtype); default: return -EIO; } } static int usbhid_idle(struct hid_device *hid, int report, int idle, int reqtype) { struct usb_device *dev = hid_to_usb_dev(hid); struct usb_interface *intf = to_usb_interface(hid->dev.parent); struct usb_host_interface *interface = intf->cur_altsetting; int ifnum = interface->desc.bInterfaceNumber; if (reqtype != HID_REQ_SET_IDLE) return -EINVAL; return hid_set_idle(dev, ifnum, report, idle); } static bool usbhid_may_wakeup(struct hid_device *hid) { struct usb_device *dev = hid_to_usb_dev(hid); return device_may_wakeup(&dev->dev); } static const struct hid_ll_driver usb_hid_driver = { .parse = usbhid_parse, .start = usbhid_start, .stop = usbhid_stop, .open = usbhid_open, .close = usbhid_close, .power = usbhid_power, .request = usbhid_request, .wait = usbhid_wait_io, .raw_request = usbhid_raw_request, .output_report = usbhid_output_report, .idle = usbhid_idle, .may_wakeup = usbhid_may_wakeup, }; bool hid_is_usb(const struct hid_device *hdev) { return hdev->ll_driver == 
&usb_hid_driver; } EXPORT_SYMBOL_GPL(hid_is_usb); static int usbhid_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_host_interface *interface = intf->cur_altsetting; struct usb_device *dev = interface_to_usbdev(intf); struct usbhid_device *usbhid; struct hid_device *hid; unsigned int n, has_in = 0; size_t len; int ret; dbg_hid("HID probe called for ifnum %d\n", intf->altsetting->desc.bInterfaceNumber); for (n = 0; n < interface->desc.bNumEndpoints; n++) if (usb_endpoint_is_int_in(&interface->endpoint[n].desc)) has_in++; if (!has_in) { hid_err(intf, "couldn't find an input interrupt endpoint\n"); return -ENODEV; } hid = hid_allocate_device(); if (IS_ERR(hid)) return PTR_ERR(hid); usb_set_intfdata(intf, hid); hid->ll_driver = &usb_hid_driver; hid->ff_init = hid_pidff_init; #ifdef CONFIG_USB_HIDDEV hid->hiddev_connect = hiddev_connect; hid->hiddev_disconnect = hiddev_disconnect; hid->hiddev_hid_event = hiddev_hid_event; hid->hiddev_report_event = hiddev_report_event; #endif hid->dev.parent = &intf->dev; device_set_node(&hid->dev, dev_fwnode(&intf->dev)); hid->bus = BUS_USB; hid->vendor = le16_to_cpu(dev->descriptor.idVendor); hid->product = le16_to_cpu(dev->descriptor.idProduct); hid->version = le16_to_cpu(dev->descriptor.bcdDevice); hid->name[0] = 0; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) hid->type = HID_TYPE_USBMOUSE; else if (intf->cur_altsetting->desc.bInterfaceProtocol == 0) hid->type = HID_TYPE_USBNONE; if (dev->manufacturer) strscpy(hid->name, dev->manufacturer, sizeof(hid->name)); if (dev->product) { if (dev->manufacturer) strlcat(hid->name, " ", sizeof(hid->name)); strlcat(hid->name, dev->product, sizeof(hid->name)); } if (!strlen(hid->name)) snprintf(hid->name, sizeof(hid->name), "HID %04x:%04x", le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); usb_make_path(dev, hid->phys, sizeof(hid->phys)); strlcat(hid->phys, "/input", sizeof(hid->phys)); len = strlen(hid->phys); if (len < sizeof(hid->phys) - 1) snprintf(hid->phys + len, sizeof(hid->phys) - len, "%d", intf->altsetting[0].desc.bInterfaceNumber); if (usb_string(dev, dev->descriptor.iSerialNumber, hid->uniq, 64) <= 0) hid->uniq[0] = 0; usbhid = kzalloc(sizeof(*usbhid), GFP_KERNEL); if (usbhid == NULL) { ret = -ENOMEM; goto err; } hid->driver_data = usbhid; usbhid->hid = hid; usbhid->intf = intf; usbhid->ifnum = interface->desc.bInterfaceNumber; init_waitqueue_head(&usbhid->wait); INIT_WORK(&usbhid->reset_work, hid_reset); timer_setup(&usbhid->io_retry, hid_retry_timeout, 0); spin_lock_init(&usbhid->lock); mutex_init(&usbhid->mutex); ret = hid_add_device(hid); if (ret) { if (ret != -ENODEV) hid_err(intf, "can't add hid device: %d\n", ret); goto err_free; } return 0; err_free: kfree(usbhid); err: hid_destroy_device(hid); return ret; } static void usbhid_disconnect(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata(intf); struct usbhid_device *usbhid; if (WARN_ON(!hid)) return; usbhid = hid->driver_data; spin_lock_irq(&usbhid->lock); /* Sync with error and led handlers */ set_bit(HID_DISCONNECTED, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); hid_destroy_device(hid); kfree(usbhid); } static void hid_cancel_delayed_stuff(struct usbhid_device *usbhid) { del_timer_sync(&usbhid->io_retry); cancel_work_sync(&usbhid->reset_work); } static void hid_cease_io(struct usbhid_device *usbhid) { del_timer_sync(&usbhid->io_retry); usb_kill_urb(usbhid->urbin); usb_kill_urb(usbhid->urbctrl); usb_kill_urb(usbhid->urbout); } 
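For reference, the retry policy implemented by hid_io_error() further up can be read as a small schedule: the first retry fires after 13 ms, the delay doubles while it is under 100 ms (13, 26, 52, 104, 104, ...), and once errors have persisted for roughly a second the driver stops retrying and queues a port reset instead. A standalone sketch of that schedule, illustration only and not driver code, follows.

/*
 * Illustration only: the exponential-backoff retry schedule used by
 * hid_io_error() above, approximating its one-second stop_retry window.
 */
#include <stdio.h>

int main(void)
{
	unsigned int retry_delay = 0;	/* milliseconds */
	unsigned int elapsed_ms = 0;

	while (elapsed_ms <= 1000) {	/* roughly the stop_retry window */
		if (retry_delay == 0)
			retry_delay = 13;
		else if (retry_delay < 100)
			retry_delay *= 2;
		elapsed_ms += retry_delay;
		printf("retry after %u ms (t = %u ms)\n", retry_delay, elapsed_ms);
	}
	printf("window expired -> schedule port reset\n");
	return 0;
}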
static void hid_restart_io(struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; int clear_halt = test_bit(HID_CLEAR_HALT, &usbhid->iofl); int reset_pending = test_bit(HID_RESET_PENDING, &usbhid->iofl); spin_lock_irq(&usbhid->lock); clear_bit(HID_SUSPENDED, &usbhid->iofl); usbhid_mark_busy(usbhid); if (clear_halt || reset_pending) schedule_work(&usbhid->reset_work); usbhid->retry_delay = 0; spin_unlock_irq(&usbhid->lock); if (reset_pending || !test_bit(HID_STARTED, &usbhid->iofl)) return; if (!clear_halt) { if (hid_start_in(hid) < 0) hid_io_error(hid); } spin_lock_irq(&usbhid->lock); if (usbhid->urbout && !test_bit(HID_OUT_RUNNING, &usbhid->iofl)) usbhid_restart_out_queue(usbhid); if (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl)) usbhid_restart_ctrl_queue(usbhid); spin_unlock_irq(&usbhid->lock); } /* Treat USB reset pretty much the same as suspend/resume */ static int hid_pre_reset(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata(intf); struct usbhid_device *usbhid = hid->driver_data; spin_lock_irq(&usbhid->lock); set_bit(HID_RESET_PENDING, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); hid_cease_io(usbhid); return 0; } /* Same routine used for post_reset and reset_resume */ static int hid_post_reset(struct usb_interface *intf) { struct usb_device *dev = interface_to_usbdev (intf); struct hid_device *hid = usb_get_intfdata(intf); struct usbhid_device *usbhid = hid->driver_data; struct usb_host_interface *interface = intf->cur_altsetting; int status; char *rdesc; /* Fetch and examine the HID report descriptor. If this * has changed, then rebind. Since usbcore's check of the * configuration descriptors passed, we already know that * the size of the HID report descriptor has not changed. */ rdesc = kmalloc(hid->dev_rsize, GFP_KERNEL); if (!rdesc) return -ENOMEM; status = hid_get_class_descriptor(dev, interface->desc.bInterfaceNumber, HID_DT_REPORT, rdesc, hid->dev_rsize); if (status < 0) { dbg_hid("reading report descriptor failed (post_reset)\n"); kfree(rdesc); return status; } status = memcmp(rdesc, hid->dev_rdesc, hid->dev_rsize); kfree(rdesc); if (status != 0) { dbg_hid("report descriptor changed\n"); return -EPERM; } /* No need to do another reset or clear a halted endpoint */ spin_lock_irq(&usbhid->lock); clear_bit(HID_RESET_PENDING, &usbhid->iofl); clear_bit(HID_CLEAR_HALT, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); hid_set_idle(dev, intf->cur_altsetting->desc.bInterfaceNumber, 0, 0); hid_restart_io(hid); return 0; } static int hid_resume_common(struct hid_device *hid, bool driver_suspended) { int status = 0; hid_restart_io(hid); if (driver_suspended) status = hid_driver_resume(hid); return status; } static int hid_suspend(struct usb_interface *intf, pm_message_t message) { struct hid_device *hid = usb_get_intfdata(intf); struct usbhid_device *usbhid = hid->driver_data; int status = 0; bool driver_suspended = false; unsigned int ledcount; if (PMSG_IS_AUTO(message)) { ledcount = hidinput_count_leds(hid); spin_lock_irq(&usbhid->lock); /* Sync with error handler */ if (!test_bit(HID_RESET_PENDING, &usbhid->iofl) && !test_bit(HID_CLEAR_HALT, &usbhid->iofl) && !test_bit(HID_OUT_RUNNING, &usbhid->iofl) && !test_bit(HID_CTRL_RUNNING, &usbhid->iofl) && !test_bit(HID_KEYS_PRESSED, &usbhid->iofl) && (!ledcount || ignoreled)) { set_bit(HID_SUSPENDED, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); status = hid_driver_suspend(hid, message); if (status < 0) goto failed; driver_suspended = true; } else { usbhid_mark_busy(usbhid); 
spin_unlock_irq(&usbhid->lock); return -EBUSY; } } else { /* TODO: resume() might need to handle suspend failure */ status = hid_driver_suspend(hid, message); driver_suspended = true; spin_lock_irq(&usbhid->lock); set_bit(HID_SUSPENDED, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); if (usbhid_wait_io(hid) < 0) status = -EIO; } hid_cancel_delayed_stuff(usbhid); hid_cease_io(usbhid); if (PMSG_IS_AUTO(message) && test_bit(HID_KEYS_PRESSED, &usbhid->iofl)) { /* lost race against keypresses */ status = -EBUSY; goto failed; } dev_dbg(&intf->dev, "suspend\n"); return status; failed: hid_resume_common(hid, driver_suspended); return status; } static int hid_resume(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata (intf); int status; status = hid_resume_common(hid, true); dev_dbg(&intf->dev, "resume status %d\n", status); return 0; } static int hid_reset_resume(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata(intf); int status; status = hid_post_reset(intf); if (status >= 0) { int ret = hid_driver_reset_resume(hid); if (ret < 0) status = ret; } return status; } static const struct usb_device_id hid_usb_ids[] = { { .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS, .bInterfaceClass = USB_INTERFACE_CLASS_HID }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE (usb, hid_usb_ids); static struct usb_driver hid_driver = { .name = "usbhid", .probe = usbhid_probe, .disconnect = usbhid_disconnect, .suspend = pm_ptr(hid_suspend), .resume = pm_ptr(hid_resume), .reset_resume = pm_ptr(hid_reset_resume), .pre_reset = hid_pre_reset, .post_reset = hid_post_reset, .id_table = hid_usb_ids, .supports_autosuspend = 1, }; struct usb_interface *usbhid_find_interface(int minor) { return usb_find_interface(&hid_driver, minor); } static int __init hid_init(void) { int retval; retval = hid_quirks_init(quirks_param, BUS_USB, MAX_USBHID_BOOT_QUIRKS); if (retval) goto usbhid_quirks_init_fail; retval = usb_register(&hid_driver); if (retval) goto usb_register_fail; pr_info(KBUILD_MODNAME ": " DRIVER_DESC "\n"); return 0; usb_register_fail: hid_quirks_exit(BUS_USB); usbhid_quirks_init_fail: return retval; } static void __exit hid_exit(void) { usb_deregister(&hid_driver); hid_quirks_exit(BUS_USB); } module_init(hid_init); module_exit(hid_exit); MODULE_AUTHOR("Andreas Gal"); MODULE_AUTHOR("Vojtech Pavlik"); MODULE_AUTHOR("Jiri Kosina"); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
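A quick illustration of the exported helper defined above: the following is a minimal sketch (a hypothetical out-of-tree driver, not part of usbhid) of how a HID driver can use hid_is_usb() to reject devices that did not arrive through this USB transport before dereferencing USB-specific parent structures.

#include <linux/hid.h>
#include <linux/module.h>
#include <linux/usb.h>

/* Hypothetical probe: bail out unless the device sits on the usbhid
 * transport; only then is it safe to look at the parent USB interface. */
static int example_hid_probe(struct hid_device *hdev,
			     const struct hid_device_id *id)
{
	struct usb_interface *intf;
	int ret;

	if (!hid_is_usb(hdev))
		return -EINVAL;

	intf = to_usb_interface(hdev->dev.parent);
	hid_info(hdev, "bound to USB interface %d\n",
		 intf->cur_altsetting->desc.bInterfaceNumber);

	ret = hid_parse(hdev);
	if (ret)
		return ret;

	return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
}

static const struct hid_device_id example_devices[] = {
	{ HID_USB_DEVICE(HID_ANY_ID, HID_ANY_ID) },	/* match anything, for illustration */
	{ }
};

static struct hid_driver example_driver = {
	.name = "example-usb-only",
	.id_table = example_devices,
	.probe = example_hid_probe,
};
module_hid_driver(example_driver);
MODULE_LICENSE("GPL");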
/* * Mu-Law conversion Plug-In Interface * Copyright (c) 1999 by Jaroslav Kysela <perex@perex.cz> * Uros Bizjak <uros@kss-loka.si> * * Based on reference implementation by Sun Microsystems, Inc. * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Library General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include <linux/time.h> #include <sound/core.h> #include <sound/pcm.h> #include "pcm_plugin.h" #define SIGN_BIT (0x80) /* Sign bit for a u-law byte. */ #define QUANT_MASK (0xf) /* Quantization field mask. */ #define NSEGS (8) /* Number of u-law segments. */ #define SEG_SHIFT (4) /* Left shift for segment number. */ #define SEG_MASK (0x70) /* Segment field mask. */ static inline int val_seg(int val) { int r = 0; val >>= 7; if (val & 0xf0) { val >>= 4; r += 4; } if (val & 0x0c) { val >>= 2; r += 2; } if (val & 0x02) r += 1; return r; } #define BIAS (0x84) /* Bias for linear code. */ /* * linear2ulaw() - Convert a linear PCM value to u-law * * In order to simplify the encoding process, the original linear magnitude * is biased by adding 33 which shifts the encoding range from (0 - 8158) to * (33 - 8191). The result can be seen in the following encoding table: * * Biased Linear Input Code Compressed Code * ------------------------ --------------- * 00000001wxyza 000wxyz * 0000001wxyzab 001wxyz * 000001wxyzabc 010wxyz * 00001wxyzabcd 011wxyz * 0001wxyzabcde 100wxyz * 001wxyzabcdef 101wxyz * 01wxyzabcdefg 110wxyz * 1wxyzabcdefgh 111wxyz * * Each biased linear code has a leading 1 which identifies the segment * number.
The value of the segment number is equal to 7 minus the number * of leading 0's. The quantization interval is directly available as the * four bits wxyz. * The trailing bits (a - h) are ignored. * * Ordinarily the complement of the resulting code word is used for * transmission, and so the code word is complemented before it is returned. * * For further information see John C. Bellamy's Digital Telephony, 1982, * John Wiley & Sons, pps 98-111 and 472-476. */ static unsigned char linear2ulaw(int pcm_val) /* 2's complement (16-bit range) */ { int mask; int seg; unsigned char uval; /* Get the sign and the magnitude of the value. */ if (pcm_val < 0) { pcm_val = BIAS - pcm_val; mask = 0x7F; } else { pcm_val += BIAS; mask = 0xFF; } if (pcm_val > 0x7FFF) pcm_val = 0x7FFF; /* Convert the scaled magnitude to segment number. */ seg = val_seg(pcm_val); /* * Combine the sign, segment, quantization bits; * and complement the code word. */ uval = (seg << 4) | ((pcm_val >> (seg + 3)) & 0xF); return uval ^ mask; } /* * ulaw2linear() - Convert a u-law value to 16-bit linear PCM * * First, a biased linear code is derived from the code word. An unbiased * output can then be obtained by subtracting 33 from the biased code. * * Note that this function expects to be passed the complement of the * original code word. This is in keeping with ISDN conventions. */ static int ulaw2linear(unsigned char u_val) { int t; /* Complement to obtain normal u-law value. */ u_val = ~u_val; /* * Extract and bias the quantization bits. Then * shift up by the segment number and subtract out the bias. */ t = ((u_val & QUANT_MASK) << 3) + BIAS; t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT; return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS)); } /* * Basic Mu-Law plugin */ typedef void (*mulaw_f)(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t frames); struct mulaw_priv { mulaw_f func; int cvt_endian; /* need endian conversion? 
*/ unsigned int native_ofs; /* byte offset in native format */ unsigned int copy_ofs; /* byte offset in s16 format */ unsigned int native_bytes; /* byte size of the native format */ unsigned int copy_bytes; /* bytes to copy per conversion */ u16 flip; /* MSB flip for signedness, done after endian conversion */ }; static inline void cvt_s16_to_native(struct mulaw_priv *data, unsigned char *dst, u16 sample) { sample ^= data->flip; if (data->cvt_endian) sample = swab16(sample); if (data->native_bytes > data->copy_bytes) memset(dst, 0, data->native_bytes); memcpy(dst + data->native_ofs, (char *)&sample + data->copy_ofs, data->copy_bytes); } static void mulaw_decode(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t frames) { struct mulaw_priv *data = (struct mulaw_priv *)plugin->extra_data; int channel; int nchannels = plugin->src_format.channels; for (channel = 0; channel < nchannels; ++channel) { char *src; char *dst; int src_step, dst_step; snd_pcm_uframes_t frames1; if (!src_channels[channel].enabled) { if (dst_channels[channel].wanted) snd_pcm_area_silence(&dst_channels[channel].area, 0, frames, plugin->dst_format.format); dst_channels[channel].enabled = 0; continue; } dst_channels[channel].enabled = 1; src = src_channels[channel].area.addr + src_channels[channel].area.first / 8; dst = dst_channels[channel].area.addr + dst_channels[channel].area.first / 8; src_step = src_channels[channel].area.step / 8; dst_step = dst_channels[channel].area.step / 8; frames1 = frames; while (frames1-- > 0) { signed short sample = ulaw2linear(*src); cvt_s16_to_native(data, dst, sample); src += src_step; dst += dst_step; } } } static inline signed short cvt_native_to_s16(struct mulaw_priv *data, unsigned char *src) { u16 sample = 0; memcpy((char *)&sample + data->copy_ofs, src + data->native_ofs, data->copy_bytes); if (data->cvt_endian) sample = swab16(sample); sample ^= data->flip; return (signed short)sample; } static void mulaw_encode(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t frames) { struct mulaw_priv *data = (struct mulaw_priv *)plugin->extra_data; int channel; int nchannels = plugin->src_format.channels; for (channel = 0; channel < nchannels; ++channel) { char *src; char *dst; int src_step, dst_step; snd_pcm_uframes_t frames1; if (!src_channels[channel].enabled) { if (dst_channels[channel].wanted) snd_pcm_area_silence(&dst_channels[channel].area, 0, frames, plugin->dst_format.format); dst_channels[channel].enabled = 0; continue; } dst_channels[channel].enabled = 1; src = src_channels[channel].area.addr + src_channels[channel].area.first / 8; dst = dst_channels[channel].area.addr + dst_channels[channel].area.first / 8; src_step = src_channels[channel].area.step / 8; dst_step = dst_channels[channel].area.step / 8; frames1 = frames; while (frames1-- > 0) { signed short sample = cvt_native_to_s16(data, src); *dst = linear2ulaw(sample); src += src_step; dst += dst_step; } } } static snd_pcm_sframes_t mulaw_transfer(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t frames) { struct mulaw_priv *data; if (snd_BUG_ON(!plugin || !src_channels || !dst_channels)) return -ENXIO; if (frames == 0) return 0; #ifdef CONFIG_SND_DEBUG { unsigned int channel; for (channel = 0; channel < plugin->src_format.channels; channel++) { if 
(snd_BUG_ON(src_channels[channel].area.first % 8 || src_channels[channel].area.step % 8)) return -ENXIO; if (snd_BUG_ON(dst_channels[channel].area.first % 8 || dst_channels[channel].area.step % 8)) return -ENXIO; } } #endif if (frames > dst_channels[0].frames) frames = dst_channels[0].frames; data = (struct mulaw_priv *)plugin->extra_data; data->func(plugin, src_channels, dst_channels, frames); return frames; } static void init_data(struct mulaw_priv *data, snd_pcm_format_t format) { #ifdef SNDRV_LITTLE_ENDIAN data->cvt_endian = snd_pcm_format_big_endian(format) > 0; #else data->cvt_endian = snd_pcm_format_little_endian(format) > 0; #endif if (!snd_pcm_format_signed(format)) data->flip = 0x8000; data->native_bytes = snd_pcm_format_physical_width(format) / 8; data->copy_bytes = data->native_bytes < 2 ? 1 : 2; if (snd_pcm_format_little_endian(format)) { data->native_ofs = data->native_bytes - data->copy_bytes; data->copy_ofs = 2 - data->copy_bytes; } else { /* S24 in 4bytes need an 1 byte offset */ data->native_ofs = data->native_bytes - snd_pcm_format_width(format) / 8; } } int snd_pcm_plugin_build_mulaw(struct snd_pcm_substream *plug, struct snd_pcm_plugin_format *src_format, struct snd_pcm_plugin_format *dst_format, struct snd_pcm_plugin **r_plugin) { int err; struct mulaw_priv *data; struct snd_pcm_plugin *plugin; struct snd_pcm_plugin_format *format; mulaw_f func; if (snd_BUG_ON(!r_plugin)) return -ENXIO; *r_plugin = NULL; if (snd_BUG_ON(src_format->rate != dst_format->rate)) return -ENXIO; if (snd_BUG_ON(src_format->channels != dst_format->channels)) return -ENXIO; if (dst_format->format == SNDRV_PCM_FORMAT_MU_LAW) { format = src_format; func = mulaw_encode; } else if (src_format->format == SNDRV_PCM_FORMAT_MU_LAW) { format = dst_format; func = mulaw_decode; } else { snd_BUG(); return -EINVAL; } if (!snd_pcm_format_linear(format->format)) return -EINVAL; err = snd_pcm_plugin_build(plug, "Mu-Law<->linear conversion", src_format, dst_format, sizeof(struct mulaw_priv), &plugin); if (err < 0) return err; data = (struct mulaw_priv *)plugin->extra_data; data->func = func; init_data(data, format->format); plugin->transfer = mulaw_transfer; *r_plugin = plugin; return 0; }
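The encode/decode helpers above are self-contained integer math, so they are easy to sanity-check outside the kernel. Below is a small user-space re-implementation (logic copied from the plugin, illustration only, not part of the driver) that round-trips one sample; the decoded value comes back only to within the quantization step of the 8-bit code.

#include <stdio.h>

#define SIGN_BIT   (0x80)
#define QUANT_MASK (0xf)
#define SEG_SHIFT  (4)
#define SEG_MASK   (0x70)
#define BIAS       (0x84)

/* Segment number of a biased magnitude (same bit tests as the plugin). */
static int val_seg(int val)
{
	int r = 0;
	val >>= 7;
	if (val & 0xf0) { val >>= 4; r += 4; }
	if (val & 0x0c) { val >>= 2; r += 2; }
	if (val & 0x02) r += 1;
	return r;
}

/* 16-bit linear PCM -> complemented u-law byte. */
static unsigned char linear2ulaw(int pcm_val)
{
	int mask, seg;

	if (pcm_val < 0) {
		pcm_val = BIAS - pcm_val;
		mask = 0x7F;
	} else {
		pcm_val += BIAS;
		mask = 0xFF;
	}
	if (pcm_val > 0x7FFF)
		pcm_val = 0x7FFF;
	seg = val_seg(pcm_val);
	return ((seg << 4) | ((pcm_val >> (seg + 3)) & 0xF)) ^ mask;
}

/* Complemented u-law byte -> 16-bit linear PCM. */
static int ulaw2linear(unsigned char u_val)
{
	int t;

	u_val = ~u_val;
	t = ((u_val & QUANT_MASK) << 3) + BIAS;
	t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT;
	return (u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS);
}

int main(void)
{
	int sample = 1000;	/* arbitrary linear sample */
	unsigned char u = linear2ulaw(sample);

	/* Prints the original value, its u-law code and the (quantized)
	 * value recovered by the decoder. */
	printf("%d -> 0x%02x -> %d\n", sample, (unsigned int)u, ulaw2linear(u));
	return 0;
}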
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Linux NET3: Internet Group Management Protocol [IGMP] * * Authors: * Alan Cox <alan@lxorguk.ukuu.org.uk> * * Extended to talk the BSD extended IGMP protocol of mrouted 3.6 */ #ifndef _LINUX_IGMP_H #define _LINUX_IGMP_H #include <linux/skbuff.h> #include <linux/timer.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/refcount.h> #include <linux/sockptr.h> #include <uapi/linux/igmp.h> static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb) { return (struct igmphdr *)skb_transport_header(skb); } static inline struct igmpv3_report * igmpv3_report_hdr(const struct sk_buff *skb) { return (struct igmpv3_report *)skb_transport_header(skb); } static inline struct igmpv3_query * igmpv3_query_hdr(const struct sk_buff *skb) { return (struct igmpv3_query *)skb_transport_header(skb); } struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; __be32 sl_addr[] __counted_by(sl_max); }; #define IP_SFBLOCK 10 /* allocate this many at once */ /* ip_mc_socklist is real list now. Speed is not argument; this list never used in fast path code */ struct ip_mc_socklist { struct ip_mc_socklist __rcu *next_rcu; struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip_sf_socklist __rcu *sflist; struct rcu_head rcu; }; struct ip_sf_list { struct ip_sf_list *sf_next; unsigned long sf_count[2]; /* include/exclude counts */ __be32 sf_inaddr; unsigned char sf_gsresp; /* include in g & s response? */ unsigned char sf_oldin; /* change state */ unsigned char sf_crcount; /* retrans. left to send */ }; struct ip_mc_list { struct in_device *interface; __be32 multiaddr; unsigned int sfmode; struct ip_sf_list *sources; struct ip_sf_list *tomb; unsigned long sfcount[2]; union { struct ip_mc_list *next; struct ip_mc_list __rcu *next_rcu; }; struct ip_mc_list __rcu *next_hash; struct timer_list timer; int users; refcount_t refcnt; spinlock_t lock; char tm_running; char reporter; char unsolicit_count; char loaded; unsigned char gsquery; /* check source marks? */ unsigned char crcount; unsigned long mca_cstamp; unsigned long mca_tstamp; struct rcu_head rcu; }; /* V3 exponential field decoding */ #define IGMPV3_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value)) #define IGMPV3_EXP(thresh, nbmant, nbexp, value) \ ((value) < (thresh) ?
(value) : \ ((IGMPV3_MASK(value, nbmant) | (1<<(nbmant))) << \ (IGMPV3_MASK((value) >> (nbmant), nbexp) + (nbexp)))) #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value) #define IGMPV3_MRC(value) IGMPV3_EXP(0x80, 4, 3, value) static inline int ip_mc_may_pull(struct sk_buff *skb, unsigned int len) { if (skb_transport_offset(skb) + ip_transport_len(skb) < len) return 0; return pskb_may_pull(skb, len); } extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u8 proto); extern int igmp_rcv(struct sk_buff *); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_join_group_ssm(struct sock *sk, struct ip_mreqn *imr, unsigned int mode); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern void ip_mc_drop_socket(struct sock *sk); extern int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mreq_source *mreqs, int ifindex); extern int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf,int ifindex); extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, sockptr_t optval, sockptr_t optlen); extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, sockptr_t optval, size_t offset); extern int ip_mc_sf_allow(const struct sock *sk, __be32 local, __be32 rmt, int dif, int sdif); extern void ip_mc_init_dev(struct in_device *); extern void ip_mc_destroy_dev(struct in_device *); extern void ip_mc_up(struct in_device *); extern void ip_mc_down(struct in_device *); extern void ip_mc_unmap(struct in_device *); extern void ip_mc_remap(struct in_device *); extern void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp); static inline void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) { return __ip_mc_dec_group(in_dev, addr, GFP_KERNEL); } extern void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, gfp_t gfp); extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); int ip_mc_check_igmp(struct sk_buff *skb); #endif
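The IGMPV3_EXP() macro above implements the IGMPv3 (RFC 3376) "floating point" field encoding: values below the threshold are taken literally, larger ones are a 4-bit mantissa with a 3-bit exponent. A tiny user-space check (macros copied here purely for illustration) makes the decoding concrete.

#include <stdio.h>

#define IGMPV3_MASK(value, nb) ((nb) >= 32 ? (value) : ((1 << (nb)) - 1) & (value))
#define IGMPV3_EXP(thresh, nbmant, nbexp, value) \
	((value) < (thresh) ? (value) : \
	((IGMPV3_MASK(value, nbmant) | (1 << (nbmant))) << \
	 (IGMPV3_MASK((value) >> (nbmant), nbexp) + (nbexp))))
#define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value)

int main(void)
{
	/* Below the 0x80 threshold the field is its own value. */
	printf("QQIC 0x7f -> %d\n", IGMPV3_QQIC(0x7f));	/* 127 */
	/* Above it: (mantissa | 0x10) << (exponent + 3).
	 * 0x8f => (0x0f | 0x10) << (0 + 3) = 248. */
	printf("QQIC 0x8f -> %d\n", IGMPV3_QQIC(0x8f));	/* 248 */
	printf("QQIC 0xff -> %d\n", IGMPV3_QQIC(0xff));	/* 31744 */
	return 0;
}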
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Based on arch/arm/include/asm/ptrace.h * * Copyright (C) 1996-2003 Russell King * Copyright (C) 2012 ARM Ltd. */ #ifndef __ASM_PTRACE_H #define __ASM_PTRACE_H #include <asm/cpufeature.h> #include <uapi/asm/ptrace.h> /* Current Exception Level values, as contained in CurrentEL */ #define CurrentEL_EL1 (1 << 2) #define CurrentEL_EL2 (2 << 2) #define INIT_PSTATE_EL1 \ (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h) #define INIT_PSTATE_EL2 \ (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL2h) #include <linux/irqchip/arm-gic-v3-prio.h> #define GIC_PRIO_IRQON GICV3_PRIO_UNMASKED #define GIC_PRIO_IRQOFF GICV3_PRIO_IRQ #define GIC_PRIO_PSR_I_SET GICV3_PRIO_PSR_I_SET /* Additional SPSR bits not exposed in the UABI */ #define PSR_MODE_THREAD_BIT (1 << 0) #define PSR_IL_BIT (1 << 20) /* AArch32-specific ptrace requests */ #define COMPAT_PTRACE_GETREGS 12 #define COMPAT_PTRACE_SETREGS 13 #define COMPAT_PTRACE_GET_THREAD_AREA 22 #define COMPAT_PTRACE_SET_SYSCALL 23 #define COMPAT_PTRACE_GETVFPREGS 27 #define COMPAT_PTRACE_SETVFPREGS 28 #define COMPAT_PTRACE_GETHBPREGS 29 #define COMPAT_PTRACE_SETHBPREGS 30 /* SPSR_ELx bits for exceptions taken from AArch32 */ #define PSR_AA32_MODE_MASK 0x0000001f #define PSR_AA32_MODE_USR 0x00000010 #define PSR_AA32_MODE_FIQ 0x00000011 #define PSR_AA32_MODE_IRQ 0x00000012 #define PSR_AA32_MODE_SVC 0x00000013 #define PSR_AA32_MODE_ABT 0x00000017 #define PSR_AA32_MODE_HYP 0x0000001a #define PSR_AA32_MODE_UND 0x0000001b #define PSR_AA32_MODE_SYS 0x0000001f #define PSR_AA32_T_BIT 0x00000020 #define PSR_AA32_F_BIT 0x00000040 #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 #define PSR_AA32_PAN_BIT 0x00400000 #define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 #define PSR_AA32_V_BIT 0x10000000 #define PSR_AA32_C_BIT 0x20000000 #define PSR_AA32_Z_BIT 0x40000000 #define PSR_AA32_N_BIT 0x80000000 #define PSR_AA32_IT_MASK 0x0600fc00 /* If-Then execution state mask */ #define PSR_AA32_GE_MASK 0x000f0000 #ifdef CONFIG_CPU_BIG_ENDIAN #define
PSR_AA32_ENDSTATE PSR_AA32_E_BIT #else #define PSR_AA32_ENDSTATE 0 #endif /* AArch32 CPSR bits, as seen in AArch32 */ #define COMPAT_PSR_DIT_BIT 0x00200000 /* * These are 'magic' values for PTRACE_PEEKUSR that return info about where a * process is located in memory. */ #define COMPAT_PT_TEXT_ADDR 0x10000 #define COMPAT_PT_DATA_ADDR 0x10004 #define COMPAT_PT_TEXT_END_ADDR 0x10008 /* * If pt_regs.syscallno == NO_SYSCALL, then the thread is not executing * a syscall -- i.e., its most recent entry into the kernel from * userspace was not via SVC, or otherwise a tracer cancelled the syscall. * * This must have the value -1, for ABI compatibility with ptrace etc. */ #define NO_SYSCALL (-1) #ifndef __ASSEMBLY__ #include <linux/bug.h> #include <linux/types.h> #include <asm/stacktrace/frame.h> /* sizeof(struct user) for AArch32 */ #define COMPAT_USER_SZ 296 /* Architecturally defined mapping between AArch32 and AArch64 registers */ #define compat_usr(x) regs[(x)] #define compat_fp regs[11] #define compat_sp regs[13] #define compat_lr regs[14] #define compat_sp_hyp regs[15] #define compat_lr_irq regs[16] #define compat_sp_irq regs[17] #define compat_lr_svc regs[18] #define compat_sp_svc regs[19] #define compat_lr_abt regs[20] #define compat_sp_abt regs[21] #define compat_lr_und regs[22] #define compat_sp_und regs[23] #define compat_r8_fiq regs[24] #define compat_r9_fiq regs[25] #define compat_r10_fiq regs[26] #define compat_r11_fiq regs[27] #define compat_r12_fiq regs[28] #define compat_sp_fiq regs[29] #define compat_lr_fiq regs[30] static inline unsigned long compat_psr_to_pstate(const unsigned long psr) { unsigned long pstate; pstate = psr & ~COMPAT_PSR_DIT_BIT; if (psr & COMPAT_PSR_DIT_BIT) pstate |= PSR_AA32_DIT_BIT; return pstate; } static inline unsigned long pstate_to_compat_psr(const unsigned long pstate) { unsigned long psr; psr = pstate & ~PSR_AA32_DIT_BIT; if (pstate & PSR_AA32_DIT_BIT) psr |= COMPAT_PSR_DIT_BIT; return psr; } /* * This struct defines the way the registers are stored on the stack during an * exception. struct user_pt_regs must form a prefix of struct pt_regs. */ struct pt_regs { union { struct user_pt_regs user_regs; struct { u64 regs[31]; u64 sp; u64 pc; u64 pstate; }; }; u64 orig_x0; s32 syscallno; u32 pmr; u64 sdei_ttbr1; struct frame_record_meta stackframe; /* Only valid for some EL1 exceptions. */ u64 lockdep_hardirqs; u64 exit_rcu; }; /* For correct stack alignment, pt_regs has to be a multiple of 16 bytes. */ static_assert(IS_ALIGNED(sizeof(struct pt_regs), 16)); static inline bool in_syscall(struct pt_regs const *regs) { return regs->syscallno != NO_SYSCALL; } static inline void forget_syscall(struct pt_regs *regs) { regs->syscallno = NO_SYSCALL; } #define MAX_REG_OFFSET offsetof(struct pt_regs, pstate) #define arch_has_single_step() (1) #ifdef CONFIG_COMPAT #define compat_thumb_mode(regs) \ (((regs)->pstate & PSR_AA32_T_BIT)) #else #define compat_thumb_mode(regs) (0) #endif #define user_mode(regs) \ (((regs)->pstate & PSR_MODE_MASK) == PSR_MODE_EL0t) #define compat_user_mode(regs) \ (((regs)->pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) == \ (PSR_MODE32_BIT | PSR_MODE_EL0t)) #define processor_mode(regs) \ ((regs)->pstate & PSR_MODE_MASK) #define irqs_priority_unmasked(regs) \ (system_uses_irq_prio_masking() ? 
\ (regs)->pmr == GIC_PRIO_IRQON : \ true) #define interrupts_enabled(regs) \ (!((regs)->pstate & PSR_I_BIT) && irqs_priority_unmasked(regs)) #define fast_interrupts_enabled(regs) \ (!((regs)->pstate & PSR_F_BIT)) static inline unsigned long user_stack_pointer(struct pt_regs *regs) { if (compat_user_mode(regs)) return regs->compat_sp; return regs->sp; } extern int regs_query_register_offset(const char *name); extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); /** * regs_get_register() - get register value from its offset * @regs: pt_regs from which register value is gotten * @offset: offset of the register. * * regs_get_register returns the value of a register whose offset from @regs. * The @offset is the offset of the register in struct pt_regs. * If @offset is bigger than MAX_REG_OFFSET, this returns 0. */ static inline u64 regs_get_register(struct pt_regs *regs, unsigned int offset) { u64 val = 0; WARN_ON(offset & 7); offset >>= 3; switch (offset) { case 0 ... 30: val = regs->regs[offset]; break; case offsetof(struct pt_regs, sp) >> 3: val = regs->sp; break; case offsetof(struct pt_regs, pc) >> 3: val = regs->pc; break; case offsetof(struct pt_regs, pstate) >> 3: val = regs->pstate; break; default: val = 0; } return val; } /* * Read a register given an architectural register index r. * This handles the common case where 31 means XZR, not SP. */ static inline unsigned long pt_regs_read_reg(const struct pt_regs *regs, int r) { return (r == 31) ? 0 : regs->regs[r]; } /* * Write a register given an architectural register index r. * This handles the common case where 31 means XZR, not SP. */ static inline void pt_regs_write_reg(struct pt_regs *regs, int r, unsigned long val) { if (r != 31) regs->regs[r] = val; } /* Valid only for Kernel mode traps. */ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { return regs->sp; } static inline unsigned long regs_return_value(struct pt_regs *regs) { unsigned long val = regs->regs[0]; /* * Audit currently uses regs_return_value() instead of * syscall_get_return_value(). Apply the same sign-extension here until * audit is updated to use syscall_get_return_value(). */ if (compat_user_mode(regs)) val = sign_extend64(val, 31); return val; } static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) { regs->regs[0] = rc; } /** * regs_get_kernel_argument() - get Nth function argument in kernel * @regs: pt_regs of that context * @n: function argument number (start from 0) * * regs_get_argument() returns @n th argument of the function call. * * Note that this chooses the most likely register mapping. In very rare * cases this may not return correct data, for example, if one of the * function parameters is 16 bytes or bigger. In such cases, we cannot * get access the parameter correctly and the register assignment of * subsequent parameters will be shifted. 
*/ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, unsigned int n) { #define NR_REG_ARGUMENTS 8 if (n < NR_REG_ARGUMENTS) return pt_regs_read_reg(regs, n); return 0; } /* We must avoid circular header include via sched.h */ struct task_struct; int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); static inline unsigned long instruction_pointer(struct pt_regs *regs) { return regs->pc; } static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { regs->pc = val; } static inline unsigned long frame_pointer(struct pt_regs *regs) { return regs->regs[29]; } #define procedure_link_pointer(regs) ((regs)->regs[30]) static inline void procedure_link_pointer_set(struct pt_regs *regs, unsigned long val) { procedure_link_pointer(regs) = val; } extern unsigned long profile_pc(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ #endif
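The accessors above are what generic instrumentation code uses to inspect a trapped context. As a hedged sketch (an assumed example module; the probed symbol is hypothetical and not taken from this header), a kprobe pre-handler on arm64 could read the probed function's first argument and PC like this:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

/* Runs just before the probed instruction; regs describes the trapped
 * context, so the pt_regs helpers above apply directly. */
static int example_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	/* x0 on arm64; regs_get_kernel_argument() returns 0 for n >= 8. */
	unsigned long arg0 = regs_get_kernel_argument(regs, 0);

	pr_info("%s: pc=%px arg0=%lx user=%d\n", p->symbol_name,
		(void *)instruction_pointer(regs), arg0, user_mode(regs));
	return 0;
}

static struct kprobe example_kp = {
	.symbol_name = "do_sys_open",	/* hypothetical probe target */
	.pre_handler = example_pre_handler,
};

static int __init example_init(void)
{
	return register_kprobe(&example_kp);
}

static void __exit example_exit(void)
{
	unregister_kprobe(&example_kp);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");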
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ */ #ifndef __EROFS_XATTR_H #define __EROFS_XATTR_H #include "internal.h" #include <linux/posix_acl_xattr.h> #include <linux/xattr.h> /* Attribute not found */ #define ENOATTR ENODATA #ifdef CONFIG_EROFS_FS_XATTR extern const struct xattr_handler erofs_xattr_user_handler; extern const struct xattr_handler erofs_xattr_trusted_handler; extern const struct xattr_handler erofs_xattr_security_handler; static inline const char *erofs_xattr_prefix(unsigned int idx, struct dentry *dentry) { const struct xattr_handler *handler = NULL; static const struct xattr_handler * const xattr_handler_map[] = { [EROFS_XATTR_INDEX_USER] = &erofs_xattr_user_handler, #ifdef CONFIG_EROFS_FS_POSIX_ACL [EROFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &nop_posix_acl_access, [EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &nop_posix_acl_default, #endif [EROFS_XATTR_INDEX_TRUSTED] = &erofs_xattr_trusted_handler, #ifdef CONFIG_EROFS_FS_SECURITY [EROFS_XATTR_INDEX_SECURITY] = &erofs_xattr_security_handler, #endif }; if (idx && idx < ARRAY_SIZE(xattr_handler_map)) handler = xattr_handler_map[idx]; if (!xattr_handler_can_list(handler, dentry)) return NULL; return xattr_prefix(handler); } extern const struct xattr_handler * const erofs_xattr_handlers[]; int erofs_xattr_prefixes_init(struct super_block *sb); void erofs_xattr_prefixes_cleanup(struct super_block *sb); int erofs_getxattr(struct inode *, int, const char *, void *, size_t); ssize_t erofs_listxattr(struct dentry *, char *, size_t); #else static inline int erofs_xattr_prefixes_init(struct super_block *sb) { return 0; } static inline void erofs_xattr_prefixes_cleanup(struct super_block *sb) {} static inline int erofs_getxattr(struct inode *inode, int index, const char *name, void *buffer, size_t buffer_size) { return -EOPNOTSUPP; } #define erofs_listxattr (NULL) #define erofs_xattr_handlers (NULL) #endif /* !CONFIG_EROFS_FS_XATTR */ #ifdef CONFIG_EROFS_FS_POSIX_ACL struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu); #else #define erofs_get_acl (NULL) #endif #endif
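One practical consequence of the ENOATTR/ENODATA alias above is visible from user space: asking for an extended attribute that is not present fails with ENODATA. A minimal illustration (the mount point and attribute name below are made up):

#include <errno.h>
#include <stdio.h>
#include <sys/xattr.h>

int main(void)
{
	char buf[256];
	/* Hypothetical file on an erofs mount and a name that does not exist. */
	ssize_t n = getxattr("/mnt/erofs/file", "user.missing", buf, sizeof(buf));

	if (n < 0 && errno == ENODATA)
		printf("attribute not present (ENODATA, a.k.a. ENOATTR)\n");
	else if (n >= 0)
		printf("value: %.*s\n", (int)n, buf);
	else
		perror("getxattr");
	return 0;
}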
/* * linux/fs/nls/mac-celtic.c * * Charset macceltic translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ /* * COPYRIGHT AND PERMISSION NOTICE * * Copyright 1991-2012 Unicode, Inc. All rights reserved. Distributed under * the Terms of Use in http://www.unicode.org/copyright.html.
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of the Unicode data files and any associated documentation (the "Data * Files") or Unicode software and any associated documentation (the * "Software") to deal in the Data Files or Software without restriction, * including without limitation the rights to use, copy, modify, merge, * publish, distribute, and/or sell copies of the Data Files or Software, and * to permit persons to whom the Data Files or Software are furnished to do * so, provided that (a) the above copyright notice(s) and this permission * notice appear with all copies of the Data Files or Software, (b) both the * above copyright notice(s) and this permission notice appear in associated * documentation, and (c) there is clear notice in each modified Data File or * in the Software as well as in the documentation associated with the Data * File(s) or Software that the data or software has been modified. * * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF * THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR * PERFORMANCE OF THE DATA FILES OR SOFTWARE. * * Except as contained in this notice, the name of a copyright holder shall * not be used in advertising or otherwise to promote the sale, use or other * dealings in these Data Files or Software without prior written * authorization of the copyright holder. 
*/ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80 */ 0x00c4, 0x00c5, 0x00c7, 0x00c9, 0x00d1, 0x00d6, 0x00dc, 0x00e1, 0x00e0, 0x00e2, 0x00e4, 0x00e3, 0x00e5, 0x00e7, 0x00e9, 0x00e8, /* 0x90 */ 0x00ea, 0x00eb, 0x00ed, 0x00ec, 0x00ee, 0x00ef, 0x00f1, 0x00f3, 0x00f2, 0x00f4, 0x00f6, 0x00f5, 0x00fa, 0x00f9, 0x00fb, 0x00fc, /* 0xa0 */ 0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df, 0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8, /* 0xb0 */ 0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211, 0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x03a9, 0x00e6, 0x00f8, /* 0xc0 */ 0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab, 0x00bb, 0x2026, 0x00a0, 0x00c0, 0x00c3, 0x00d5, 0x0152, 0x0153, /* 0xd0 */ 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca, 0x00ff, 0x0178, 0x2044, 0x20ac, 0x2039, 0x203a, 0x0176, 0x0177, /* 0xe0 */ 0x2021, 0x00b7, 0x1ef2, 0x1ef3, 0x2030, 0x00c2, 0x00ca, 0x00c1, 0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf, 0x00cc, 0x00d3, 0x00d4, /* 0xf0 */ 0x2663, 0x00d2, 0x00da, 0x00db, 0x00d9, 0x0131, 0x00dd, 0x00fd, 0x0174, 0x0175, 0x1e84, 0x1e85, 0x1e80, 0x1e81, 0x1e82, 0x1e83, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xca, 0xc1, 0xa2, 0xa3, 0x00, 0xb4, 0x00, 0xa4, /* 0xa0-0xa7 */ 0xac, 0xa9, 0xbb, 0xc7, 0xc2, 0x00, 0xa8, 0x00, /* 0xa8-0xaf */ 0xa1, 0xb1, 0x00, 0x00, 0xab, 0xb5, 0xa6, 0xe1, /* 0xb0-0xb7 */ 0x00, 0x00, 0xbc, 0xc8, 0x00, 0x00, 0x00, 0xc0, /* 0xb8-0xbf */ 0xcb, 0xe7, 0xe5, 0xcc, 0x80, 0x81, 0xae, 0x82, /* 0xc0-0xc7 */ 0xe9, 0x83, 0xe6, 0xe8, 0xed, 0xea, 0xeb, 0xec, /* 0xc8-0xcf */ 0x00, 0x84, 0xf1, 0xee, 0xef, 0xcd, 0x85, 0x00, /* 0xd0-0xd7 */ 0xaf, 0xf4, 0xf2, 0xf3, 0x86, 0xf6, 0x00, 0xa7, /* 0xd8-0xdf */ 0x88, 0x87, 0x89, 0x8b, 0x8a, 0x8c, 0xbe, 0x8d, /* 0xe0-0xe7 */ 0x8f, 0x8e, 0x90, 0x91, 0x93, 0x92, 0x94, 0x95, /* 0xe8-0xef */ 0x00, 0x96, 0x98, 0x97, 0x99, 0x9b, 0x9a, 0xd6, /* 0xf0-0xf7 */ 0xbf, 0x9d, 0x9c, 0x9e, 0x9f, 0xf7, 0x00, 0xd8, /* 0xf8-0xff */ }; static const unsigned char page01[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0xf5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0xce, 0xcf, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0xf8, 0xf9, 0xde, 0xdf, /* 0x70-0x77 */ 0xd9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0xc4, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page03[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0xbd, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0xb9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page1e[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0xfc, 0xfd, 0xfe, 0xff, 0xfa, 0xfb, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0xe2, 
0xe3, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0xd0, 0xd1, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xd4, 0xd5, 0x00, 0x00, 0xd2, 0xd3, 0x00, 0x00, /* 0x18-0x1f */ 0xa0, 0xe0, 0xa5, 0x00, 0x00, 0x00, 0xc9, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0xdc, 0xdd, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0xda, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0xdb, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page21[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0xb6, 0x00, 0x00, 0x00, 0xc6, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb8, /* 0x08-0x0f */ 0x00, 0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, 0xb0, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0xba, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0xc5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0xad, 0x00, 0x00, 0x00, 0xb2, 0xb3, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page25[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0xd7, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page26[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char *const page_uni2charset[256] = { page00, page01, 
NULL, page03, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page1e, NULL, page20, page21, page22, NULL, NULL, page25, page26, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x00-0x07 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08-0x0f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10-0x17 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x18-0x1f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x20-0x27 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x28-0x2f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30-0x37 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x38-0x3f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x40-0x47 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x48-0x4f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50-0x57 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x58-0x5f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60-0x67 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x68-0x6f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70-0x77 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x78-0x7f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80-0x87 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x88-0x8f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90-0x97 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x98-0x9f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0-0xa7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa8-0xaf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0-0xb7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb8-0xbf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0-0xc7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc8-0xcf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0-0xd7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd8-0xdf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0-0xe7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe8-0xef */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, /* 0xf0-0xf7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x00-0x07 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x08-0x0f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x10-0x17 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x18-0x1f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x20-0x27 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x28-0x2f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x30-0x37 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x38-0x3f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x40-0x47 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x48-0x4f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x50-0x57 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x58-0x5f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x60-0x67 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x68-0x6f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x70-0x77 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x78-0x7f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x80-0x87 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x88-0x8f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x90-0x97 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0x98-0x9f */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xa0-0xa7 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xa8-0xaf */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xb0-0xb7 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xb8-0xbf */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xc0-0xc7 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xc8-0xcf */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xd0-0xd7 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xd8-0xdf */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xe0-0xe7 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xe8-0xef */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xf0-0xf7 */ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "macceltic", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_macceltic(void) { return register_nls(&table); } static void __exit exit_nls_macceltic(void) { unregister_nls(&table); } module_init(init_nls_macceltic) module_exit(exit_nls_macceltic) MODULE_DESCRIPTION("NLS Codepage macceltic"); MODULE_LICENSE("Dual BSD/GPL");
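/*
 * A minimal user-space sketch (not part of the kernel module above) of the
 * two-level lookup that uni2char() performs: the high byte of the 16-bit
 * code point selects a 256-entry page from page_uni2charset[], the low byte
 * indexes into that page, and a zero entry means "no mapping" (reported as
 * -EINVAL by the real code).  The names demo_page22, demo_pages and
 * demo_uni2char are invented for this sketch; the single populated entry
 * copies the U+221E cell visible in page22[] above.
 */
#include <stdio.h>

static const unsigned char demo_page22[256] = {
    [0x1e] = 0xb0,              /* U+221E -> 0xB0, as in page22[] above */
};

static const unsigned char *const demo_pages[256] = {
    [0x22] = demo_page22,       /* only Unicode page 0x22 is populated here */
};

static int demo_uni2char(unsigned short uni, unsigned char *out)
{
    unsigned char cl = uni & 0x00ff;    /* low byte selects the cell  */
    unsigned char ch = uni >> 8;        /* high byte selects the page */
    const unsigned char *page = demo_pages[ch];

    if (!page || !page[cl])
        return -1;              /* unmapped; -EINVAL in the kernel version */
    *out = page[cl];
    return 1;                   /* one output byte produced */
}

int main(void)
{
    unsigned char c;

    if (demo_uni2char(0x221e, &c) == 1)
        printf("U+221E -> 0x%02x\n", c);
    if (demo_uni2char(0x0416, &c) < 0)  /* page 0x04 has no table */
        printf("unmapped code point rejected\n");
    return 0;
}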
/* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <linux/module.h> #include <linux/init.h> #include <linux/device.h> #include <linux/err.h> #include <linux/fs.h> #include <linux/poll.h> #include <linux/sched.h> #include <linux/file.h> #include <linux/cdev.h> #include <linux/anon_inodes.h> #include <linux/slab.h> #include <linux/sched/mm.h> #include <linux/uaccess.h> #include <rdma/ib.h> #include <rdma/uverbs_std_types.h> #include <rdma/rdma_netlink.h> #include "uverbs.h" #include "core_priv.h" #include "rdma_core.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace verbs access"); MODULE_LICENSE("Dual BSD/GPL"); enum { IB_UVERBS_MAJOR = 231, IB_UVERBS_BASE_MINOR = 192, IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS, IB_UVERBS_NUM_FIXED_MINOR = 32, IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR, }; #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) static dev_t dynamic_uverbs_dev; static DEFINE_IDA(uverbs_ida); static int ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); static struct ib_client uverbs_client; static char *uverbs_devnode(const struct device *dev, umode_t *mode) { if (mode) *mode = 0666; return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } static const struct class uverbs_class = { .name = "infiniband_verbs", .devnode = uverbs_devnode, }; /* * Must be called with the ufile->device->disassociate_srcu held, and the lock * must be held until use of the ucontext is finished. */ struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile) { /* * We do not hold the hw_destroy_rwsem lock for this flow, instead * srcu is used. It does not matter if someone races this with * get_context, we get NULL or valid ucontext. 
*/ struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext); if (!srcu_dereference(ufile->device->ib_dev, &ufile->device->disassociate_srcu)) return ERR_PTR(-EIO); if (!ucontext) return ERR_PTR(-EINVAL); return ucontext; } EXPORT_SYMBOL(ib_uverbs_get_ucontext_file); int uverbs_dealloc_mw(struct ib_mw *mw) { struct ib_pd *pd = mw->pd; int ret; ret = mw->device->ops.dealloc_mw(mw); if (ret) return ret; atomic_dec(&pd->usecnt); kfree(mw); return ret; } static void ib_uverbs_release_dev(struct device *device) { struct ib_uverbs_device *dev = container_of(device, struct ib_uverbs_device, dev); uverbs_destroy_api(dev->uapi); cleanup_srcu_struct(&dev->disassociate_srcu); mutex_destroy(&dev->lists_mutex); mutex_destroy(&dev->xrcd_tree_mutex); kfree(dev); } void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file, struct ib_ucq_object *uobj) { struct ib_uverbs_event *evt, *tmp; if (ev_file) { spin_lock_irq(&ev_file->ev_queue.lock); list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&ev_file->ev_queue.lock); uverbs_uobject_put(&ev_file->uobj); } ib_uverbs_release_uevent(&uobj->uevent); } void ib_uverbs_release_uevent(struct ib_uevent_object *uobj) { struct ib_uverbs_async_event_file *async_file = uobj->event_file; struct ib_uverbs_event *evt, *tmp; if (!async_file) return; spin_lock_irq(&async_file->ev_queue.lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&async_file->ev_queue.lock); uverbs_uobject_put(&async_file->uobj); } void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj) { struct ib_uverbs_mcast_entry *mcast, *tmp; list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) { ib_detach_mcast(qp, &mcast->gid, mcast->lid); list_del(&mcast->list); kfree(mcast); } } static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) { complete(&dev->comp); } void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); struct ib_device *ib_dev; int srcu_key; release_ufile_idr_uobject(file); srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); if (ib_dev && !ib_dev->ops.disassociate_ucontext) module_put(ib_dev->ops.owner); srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); if (refcount_dec_and_test(&file->device->refcount)) ib_uverbs_comp_dev(file->device); if (file->default_async_file) uverbs_uobject_put(&file->default_async_file->uobj); put_device(&file->device->dev); if (file->disassociate_page) __free_pages(file->disassociate_page, 0); mutex_destroy(&file->disassociation_lock); mutex_destroy(&file->umap_lock); mutex_destroy(&file->ucontext_lock); kfree(file); } static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, struct file *filp, char __user *buf, size_t count, loff_t *pos, size_t eventsz) { struct ib_uverbs_event *event; int ret = 0; spin_lock_irq(&ev_queue->lock); while (list_empty(&ev_queue->event_list)) { if (ev_queue->is_closed) { spin_unlock_irq(&ev_queue->lock); return -EIO; } spin_unlock_irq(&ev_queue->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(ev_queue->poll_wait, (!list_empty(&ev_queue->event_list) || ev_queue->is_closed))) return -ERESTARTSYS; spin_lock_irq(&ev_queue->lock); } event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list); if (eventsz > count) { 
ret = -EINVAL; event = NULL; } else { list_del(ev_queue->event_list.next); if (event->counter) { ++(*event->counter); list_del(&event->obj_list); } } spin_unlock_irq(&ev_queue->lock); if (event) { if (copy_to_user(buf, event, eventsz)) ret = -EFAULT; else ret = eventsz; } kfree(event); return ret; } static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_async_event_file *file = filp->private_data; return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos, sizeof(struct ib_uverbs_async_event_desc)); } static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_completion_event_file *comp_ev_file = filp->private_data; return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count, pos, sizeof(struct ib_uverbs_comp_event_desc)); } static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue, struct file *filp, struct poll_table_struct *wait) { __poll_t pollflags = 0; poll_wait(filp, &ev_queue->poll_wait, wait); spin_lock_irq(&ev_queue->lock); if (!list_empty(&ev_queue->event_list)) pollflags = EPOLLIN | EPOLLRDNORM; else if (ev_queue->is_closed) pollflags = EPOLLERR; spin_unlock_irq(&ev_queue->lock); return pollflags; } static __poll_t ib_uverbs_async_event_poll(struct file *filp, struct poll_table_struct *wait) { struct ib_uverbs_async_event_file *file = filp->private_data; return ib_uverbs_event_poll(&file->ev_queue, filp, wait); } static __poll_t ib_uverbs_comp_event_poll(struct file *filp, struct poll_table_struct *wait) { struct ib_uverbs_completion_event_file *comp_ev_file = filp->private_data; return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait); } static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on) { struct ib_uverbs_async_event_file *file = filp->private_data; return fasync_helper(fd, filp, on, &file->ev_queue.async_queue); } static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on) { struct ib_uverbs_completion_event_file *comp_ev_file = filp->private_data; return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue); } const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_comp_event_read, .poll = ib_uverbs_comp_event_poll, .release = uverbs_uobject_fd_release, .fasync = ib_uverbs_comp_event_fasync, }; const struct file_operations uverbs_async_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_async_event_read, .poll = ib_uverbs_async_event_poll, .release = uverbs_async_event_release, .fasync = ib_uverbs_async_event_fasync, }; void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { struct ib_uverbs_event_queue *ev_queue = cq_context; struct ib_ucq_object *uobj; struct ib_uverbs_event *entry; unsigned long flags; if (!ev_queue) return; spin_lock_irqsave(&ev_queue->lock, flags); if (ev_queue->is_closed) { spin_unlock_irqrestore(&ev_queue->lock, flags); return; } entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&ev_queue->lock, flags); return; } uobj = cq->uobject; entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle; entry->counter = &uobj->comp_events_reported; list_add_tail(&entry->list, &ev_queue->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); spin_unlock_irqrestore(&ev_queue->lock, flags); wake_up_interruptible(&ev_queue->poll_wait); kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN); } void ib_uverbs_async_handler(struct 
ib_uverbs_async_event_file *async_file, __u64 element, __u64 event, struct list_head *obj_list, u32 *counter) { struct ib_uverbs_event *entry; unsigned long flags; if (!async_file) return; spin_lock_irqsave(&async_file->ev_queue.lock, flags); if (async_file->ev_queue.is_closed) { spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); return; } entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); return; } entry->desc.async.element = element; entry->desc.async.event_type = event; entry->desc.async.reserved = 0; entry->counter = counter; list_add_tail(&entry->list, &async_file->ev_queue.event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); wake_up_interruptible(&async_file->ev_queue.poll_wait); kill_fasync(&async_file->ev_queue.async_queue, SIGIO, POLL_IN); } static void uverbs_uobj_event(struct ib_uevent_object *eobj, struct ib_event *event) { ib_uverbs_async_handler(eobj->event_file, eobj->uobject.user_handle, event->event, &eobj->event_list, &eobj->events_reported); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { uverbs_uobj_event(&event->element.cq->uobject->uevent, event); } void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) { /* for XRC target qp's, check that qp is live */ if (!event->element.qp->uobject) return; uverbs_uobj_event(&event->element.qp->uobject->uevent, event); } void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) { uverbs_uobj_event(&event->element.wq->uobject->uevent, event); } void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { uverbs_uobj_event(&event->element.srq->uobject->uevent, event); } static void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event) { ib_uverbs_async_handler( container_of(handler, struct ib_uverbs_async_event_file, event_handler), event->element.port_num, event->event, NULL, NULL); } void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue) { spin_lock_init(&ev_queue->lock); INIT_LIST_HEAD(&ev_queue->event_list); init_waitqueue_head(&ev_queue->poll_wait); ev_queue->is_closed = 0; ev_queue->async_queue = NULL; } void ib_uverbs_init_async_event_file( struct ib_uverbs_async_event_file *async_file) { struct ib_uverbs_file *uverbs_file = async_file->uobj.ufile; struct ib_device *ib_dev = async_file->uobj.context->device; ib_uverbs_init_event_queue(&async_file->ev_queue); /* The first async_event_file becomes the default one for the file. 
*/ mutex_lock(&uverbs_file->ucontext_lock); if (!uverbs_file->default_async_file) { /* Pairs with the put in ib_uverbs_release_file */ uverbs_uobject_get(&async_file->uobj); smp_store_release(&uverbs_file->default_async_file, async_file); } mutex_unlock(&uverbs_file->ucontext_lock); INIT_IB_EVENT_HANDLER(&async_file->event_handler, ib_dev, ib_uverbs_event_handler); ib_register_event_handler(&async_file->event_handler); } static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count, const struct uverbs_api_write_method *method_elm) { if (method_elm->is_ex) { count -= sizeof(*hdr) + sizeof(*ex_hdr); if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count) return -EINVAL; if (hdr->in_words * 8 < method_elm->req_size) return -ENOSPC; if (ex_hdr->cmd_hdr_reserved) return -EINVAL; if (ex_hdr->response) { if (!hdr->out_words && !ex_hdr->provider_out_words) return -EINVAL; if (hdr->out_words * 8 < method_elm->resp_size) return -ENOSPC; if (!access_ok(u64_to_user_ptr(ex_hdr->response), (hdr->out_words + ex_hdr->provider_out_words) * 8)) return -EFAULT; } else { if (hdr->out_words || ex_hdr->provider_out_words) return -EINVAL; } return 0; } /* not extended command */ if (hdr->in_words * 4 != count) return -EINVAL; if (count < method_elm->req_size + sizeof(*hdr)) { /* * rdma-core v18 and v19 have a bug where they send DESTROY_CQ * with a 16 byte write instead of 24. Old kernels didn't * check the size so they allowed this. Now that the size is * checked provide a compatibility work around to not break * those userspaces. */ if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ && count == 16) { hdr->in_words = 6; return 0; } return -ENOSPC; } if (hdr->out_words * 4 < method_elm->resp_size) return -ENOSPC; return 0; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; const struct uverbs_api_write_method *method_elm; struct uverbs_api *uapi = file->device->uapi; struct ib_uverbs_ex_cmd_hdr ex_hdr; struct ib_uverbs_cmd_hdr hdr; struct uverbs_attr_bundle bundle; int srcu_key; ssize_t ret; if (!ib_safe_file_access(filp)) { pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", task_tgid_vnr(current), current->comm); return -EACCES; } if (count < sizeof(hdr)) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; method_elm = uapi_get_method(uapi, hdr.command); if (IS_ERR(method_elm)) return PTR_ERR(method_elm); if (method_elm->is_ex) { if (count < (sizeof(hdr) + sizeof(ex_hdr))) return -EINVAL; if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) return -EFAULT; } ret = verify_hdr(&hdr, &ex_hdr, count, method_elm); if (ret) return ret; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); buf += sizeof(hdr); memset(bundle.attr_present, 0, sizeof(bundle.attr_present)); bundle.ufile = file; bundle.context = NULL; /* only valid if bundle has uobject */ bundle.uobject = NULL; if (!method_elm->is_ex) { size_t in_len = hdr.in_words * 4 - sizeof(hdr); size_t out_len = hdr.out_words * 4; u64 response = 0; if (method_elm->has_udata) { bundle.driver_udata.inlen = in_len - method_elm->req_size; in_len = method_elm->req_size; if (bundle.driver_udata.inlen) bundle.driver_udata.inbuf = buf + in_len; else bundle.driver_udata.inbuf = NULL; } else { memset(&bundle.driver_udata, 0, sizeof(bundle.driver_udata)); } if (method_elm->has_resp) { /* * The macros check that if 
has_resp is set * then the command request structure starts * with a '__aligned u64 response' member. */ ret = get_user(response, (const u64 __user *)buf); if (ret) goto out_unlock; if (method_elm->has_udata) { bundle.driver_udata.outlen = out_len - method_elm->resp_size; out_len = method_elm->resp_size; if (bundle.driver_udata.outlen) bundle.driver_udata.outbuf = u64_to_user_ptr(response + out_len); else bundle.driver_udata.outbuf = NULL; } } else { bundle.driver_udata.outlen = 0; bundle.driver_udata.outbuf = NULL; } ib_uverbs_init_udata_buf_or_null( &bundle.ucore, buf, u64_to_user_ptr(response), in_len, out_len); } else { buf += sizeof(ex_hdr); ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf, u64_to_user_ptr(ex_hdr.response), hdr.in_words * 8, hdr.out_words * 8); ib_uverbs_init_udata_buf_or_null( &bundle.driver_udata, buf + bundle.ucore.inlen, u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen, ex_hdr.provider_in_words * 8, ex_hdr.provider_out_words * 8); } ret = method_elm->handler(&bundle); if (bundle.uobject) uverbs_finalize_object(bundle.uobject, UVERBS_ACCESS_NEW, true, !ret, &bundle); out_unlock: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return (ret) ? : count; } static const struct vm_operations_struct rdma_umap_ops; static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; struct ib_ucontext *ucontext; int ret = 0; int srcu_key; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ucontext = ib_uverbs_get_ucontext_file(file); if (IS_ERR(ucontext)) { ret = PTR_ERR(ucontext); goto out; } mutex_lock(&file->disassociation_lock); vma->vm_ops = &rdma_umap_ops; ret = ucontext->device->ops.mmap(ucontext, vma); mutex_unlock(&file->disassociation_lock); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; } /* * The VMA has been dup'd, initialize the vm_private_data with a new tracking * struct */ static void rdma_umap_open(struct vm_area_struct *vma) { struct ib_uverbs_file *ufile = vma->vm_file->private_data; struct rdma_umap_priv *opriv = vma->vm_private_data; struct rdma_umap_priv *priv; if (!opriv) return; /* We are racing with disassociation */ if (!down_read_trylock(&ufile->hw_destroy_rwsem)) goto out_zap; mutex_lock(&ufile->disassociation_lock); /* * Disassociation already completed, the VMA should already be zapped. */ if (!ufile->ucontext) goto out_unlock; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) goto out_unlock; rdma_umap_priv_init(priv, vma, opriv->entry); mutex_unlock(&ufile->disassociation_lock); up_read(&ufile->hw_destroy_rwsem); return; out_unlock: mutex_unlock(&ufile->disassociation_lock); up_read(&ufile->hw_destroy_rwsem); out_zap: /* * We can't allow the VMA to be created with the actual IO pages, that * would break our API contract, and it can't be stopped at this * point, so zap it. */ vma->vm_private_data = NULL; zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); } static void rdma_umap_close(struct vm_area_struct *vma) { struct ib_uverbs_file *ufile = vma->vm_file->private_data; struct rdma_umap_priv *priv = vma->vm_private_data; if (!priv) return; /* * The vma holds a reference on the struct file that created it, which * in turn means that the ib_uverbs_file is guaranteed to exist at * this point. 
*/ mutex_lock(&ufile->umap_lock); if (priv->entry) rdma_user_mmap_entry_put(priv->entry); list_del(&priv->list); mutex_unlock(&ufile->umap_lock); kfree(priv); } /* * Once the zap_vma_ptes has been called touches to the VMA will come here and * we return a dummy writable zero page for all the pfns. */ static vm_fault_t rdma_umap_fault(struct vm_fault *vmf) { struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data; struct rdma_umap_priv *priv = vmf->vma->vm_private_data; vm_fault_t ret = 0; if (!priv) return VM_FAULT_SIGBUS; /* Read only pages can just use the system zero page. */ if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) { vmf->page = ZERO_PAGE(vmf->address); get_page(vmf->page); return 0; } mutex_lock(&ufile->umap_lock); if (!ufile->disassociate_page) ufile->disassociate_page = alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0); if (ufile->disassociate_page) { /* * This VMA is forced to always be shared so this doesn't have * to worry about COW. */ vmf->page = ufile->disassociate_page; get_page(vmf->page); } else { ret = VM_FAULT_SIGBUS; } mutex_unlock(&ufile->umap_lock); return ret; } static const struct vm_operations_struct rdma_umap_ops = { .open = rdma_umap_open, .close = rdma_umap_close, .fault = rdma_umap_fault, }; void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) { struct rdma_umap_priv *priv, *next_priv; mutex_lock(&ufile->disassociation_lock); while (1) { struct mm_struct *mm = NULL; /* Get an arbitrary mm pointer that hasn't been cleaned yet */ mutex_lock(&ufile->umap_lock); while (!list_empty(&ufile->umaps)) { int ret; priv = list_first_entry(&ufile->umaps, struct rdma_umap_priv, list); mm = priv->vma->vm_mm; ret = mmget_not_zero(mm); if (!ret) { list_del_init(&priv->list); if (priv->entry) { rdma_user_mmap_entry_put(priv->entry); priv->entry = NULL; } mm = NULL; continue; } break; } mutex_unlock(&ufile->umap_lock); if (!mm) { mutex_unlock(&ufile->disassociation_lock); return; } /* * The umap_lock is nested under mmap_lock since it used within * the vma_ops callbacks, so we have to clean the list one mm * at a time to get the lock ordering right. Typically there * will only be one mm, so no big deal. */ mmap_read_lock(mm); mutex_lock(&ufile->umap_lock); list_for_each_entry_safe (priv, next_priv, &ufile->umaps, list) { struct vm_area_struct *vma = priv->vma; if (vma->vm_mm != mm) continue; list_del_init(&priv->list); zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); if (priv->entry) { rdma_user_mmap_entry_put(priv->entry); priv->entry = NULL; } } mutex_unlock(&ufile->umap_lock); mmap_read_unlock(mm); mmput(mm); } mutex_unlock(&ufile->disassociation_lock); } /** * rdma_user_mmap_disassociate() - Revoke mmaps for a device * @device: device to revoke * * This function should be called by drivers that need to disable mmaps for the * device, for instance because it is going to be reset. 
*/ void rdma_user_mmap_disassociate(struct ib_device *device) { struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client); struct ib_uverbs_file *ufile; mutex_lock(&uverbs_dev->lists_mutex); list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) { if (ufile->ucontext) uverbs_user_mmap_disassociate(ufile); } mutex_unlock(&uverbs_dev->lists_mutex); } EXPORT_SYMBOL(rdma_user_mmap_disassociate); /* * ib_uverbs_open() does not need the BKL: * * - the ib_uverbs_device structures are properly reference counted and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - there is no ioctl method to race against; * - the open method will either immediately run -ENXIO, or all * required initialization will be done. */ static int ib_uverbs_open(struct inode *inode, struct file *filp) { struct ib_uverbs_device *dev; struct ib_uverbs_file *file; struct ib_device *ib_dev; int ret; int module_dependent; int srcu_key; dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev); if (!refcount_inc_not_zero(&dev->refcount)) return -ENXIO; get_device(&dev->dev); srcu_key = srcu_read_lock(&dev->disassociate_srcu); mutex_lock(&dev->lists_mutex); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (!ib_dev) { ret = -EIO; goto err; } if (!rdma_dev_access_netns(ib_dev, current->nsproxy->net_ns)) { ret = -EPERM; goto err; } /* In case IB device supports disassociate ucontext, there is no hard * dependency between uverbs device and its low level device. */ module_dependent = !(ib_dev->ops.disassociate_ucontext); if (module_dependent) { if (!try_module_get(ib_dev->ops.owner)) { ret = -ENODEV; goto err; } } file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) { ret = -ENOMEM; if (module_dependent) goto err_module; goto err; } file->device = dev; kref_init(&file->ref); mutex_init(&file->ucontext_lock); spin_lock_init(&file->uobjects_lock); INIT_LIST_HEAD(&file->uobjects); init_rwsem(&file->hw_destroy_rwsem); mutex_init(&file->umap_lock); INIT_LIST_HEAD(&file->umaps); mutex_init(&file->disassociation_lock); filp->private_data = file; list_add_tail(&file->list, &dev->uverbs_file_list); mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); setup_ufile_idr_uobject(file); return stream_open(inode, filp); err_module: module_put(ib_dev->ops.owner); err: mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); if (refcount_dec_and_test(&dev->refcount)) ib_uverbs_comp_dev(dev); put_device(&dev->dev); return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); mutex_lock(&file->device->lists_mutex); list_del_init(&file->list); mutex_unlock(&file->device->lists_mutex); kref_put(&file->ref, ib_uverbs_release_file); return 0; } static const struct file_operations uverbs_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .open = ib_uverbs_open, .release = ib_uverbs_close, .unlocked_ioctl = ib_uverbs_ioctl, .compat_ioctl = compat_ptr_ioctl, }; static const struct file_operations uverbs_mmap_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .mmap = ib_uverbs_mmap, .open = ib_uverbs_open, .release = ib_uverbs_close, .unlocked_ioctl = ib_uverbs_ioctl, .compat_ioctl = compat_ptr_ioctl, }; static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data, struct ib_client_nl_info *res) { struct ib_uverbs_device 
*uverbs_dev = client_data; int ret; if (res->port != -1) return -EINVAL; res->abi = ibdev->ops.uverbs_abi_ver; res->cdev = &uverbs_dev->dev; /* * To support DRIVER_ID binding in userspace some of the driver need * upgrading to expose their PCI dependent revision information * through get_context instead of relying on modalias matching. When * the drivers are fixed they can drop this flag. */ if (!ibdev->ops.uverbs_no_driver_id_binding) { ret = nla_put_u32(res->nl_msg, RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID, ibdev->ops.driver_id); if (ret) return ret; } return 0; } static struct ib_client uverbs_client = { .name = "uverbs", .no_kverbs_req = true, .add = ib_uverbs_add_one, .remove = ib_uverbs_remove_one, .get_nl_info = ib_uverbs_get_nl_info, }; MODULE_ALIAS_RDMA_CLIENT("uverbs"); static ssize_t ibdev_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = container_of(device, struct ib_uverbs_device, dev); int ret = -ENODEV; int srcu_key; struct ib_device *ib_dev; srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev)); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; } static DEVICE_ATTR_RO(ibdev); static ssize_t abi_version_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = container_of(device, struct ib_uverbs_device, dev); int ret = -ENODEV; int srcu_key; struct ib_device *ib_dev; srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; } static DEVICE_ATTR_RO(abi_version); static struct attribute *ib_dev_attrs[] = { &dev_attr_abi_version.attr, &dev_attr_ibdev.attr, NULL, }; static const struct attribute_group dev_attr_group = { .attrs = ib_dev_attrs, }; static CLASS_ATTR_STRING(abi_version, S_IRUGO, __stringify(IB_USER_VERBS_ABI_VERSION)); static int ib_uverbs_create_uapi(struct ib_device *device, struct ib_uverbs_device *uverbs_dev) { struct uverbs_api *uapi; uapi = uverbs_alloc_api(device); if (IS_ERR(uapi)) return PTR_ERR(uapi); uverbs_dev->uapi = uapi; return 0; } static int ib_uverbs_add_one(struct ib_device *device) { int devnum; dev_t base; struct ib_uverbs_device *uverbs_dev; int ret; if (!device->ops.alloc_ucontext || device->type == RDMA_DEVICE_TYPE_SMI) return -EOPNOTSUPP; uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); if (!uverbs_dev) return -ENOMEM; ret = init_srcu_struct(&uverbs_dev->disassociate_srcu); if (ret) { kfree(uverbs_dev); return -ENOMEM; } device_initialize(&uverbs_dev->dev); uverbs_dev->dev.class = &uverbs_class; uverbs_dev->dev.parent = device->dev.parent; uverbs_dev->dev.release = ib_uverbs_release_dev; uverbs_dev->groups[0] = &dev_attr_group; uverbs_dev->dev.groups = uverbs_dev->groups; refcount_set(&uverbs_dev->refcount, 1); init_completion(&uverbs_dev->comp); uverbs_dev->xrcd_tree = RB_ROOT; mutex_init(&uverbs_dev->xrcd_tree_mutex); mutex_init(&uverbs_dev->lists_mutex); INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); rcu_assign_pointer(uverbs_dev->ib_dev, device); uverbs_dev->num_comp_vectors = device->num_comp_vectors; devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1, GFP_KERNEL); if (devnum < 0) { ret = -ENOMEM; goto err; } uverbs_dev->devnum = devnum; if (devnum >= IB_UVERBS_NUM_FIXED_MINOR) base = 
dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR; else base = IB_UVERBS_BASE_DEV + devnum; ret = ib_uverbs_create_uapi(device, uverbs_dev); if (ret) goto err_uapi; uverbs_dev->dev.devt = base; dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum); cdev_init(&uverbs_dev->cdev, device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops); uverbs_dev->cdev.owner = THIS_MODULE; ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev); if (ret) goto err_uapi; ib_set_client_data(device, &uverbs_client, uverbs_dev); return 0; err_uapi: ida_free(&uverbs_ida, devnum); err: if (refcount_dec_and_test(&uverbs_dev->refcount)) ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); put_device(&uverbs_dev->dev); return ret; } static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { struct ib_uverbs_file *file; /* Pending running commands to terminate */ uverbs_disassociate_api_pre(uverbs_dev); mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); list_del_init(&file->list); kref_get(&file->ref); /* We must release the mutex before going ahead and calling * uverbs_cleanup_ufile, as it might end up indirectly calling * uverbs_close, for example due to freeing the resources (e.g * mmput). */ mutex_unlock(&uverbs_dev->lists_mutex); uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE); kref_put(&file->ref, ib_uverbs_release_file); mutex_lock(&uverbs_dev->lists_mutex); } mutex_unlock(&uverbs_dev->lists_mutex); uverbs_disassociate_api(uverbs_dev->uapi); } static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) { struct ib_uverbs_device *uverbs_dev = client_data; int wait_clients = 1; cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev); ida_free(&uverbs_ida, uverbs_dev->devnum); if (device->ops.disassociate_ucontext) { /* We disassociate HW resources and immediately return. * Userspace will see a EIO errno for all future access. * Upon returning, ib_device may be freed internally and is not * valid any more. * uverbs_device is still available until all clients close * their files, then the uverbs device ref count will be zero * and its resources will be freed. * Note: At this point no more files can be opened since the * cdev was deleted, however active clients can still issue * commands and close their open files. 
*/ ib_uverbs_free_hw_resources(uverbs_dev, device); wait_clients = 0; } if (refcount_dec_and_test(&uverbs_dev->refcount)) ib_uverbs_comp_dev(uverbs_dev); if (wait_clients) wait_for_completion(&uverbs_dev->comp); put_device(&uverbs_dev->dev); } static int __init ib_uverbs_init(void) { int ret; ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_NUM_FIXED_MINOR, "infiniband_verbs"); if (ret) { pr_err("user_verbs: couldn't register device number\n"); goto out; } ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0, IB_UVERBS_NUM_DYNAMIC_MINOR, "infiniband_verbs"); if (ret) { pr_err("couldn't register dynamic device number\n"); goto out_alloc; } ret = class_register(&uverbs_class); if (ret) { pr_err("user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } ret = class_create_file(&uverbs_class, &class_attr_abi_version.attr); if (ret) { pr_err("user_verbs: couldn't create abi_version attribute\n"); goto out_class; } ret = ib_register_client(&uverbs_client); if (ret) { pr_err("user_verbs: couldn't register client\n"); goto out_class; } return 0; out_class: class_unregister(&uverbs_class); out_chrdev: unregister_chrdev_region(dynamic_uverbs_dev, IB_UVERBS_NUM_DYNAMIC_MINOR); out_alloc: unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_NUM_FIXED_MINOR); out: return ret; } static void __exit ib_uverbs_cleanup(void) { ib_unregister_client(&uverbs_client); class_unregister(&uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_NUM_FIXED_MINOR); unregister_chrdev_region(dynamic_uverbs_dev, IB_UVERBS_NUM_DYNAMIC_MINOR); mmu_notifier_synchronize(); } module_init(ib_uverbs_init); module_exit(ib_uverbs_cleanup);
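/*
 * A small user-space sketch (not part of the driver above) of the minor-number
 * arithmetic in ib_uverbs_add_one(): the first IB_UVERBS_NUM_FIXED_MINOR (32)
 * devices land in the legacy static region at major 231, minor 192 upward,
 * and everything beyond that falls into the region reserved at module init by
 * alloc_chrdev_region().  The DEMO_* constants mirror the enum above;
 * DEMO_DYN_MAJOR/DEMO_DYN_BASE_MINOR are made-up stand-ins for the runtime
 * value of dynamic_uverbs_dev.
 */
#include <stdio.h>

#define DEMO_UVERBS_MAJOR       231     /* IB_UVERBS_MAJOR */
#define DEMO_UVERBS_BASE_MINOR  192     /* IB_UVERBS_BASE_MINOR */
#define DEMO_NUM_FIXED_MINOR    32      /* IB_UVERBS_NUM_FIXED_MINOR */

/* stand-in for the runtime value of dynamic_uverbs_dev; invented here */
#define DEMO_DYN_MAJOR          510
#define DEMO_DYN_BASE_MINOR     0

static void demo_map(int devnum)
{
    if (devnum < DEMO_NUM_FIXED_MINOR)
        printf("uverbs%d -> fixed   %d:%d\n", devnum,
               DEMO_UVERBS_MAJOR, DEMO_UVERBS_BASE_MINOR + devnum);
    else
        printf("uverbs%d -> dynamic %d:%d\n", devnum,
               DEMO_DYN_MAJOR,
               DEMO_DYN_BASE_MINOR + devnum - DEMO_NUM_FIXED_MINOR);
}

int main(void)
{
    demo_map(0);    /* 231:192, the first fixed minor   */
    demo_map(31);   /* 231:223, the last fixed minor    */
    demo_map(32);   /* first minor in the dynamic range */
    return 0;
}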
// SPDX-License-Identifier: GPL-2.0-or-later /* * ALSA sequencer System services Client * Copyright (c) 1998-1999 by Frank van de Pol <fvdpol@coil.demon.nl> */ #include <linux/init.h> #include <linux/export.h> #include <linux/slab.h> #include <sound/core.h> #include "seq_system.h" #include "seq_timer.h" #include "seq_queue.h" /* internal client that provides system services, access to the timer etc. */ /* * Port "Timer" * - send tempo/start/stop etc. events to this port to manipulate the * queue's timer. The queue address is specified in * data.queue.queue. * - this port supports subscription. The received timer events are * broadcast to all subscribed clients. The modified tempo * value is stored in data.queue.value. * The modifying client/port is not sent. * * Port "Announce" * - does not receive messages * - supports subscription. For each client or port attaching to or * detaching from the system an announcement is sent to the subscribed * clients. * * Idea: the subscription mechanism might also come in handy for distributing * synchronisation and timing information. In this case we would ideally have * a list of subscribers for each type of sync (time, tick), for each timing * queue. * * NOTE: the queue to be started, stopped, etc. must be specified * in the data.queue.addr.queue field. queue is used only for * scheduling, and is no longer referred to as the affected queue. * They are used only for timer broadcast (see above).
* -- iwai */ /* client id of our system client */ static int sysclient = -1; /* port id numbers for this client */ static int announce_port = -1; /* number of subscriptions to announce port */ static int announce_subscribed; /* fill standard header data, source port & channel are filled in */ static int setheader(struct snd_seq_event * ev, int client, int port) { if (announce_port < 0 || !announce_subscribed) return -ENODEV; memset(ev, 0, sizeof(struct snd_seq_event)); ev->flags &= ~SNDRV_SEQ_EVENT_LENGTH_MASK; ev->flags |= SNDRV_SEQ_EVENT_LENGTH_FIXED; ev->source.client = sysclient; ev->source.port = announce_port; ev->dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS; /* fill data */ /*ev->data.addr.queue = SNDRV_SEQ_ADDRESS_UNKNOWN;*/ ev->data.addr.client = client; ev->data.addr.port = port; return 0; } /* entry points for broadcasting system events */ void snd_seq_system_broadcast(int client, int port, int type, bool atomic) { struct snd_seq_event ev; if (setheader(&ev, client, port) < 0) return; ev.type = type; snd_seq_kernel_client_dispatch(sysclient, &ev, atomic, 0); } EXPORT_SYMBOL_GPL(snd_seq_system_broadcast); /* entry points for broadcasting system events */ int snd_seq_system_notify(int client, int port, struct snd_seq_event *ev, bool atomic) { ev->flags = SNDRV_SEQ_EVENT_LENGTH_FIXED; ev->source.client = sysclient; ev->source.port = announce_port; ev->dest.client = client; ev->dest.port = port; return snd_seq_kernel_client_dispatch(sysclient, ev, atomic, 0); } /* call-back handler for timer events */ static int event_input_timer(struct snd_seq_event * ev, int direct, void *private_data, int atomic, int hop) { return snd_seq_control_queue(ev, atomic, hop); } static int sys_announce_subscribe(void *private_data, struct snd_seq_port_subscribe *info) { announce_subscribed++; return 0; } static int sys_announce_unsubscribe(void *private_data, struct snd_seq_port_subscribe *info) { if (snd_BUG_ON(!announce_subscribed)) return 0; announce_subscribed--; return 0; } /* register our internal client */ int __init snd_seq_system_client_init(void) { struct snd_seq_port_callback pcallbacks; struct snd_seq_port_info *port; int err; port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) return -ENOMEM; memset(&pcallbacks, 0, sizeof(pcallbacks)); pcallbacks.owner = THIS_MODULE; pcallbacks.event_input = event_input_timer; /* register client */ sysclient = snd_seq_create_kernel_client(NULL, 0, "System"); if (sysclient < 0) { kfree(port); return sysclient; } /* register timer */ strcpy(port->name, "Timer"); port->capability = SNDRV_SEQ_PORT_CAP_WRITE; /* accept queue control */ port->capability |= SNDRV_SEQ_PORT_CAP_READ|SNDRV_SEQ_PORT_CAP_SUBS_READ; /* for broadcast */ port->kernel = &pcallbacks; port->type = 0; port->flags = SNDRV_SEQ_PORT_FLG_GIVEN_PORT; port->addr.client = sysclient; port->addr.port = SNDRV_SEQ_PORT_SYSTEM_TIMER; err = snd_seq_kernel_client_ctl(sysclient, SNDRV_SEQ_IOCTL_CREATE_PORT, port); if (err < 0) goto error_port; /* register announcement port */ strcpy(port->name, "Announce"); port->capability = SNDRV_SEQ_PORT_CAP_READ|SNDRV_SEQ_PORT_CAP_SUBS_READ; /* for broadcast only */ pcallbacks.event_input = NULL; pcallbacks.subscribe = sys_announce_subscribe; pcallbacks.unsubscribe = sys_announce_unsubscribe; port->kernel = &pcallbacks; port->type = 0; port->flags = SNDRV_SEQ_PORT_FLG_GIVEN_PORT; port->addr.client = sysclient; port->addr.port = SNDRV_SEQ_PORT_SYSTEM_ANNOUNCE; err = snd_seq_kernel_client_ctl(sysclient, SNDRV_SEQ_IOCTL_CREATE_PORT, port); if (err < 0) goto error_port; 
announce_port = port->addr.port; kfree(port); return 0; error_port: snd_seq_system_client_done(); kfree(port); return err; } /* unregister our internal client */ void snd_seq_system_client_done(void) { int oldsysclient = sysclient; if (oldsysclient >= 0) { sysclient = -1; announce_port = -1; snd_seq_delete_kernel_client(oldsysclient); } }
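/*
 * A toy user-space model (not part of the ALSA code above) of the
 * subscription gate described in the comments: setheader() refuses to build
 * an event while announce_subscribed is zero, so snd_seq_system_broadcast()
 * is effectively a no-op until at least one client subscribes to the
 * "Announce" port.  All demo_* names are invented for this sketch.
 */
#include <stdio.h>

static int demo_subscribers;    /* plays the role of announce_subscribed */

static void demo_subscribe(void)   { demo_subscribers++; }
static void demo_unsubscribe(void) { if (demo_subscribers) demo_subscribers--; }

/* mirrors the broadcast path: drop the event when nobody is listening */
static void demo_broadcast(const char *what)
{
    if (!demo_subscribers) {
        printf("'%s' dropped: no subscribers\n", what);
        return;
    }
    printf("'%s' delivered to %d subscriber(s)\n", what, demo_subscribers);
}

int main(void)
{
    demo_broadcast("client started");   /* dropped */
    demo_subscribe();
    demo_broadcast("port created");     /* delivered */
    demo_unsubscribe();
    return 0;
}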
// SPDX-License-Identifier: GPL-2.0-or-later /* Module signature checker * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/module_signature.h> #include <linux/string.h> #include <linux/verification.h> #include <linux/security.h> #include <crypto/public_key.h> #include <uapi/linux/module.h> #include "internal.h" #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "module." static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); module_param(sig_enforce, bool_enable_only, 0644); /* * Export the sig_enforce kernel cmdline parameter so that other subsystems can * rely on it instead of checking the CONFIG_MODULE_SIG_FORCE config directly. */ bool is_module_sig_enforced(void) { return sig_enforce; } EXPORT_SYMBOL(is_module_sig_enforced); void set_module_sig_enforced(void) { sig_enforce = true; } /* * Verify the signature on a module. */ int mod_verify_sig(const void *mod, struct load_info *info) { struct module_signature ms; size_t sig_len, modlen = info->len; int ret; pr_devel("==>%s(,%zu)\n", __func__, modlen); if (modlen <= sizeof(ms)) return -EBADMSG; memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms)); ret = mod_check_sig(&ms, modlen, "module"); if (ret) return ret; sig_len = be32_to_cpu(ms.sig_len); modlen -= sig_len + sizeof(ms); info->len = modlen; return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, VERIFY_USE_SECONDARY_KEYRING, VERIFYING_MODULE_SIGNATURE, NULL, NULL); } int module_sig_check(struct load_info *info, int flags) { int err = -ENODATA; const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; const char *reason; const void *mod = info->hdr; bool mangled_module = flags & (MODULE_INIT_IGNORE_MODVERSIONS | MODULE_INIT_IGNORE_VERMAGIC); /* * Do not allow mangled modules as a module with version information * removed is no longer the module that was signed. */ if (!mangled_module && info->len > markerlen && memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { /* We truncate the module to discard the signature */ info->len -= markerlen; err = mod_verify_sig(mod, info); if (!err) { info->sig_ok = true; return 0; } } /* * We don't permit modules to be loaded into a trusted kernel * without a valid signature on them, but if we're not enforcing, * certain errors are non-fatal. */ switch (err) { case -ENODATA: reason = "unsigned module"; break; case -ENOPKG: reason = "module with unsupported crypto"; break; case -ENOKEY: reason = "module with unavailable key"; break; default: /* * All other errors are fatal, including lack of memory, * unparseable signatures, and signature check failures -- * even if signatures aren't required. */ return err; } if (is_module_sig_enforced()) { pr_notice("Loading of %s is rejected\n", reason); return -EKEYREJECTED; } return security_locked_down(LOCKDOWN_MODULE_SIGNATURE); }
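/*
 * A self-contained user-space sketch (not the kernel implementation) of the
 * tail layout that module_sig_check() and mod_verify_sig() walk:
 * payload | signature | descriptor | marker string.  The descriptor below is
 * a simplified stand-in for struct module_signature (only a big-endian
 * sig_len is modelled) and DEMO_MARKER stands in for the kernel's
 * MODULE_SIG_STRING; all demo_* names are invented for this sketch.
 */
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>          /* ntohl()/htonl() for the big-endian length */

#define DEMO_MARKER "~Demo signature appended~\n"

struct demo_trailer {
    unsigned int sig_len;       /* big-endian, like module_signature.sig_len */
};

/* peel the marker and trailer off the end; return the payload length or -1 */
static long demo_strip(const unsigned char *buf, size_t len)
{
    size_t markerlen = sizeof(DEMO_MARKER) - 1;
    struct demo_trailer tr;
    size_t sig_len;

    if (len < markerlen ||
        memcmp(buf + len - markerlen, DEMO_MARKER, markerlen) != 0)
        return -1;              /* no marker: treated as an unsigned module */
    len -= markerlen;

    if (len < sizeof(tr))
        return -1;
    memcpy(&tr, buf + len - sizeof(tr), sizeof(tr));
    sig_len = ntohl(tr.sig_len);

    if (len < sig_len + sizeof(tr))
        return -1;              /* malformed trailer */
    return (long)(len - sig_len - sizeof(tr));  /* payload bytes only */
}

int main(void)
{
    unsigned char blob[64] = "MODULE-PAYLOAD";  /* 14 payload bytes */
    size_t pos = 14;
    struct demo_trailer tr = { .sig_len = htonl(4) };

    memcpy(blob + pos, "SIGN", 4);  pos += 4;   /* fake signature bytes */
    memcpy(blob + pos, &tr, sizeof(tr)); pos += sizeof(tr);
    memcpy(blob + pos, DEMO_MARKER, sizeof(DEMO_MARKER) - 1);
    pos += sizeof(DEMO_MARKER) - 1;

    printf("payload length = %ld\n", demo_strip(blob, pos));  /* prints 14 */
    return 0;
}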
231 232 2 71 81 52 25 189 189 10 1 138 1 26 1 13 6 6 6 6 7 19 6 247 1 2 148 28 103 4 103 104 4 30 22 22 8 8 6 4 22 2 247 36 247 248 246 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 // SPDX-License-Identifier: GPL-2.0-only /* * common LSM auditing functions * * Based on code written for SELinux by : * Stephen Smalley, <sds@tycho.nsa.gov> * James Morris <jmorris@redhat.com> * Author : Etienne Basset, <etienne.basset@ensta.org> */ #include <linux/types.h> #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/gfp.h> #include <linux/fs.h> #include <linux/init.h> #include <net/sock.h> #include <linux/un.h> #include <net/af_unix.h> #include <linux/audit.h> #include <linux/ipv6.h> #include <linux/ip.h> #include <net/ip.h> #include <net/ipv6.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/dccp.h> #include <linux/sctp.h> #include <linux/lsm_audit.h> #include <linux/security.h> /** * ipv4_skb_to_auditdata : fill auditdata from skb * @skb : the skb * @ad : the audit data to fill * @proto : the layer 4 protocol * * return 0 on success */ int ipv4_skb_to_auditdata(struct sk_buff *skb, struct common_audit_data *ad, u8 *proto) { int ret = 0; struct iphdr *ih; ih = ip_hdr(skb); ad->u.net->v4info.saddr = ih->saddr; ad->u.net->v4info.daddr = ih->daddr; if (proto) *proto = ih->protocol; /* non initial fragment */ if (ntohs(ih->frag_off) & IP_OFFSET) return 0; switch (ih->protocol) { case IPPROTO_TCP: { struct tcphdr *th = tcp_hdr(skb); ad->u.net->sport = th->source; ad->u.net->dport = th->dest; break; } case IPPROTO_UDP: { struct udphdr *uh = udp_hdr(skb); ad->u.net->sport = uh->source; ad->u.net->dport = uh->dest; break; } case IPPROTO_DCCP: { struct dccp_hdr *dh = dccp_hdr(skb); ad->u.net->sport = dh->dccph_sport; ad->u.net->dport = dh->dccph_dport; break; } case IPPROTO_SCTP: { struct sctphdr 
*sh = sctp_hdr(skb); ad->u.net->sport = sh->source; ad->u.net->dport = sh->dest; break; } default: ret = -EINVAL; } return ret; } #if IS_ENABLED(CONFIG_IPV6) /** * ipv6_skb_to_auditdata : fill auditdata from skb * @skb : the skb * @ad : the audit data to fill * @proto : the layer 4 protocol * * return 0 on success */ int ipv6_skb_to_auditdata(struct sk_buff *skb, struct common_audit_data *ad, u8 *proto) { int offset, ret = 0; struct ipv6hdr *ip6; u8 nexthdr; __be16 frag_off; ip6 = ipv6_hdr(skb); ad->u.net->v6info.saddr = ip6->saddr; ad->u.net->v6info.daddr = ip6->daddr; /* IPv6 can have several extension header before the Transport header * skip them */ offset = skb_network_offset(skb); offset += sizeof(*ip6); nexthdr = ip6->nexthdr; offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); if (offset < 0) return 0; if (proto) *proto = nexthdr; switch (nexthdr) { case IPPROTO_TCP: { struct tcphdr _tcph, *th; th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); if (th == NULL) break; ad->u.net->sport = th->source; ad->u.net->dport = th->dest; break; } case IPPROTO_UDP: { struct udphdr _udph, *uh; uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); if (uh == NULL) break; ad->u.net->sport = uh->source; ad->u.net->dport = uh->dest; break; } case IPPROTO_DCCP: { struct dccp_hdr _dccph, *dh; dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); if (dh == NULL) break; ad->u.net->sport = dh->dccph_sport; ad->u.net->dport = dh->dccph_dport; break; } case IPPROTO_SCTP: { struct sctphdr _sctph, *sh; sh = skb_header_pointer(skb, offset, sizeof(_sctph), &_sctph); if (sh == NULL) break; ad->u.net->sport = sh->source; ad->u.net->dport = sh->dest; break; } default: ret = -EINVAL; } return ret; } #endif static inline void print_ipv6_addr(struct audit_buffer *ab, const struct in6_addr *addr, __be16 port, const char *name1, const char *name2) { if (!ipv6_addr_any(addr)) audit_log_format(ab, " %s=%pI6c", name1, addr); if (port) audit_log_format(ab, " %s=%d", name2, ntohs(port)); } static inline void print_ipv4_addr(struct audit_buffer *ab, __be32 addr, __be16 port, const char *name1, const char *name2) { if (addr) audit_log_format(ab, " %s=%pI4", name1, &addr); if (port) audit_log_format(ab, " %s=%d", name2, ntohs(port)); } /** * dump_common_audit_data - helper to dump common audit data * @ab : the audit buffer * @a : common audit data * */ static void dump_common_audit_data(struct audit_buffer *ab, struct common_audit_data *a) { char comm[sizeof(current->comm)]; /* * To keep stack sizes in check force programmers to notice if they * start making this union too large! See struct lsm_network_audit * as an example of how to deal with large data. 
*/ BUILD_BUG_ON(sizeof(a->u) > sizeof(void *)*2); audit_log_format(ab, " pid=%d comm=", task_tgid_nr(current)); audit_log_untrustedstring(ab, get_task_comm(comm, current)); switch (a->type) { case LSM_AUDIT_DATA_NONE: return; case LSM_AUDIT_DATA_IPC: audit_log_format(ab, " ipc_key=%d ", a->u.ipc_id); break; case LSM_AUDIT_DATA_CAP: audit_log_format(ab, " capability=%d ", a->u.cap); break; case LSM_AUDIT_DATA_PATH: { struct inode *inode; audit_log_d_path(ab, " path=", &a->u.path); inode = d_backing_inode(a->u.path.dentry); if (inode) { audit_log_format(ab, " dev="); audit_log_untrustedstring(ab, inode->i_sb->s_id); audit_log_format(ab, " ino=%lu", inode->i_ino); } break; } case LSM_AUDIT_DATA_FILE: { struct inode *inode; audit_log_d_path(ab, " path=", &a->u.file->f_path); inode = file_inode(a->u.file); if (inode) { audit_log_format(ab, " dev="); audit_log_untrustedstring(ab, inode->i_sb->s_id); audit_log_format(ab, " ino=%lu", inode->i_ino); } break; } case LSM_AUDIT_DATA_IOCTL_OP: { struct inode *inode; audit_log_d_path(ab, " path=", &a->u.op->path); inode = a->u.op->path.dentry->d_inode; if (inode) { audit_log_format(ab, " dev="); audit_log_untrustedstring(ab, inode->i_sb->s_id); audit_log_format(ab, " ino=%lu", inode->i_ino); } audit_log_format(ab, " ioctlcmd=0x%hx", a->u.op->cmd); break; } case LSM_AUDIT_DATA_DENTRY: { struct inode *inode; audit_log_format(ab, " name="); spin_lock(&a->u.dentry->d_lock); audit_log_untrustedstring(ab, a->u.dentry->d_name.name); spin_unlock(&a->u.dentry->d_lock); inode = d_backing_inode(a->u.dentry); if (inode) { audit_log_format(ab, " dev="); audit_log_untrustedstring(ab, inode->i_sb->s_id); audit_log_format(ab, " ino=%lu", inode->i_ino); } break; } case LSM_AUDIT_DATA_INODE: { struct dentry *dentry; struct inode *inode; rcu_read_lock(); inode = a->u.inode; dentry = d_find_alias_rcu(inode); if (dentry) { audit_log_format(ab, " name="); spin_lock(&dentry->d_lock); audit_log_untrustedstring(ab, dentry->d_name.name); spin_unlock(&dentry->d_lock); } audit_log_format(ab, " dev="); audit_log_untrustedstring(ab, inode->i_sb->s_id); audit_log_format(ab, " ino=%lu", inode->i_ino); rcu_read_unlock(); break; } case LSM_AUDIT_DATA_TASK: { struct task_struct *tsk = a->u.tsk; if (tsk) { pid_t pid = task_tgid_nr(tsk); if (pid) { char tskcomm[sizeof(tsk->comm)]; audit_log_format(ab, " opid=%d ocomm=", pid); audit_log_untrustedstring(ab, get_task_comm(tskcomm, tsk)); } } break; } case LSM_AUDIT_DATA_NET: if (a->u.net->sk) { const struct sock *sk = a->u.net->sk; const struct unix_sock *u; struct unix_address *addr; int len = 0; char *p = NULL; switch (sk->sk_family) { case AF_INET: { const struct inet_sock *inet = inet_sk(sk); print_ipv4_addr(ab, inet->inet_rcv_saddr, inet->inet_sport, "laddr", "lport"); print_ipv4_addr(ab, inet->inet_daddr, inet->inet_dport, "faddr", "fport"); break; } #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { const struct inet_sock *inet = inet_sk(sk); print_ipv6_addr(ab, &sk->sk_v6_rcv_saddr, inet->inet_sport, "laddr", "lport"); print_ipv6_addr(ab, &sk->sk_v6_daddr, inet->inet_dport, "faddr", "fport"); break; } #endif case AF_UNIX: u = unix_sk(sk); addr = smp_load_acquire(&u->addr); if (!addr) break; if (u->path.dentry) { audit_log_d_path(ab, " path=", &u->path); break; } len = addr->len-sizeof(short); p = &addr->name->sun_path[0]; audit_log_format(ab, " path="); if (*p) audit_log_untrustedstring(ab, p); else audit_log_n_hex(ab, p, len); break; } } switch (a->u.net->family) { case AF_INET: print_ipv4_addr(ab, a->u.net->v4info.saddr, a->u.net->sport, 
"saddr", "src"); print_ipv4_addr(ab, a->u.net->v4info.daddr, a->u.net->dport, "daddr", "dest"); break; case AF_INET6: print_ipv6_addr(ab, &a->u.net->v6info.saddr, a->u.net->sport, "saddr", "src"); print_ipv6_addr(ab, &a->u.net->v6info.daddr, a->u.net->dport, "daddr", "dest"); break; } if (a->u.net->netif > 0) { struct net_device *dev; /* NOTE: we always use init's namespace */ dev = dev_get_by_index(&init_net, a->u.net->netif); if (dev) { audit_log_format(ab, " netif=%s", dev->name); dev_put(dev); } } break; #ifdef CONFIG_KEYS case LSM_AUDIT_DATA_KEY: audit_log_format(ab, " key_serial=%u", a->u.key_struct.key); if (a->u.key_struct.key_desc) { audit_log_format(ab, " key_desc="); audit_log_untrustedstring(ab, a->u.key_struct.key_desc); } break; #endif case LSM_AUDIT_DATA_KMOD: audit_log_format(ab, " kmod="); audit_log_untrustedstring(ab, a->u.kmod_name); break; case LSM_AUDIT_DATA_IBPKEY: { struct in6_addr sbn_pfx; memset(&sbn_pfx.s6_addr, 0, sizeof(sbn_pfx.s6_addr)); memcpy(&sbn_pfx.s6_addr, &a->u.ibpkey->subnet_prefix, sizeof(a->u.ibpkey->subnet_prefix)); audit_log_format(ab, " pkey=0x%x subnet_prefix=%pI6c", a->u.ibpkey->pkey, &sbn_pfx); break; } case LSM_AUDIT_DATA_IBENDPORT: audit_log_format(ab, " device=%s port_num=%u", a->u.ibendport->dev_name, a->u.ibendport->port); break; case LSM_AUDIT_DATA_LOCKDOWN: audit_log_format(ab, " lockdown_reason=\"%s\"", lockdown_reasons[a->u.reason]); break; case LSM_AUDIT_DATA_ANONINODE: audit_log_format(ab, " anonclass=%s", a->u.anonclass); break; case LSM_AUDIT_DATA_NLMSGTYPE: audit_log_format(ab, " nl-msgtype=%hu", a->u.nlmsg_type); break; } /* switch (a->type) */ } /** * common_lsm_audit - generic LSM auditing function * @a: auxiliary audit data * @pre_audit: lsm-specific pre-audit callback * @post_audit: lsm-specific post-audit callback * * setup the audit buffer for common security information * uses callback to print LSM specific information */ void common_lsm_audit(struct common_audit_data *a, void (*pre_audit)(struct audit_buffer *, void *), void (*post_audit)(struct audit_buffer *, void *)) { struct audit_buffer *ab; if (a == NULL) return; /* we use GFP_ATOMIC so we won't sleep */ ab = audit_log_start(audit_context(), GFP_ATOMIC | __GFP_NOWARN, AUDIT_AVC); if (ab == NULL) return; if (pre_audit) pre_audit(ab, a); dump_common_audit_data(ab, a); if (post_audit) post_audit(ab, a); audit_log_end(ab); }
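The two callbacks passed to common_lsm_audit() bracket the common dump produced by dump_common_audit_data(). Below is a minimal sketch of how a hypothetical LSM could use it to report a path-based denial; the example_* names are illustrative assumptions, while the common_audit_data fields and helpers are the ones shown above.

#include <linux/audit.h>
#include <linux/lsm_audit.h>
#include <linux/path.h>

/* Printed before the common fields (pid, comm, path, ...). */
static void example_pre_audit(struct audit_buffer *ab, void *a)
{
	audit_log_format(ab, "lsm=example_lsm");
}

/* Printed after the common fields, typically the access decision. */
static void example_post_audit(struct audit_buffer *ab, void *a)
{
	audit_log_format(ab, " result=denied");
}

/* Emit one AVC-style record for a denied access on @path. */
static void example_report_path_denial(const struct path *path)
{
	struct common_audit_data ad;

	ad.type = LSM_AUDIT_DATA_PATH;
	ad.u.path = *path;
	common_lsm_audit(&ad, example_pre_audit, example_post_audit);
}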
884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 /* * Copyright © 2012 Red Hat * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * Authors: * Dave Airlie <airlied@redhat.com> * Rob Clark <rob.clark@linaro.org> * */ #include <linux/export.h> #include <linux/dma-buf.h> #include <linux/rbtree.h> #include <linux/module.h> #include <drm/drm.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_framebuffer.h> #include <drm/drm_gem.h> #include <drm/drm_prime.h> #include "drm_internal.h" MODULE_IMPORT_NS("DMA_BUF"); /** * DOC: overview and lifetime rules * * Similar to GEM global names, PRIME file descriptors are also used to share * buffer objects across processes. They offer additional security: as file * descriptors must be explicitly sent over UNIX domain sockets to be shared * between applications, they can't be guessed like the globally unique GEM * names. * * Drivers that support the PRIME API implement the drm_gem_object_funcs.export * and &drm_driver.gem_prime_import hooks. &dma_buf_ops implementations for * drivers are all individually exported for drivers which need to overwrite * or reimplement some of them. * * Reference Counting for GEM Drivers * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * On the export the &dma_buf holds a reference to the exported buffer object, * usually a &drm_gem_object. It takes this reference in the PRIME_HANDLE_TO_FD * IOCTL, when it first calls &drm_gem_object_funcs.export * and stores the exporting GEM object in the &dma_buf.priv field. This * reference needs to be released when the final reference to the &dma_buf * itself is dropped and its &dma_buf_ops.release function is called. 
For * GEM-based drivers, the &dma_buf should be exported using * drm_gem_dmabuf_export() and then released by drm_gem_dmabuf_release(). * * Thus the chain of references always flows in one direction, avoiding loops: * importing GEM object -> dma-buf -> exported GEM bo. A further complication * are the lookup caches for import and export. These are required to guarantee * that any given object will always have only one unique userspace handle. This * is required to allow userspace to detect duplicated imports, since some GEM * drivers do fail command submissions if a given buffer object is listed more * than once. These import and export caches in &drm_prime_file_private only * retain a weak reference, which is cleaned up when the corresponding object is * released. * * Self-importing: If userspace is using PRIME as a replacement for flink then * it will get a fd->handle request for a GEM object that it created. Drivers * should detect this situation and return back the underlying object from the * dma-buf private. For GEM based drivers this is handled in * drm_gem_prime_import() already. */ struct drm_prime_member { struct dma_buf *dma_buf; uint32_t handle; struct rb_node dmabuf_rb; struct rb_node handle_rb; }; static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv, struct dma_buf *dma_buf, uint32_t handle) { struct drm_prime_member *member; struct rb_node **p, *rb; member = kmalloc(sizeof(*member), GFP_KERNEL); if (!member) return -ENOMEM; get_dma_buf(dma_buf); member->dma_buf = dma_buf; member->handle = handle; rb = NULL; p = &prime_fpriv->dmabufs.rb_node; while (*p) { struct drm_prime_member *pos; rb = *p; pos = rb_entry(rb, struct drm_prime_member, dmabuf_rb); if (dma_buf > pos->dma_buf) p = &rb->rb_right; else p = &rb->rb_left; } rb_link_node(&member->dmabuf_rb, rb, p); rb_insert_color(&member->dmabuf_rb, &prime_fpriv->dmabufs); rb = NULL; p = &prime_fpriv->handles.rb_node; while (*p) { struct drm_prime_member *pos; rb = *p; pos = rb_entry(rb, struct drm_prime_member, handle_rb); if (handle > pos->handle) p = &rb->rb_right; else p = &rb->rb_left; } rb_link_node(&member->handle_rb, rb, p); rb_insert_color(&member->handle_rb, &prime_fpriv->handles); return 0; } static struct dma_buf *drm_prime_lookup_buf_by_handle(struct drm_prime_file_private *prime_fpriv, uint32_t handle) { struct rb_node *rb; rb = prime_fpriv->handles.rb_node; while (rb) { struct drm_prime_member *member; member = rb_entry(rb, struct drm_prime_member, handle_rb); if (member->handle == handle) return member->dma_buf; else if (member->handle < handle) rb = rb->rb_right; else rb = rb->rb_left; } return NULL; } static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpriv, struct dma_buf *dma_buf, uint32_t *handle) { struct rb_node *rb; rb = prime_fpriv->dmabufs.rb_node; while (rb) { struct drm_prime_member *member; member = rb_entry(rb, struct drm_prime_member, dmabuf_rb); if (member->dma_buf == dma_buf) { *handle = member->handle; return 0; } else if (member->dma_buf < dma_buf) { rb = rb->rb_right; } else { rb = rb->rb_left; } } return -ENOENT; } void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, uint32_t handle) { struct rb_node *rb; mutex_lock(&prime_fpriv->lock); rb = prime_fpriv->handles.rb_node; while (rb) { struct drm_prime_member *member; member = rb_entry(rb, struct drm_prime_member, handle_rb); if (member->handle == handle) { rb_erase(&member->handle_rb, &prime_fpriv->handles); rb_erase(&member->dmabuf_rb, &prime_fpriv->dmabufs); 
dma_buf_put(member->dma_buf); kfree(member); break; } else if (member->handle < handle) { rb = rb->rb_right; } else { rb = rb->rb_left; } } mutex_unlock(&prime_fpriv->lock); } void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv) { mutex_init(&prime_fpriv->lock); prime_fpriv->dmabufs = RB_ROOT; prime_fpriv->handles = RB_ROOT; } void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv) { /* by now drm_gem_release should've made sure the list is empty */ WARN_ON(!RB_EMPTY_ROOT(&prime_fpriv->dmabufs)); } /** * drm_gem_dmabuf_export - &dma_buf export implementation for GEM * @dev: parent device for the exported dmabuf * @exp_info: the export information used by dma_buf_export() * * This wraps dma_buf_export() for use by generic GEM drivers that are using * drm_gem_dmabuf_release(). In addition to calling dma_buf_export(), we take * a reference to the &drm_device and the exported &drm_gem_object (stored in * &dma_buf_export_info.priv) which is released by drm_gem_dmabuf_release(). * * Returns the new dmabuf. */ struct dma_buf *drm_gem_dmabuf_export(struct drm_device *dev, struct dma_buf_export_info *exp_info) { struct drm_gem_object *obj = exp_info->priv; struct dma_buf *dma_buf; dma_buf = dma_buf_export(exp_info); if (IS_ERR(dma_buf)) return dma_buf; drm_dev_get(dev); drm_gem_object_get(obj); dma_buf->file->f_mapping = obj->dev->anon_inode->i_mapping; return dma_buf; } EXPORT_SYMBOL(drm_gem_dmabuf_export); /** * drm_gem_dmabuf_release - &dma_buf release implementation for GEM * @dma_buf: buffer to be released * * Generic release function for dma_bufs exported as PRIME buffers. GEM drivers * must use this in their &dma_buf_ops structure as the release callback. * drm_gem_dmabuf_release() should be used in conjunction with * drm_gem_dmabuf_export(). */ void drm_gem_dmabuf_release(struct dma_buf *dma_buf) { struct drm_gem_object *obj = dma_buf->priv; struct drm_device *dev = obj->dev; /* drop the reference on the export fd holds */ drm_gem_object_put(obj); drm_dev_put(dev); } EXPORT_SYMBOL(drm_gem_dmabuf_release); /** * drm_gem_prime_fd_to_handle - PRIME import function for GEM drivers * @dev: drm_device to import into * @file_priv: drm file-private structure * @prime_fd: fd id of the dma-buf which should be imported * @handle: pointer to storage for the handle of the imported buffer object * * This is the PRIME import function which must be used mandatorily by GEM * drivers to ensure correct lifetime management of the underlying GEM object. * The actual importing of GEM object from the dma-buf is done through the * &drm_driver.gem_prime_import driver callback. * * Returns 0 on success or a negative error code on failure. */ int drm_gem_prime_fd_to_handle(struct drm_device *dev, struct drm_file *file_priv, int prime_fd, uint32_t *handle) { struct dma_buf *dma_buf; struct drm_gem_object *obj; int ret; dma_buf = dma_buf_get(prime_fd); if (IS_ERR(dma_buf)) return PTR_ERR(dma_buf); mutex_lock(&file_priv->prime.lock); ret = drm_prime_lookup_buf_handle(&file_priv->prime, dma_buf, handle); if (ret == 0) goto out_put; /* never seen this one, need to import */ mutex_lock(&dev->object_name_lock); if (dev->driver->gem_prime_import) obj = dev->driver->gem_prime_import(dev, dma_buf); else obj = drm_gem_prime_import(dev, dma_buf); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto out_unlock; } if (obj->dma_buf) { WARN_ON(obj->dma_buf != dma_buf); } else { obj->dma_buf = dma_buf; get_dma_buf(dma_buf); } /* _handle_create_tail unconditionally unlocks dev->object_name_lock. 
*/ ret = drm_gem_handle_create_tail(file_priv, obj, handle); drm_gem_object_put(obj); if (ret) goto out_put; ret = drm_prime_add_buf_handle(&file_priv->prime, dma_buf, *handle); mutex_unlock(&file_priv->prime.lock); if (ret) goto fail; dma_buf_put(dma_buf); return 0; fail: /* hmm, if driver attached, we are relying on the free-object path * to detach.. which seems ok.. */ drm_gem_handle_delete(file_priv, *handle); dma_buf_put(dma_buf); return ret; out_unlock: mutex_unlock(&dev->object_name_lock); out_put: mutex_unlock(&file_priv->prime.lock); dma_buf_put(dma_buf); return ret; } EXPORT_SYMBOL(drm_gem_prime_fd_to_handle); int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_prime_handle *args = data; if (dev->driver->prime_fd_to_handle) { return dev->driver->prime_fd_to_handle(dev, file_priv, args->fd, &args->handle); } return drm_gem_prime_fd_to_handle(dev, file_priv, args->fd, &args->handle); } static struct dma_buf *export_and_register_object(struct drm_device *dev, struct drm_gem_object *obj, uint32_t flags) { struct dma_buf *dmabuf; /* prevent races with concurrent gem_close. */ if (obj->handle_count == 0) { dmabuf = ERR_PTR(-ENOENT); return dmabuf; } if (obj->funcs && obj->funcs->export) dmabuf = obj->funcs->export(obj, flags); else dmabuf = drm_gem_prime_export(obj, flags); if (IS_ERR(dmabuf)) { /* normally the created dma-buf takes ownership of the ref, * but if that fails then drop the ref */ return dmabuf; } /* * Note that callers do not need to clean up the export cache * since the check for obj->handle_count guarantees that someone * will clean it up. */ obj->dma_buf = dmabuf; get_dma_buf(obj->dma_buf); return dmabuf; } /** * drm_gem_prime_handle_to_dmabuf - PRIME export function for GEM drivers * @dev: dev to export the buffer from * @file_priv: drm file-private structure * @handle: buffer handle to export * @flags: flags like DRM_CLOEXEC * * This is the PRIME export function which must be used mandatorily by GEM * drivers to ensure correct lifetime management of the underlying GEM object. * The actual exporting from GEM object to a dma-buf is done through the * &drm_gem_object_funcs.export callback. * * Unlike drm_gem_prime_handle_to_fd(), it returns the struct dma_buf it * has created, without attaching it to any file descriptors. The difference * between those two is similar to that between anon_inode_getfile() and * anon_inode_getfd(); insertion into descriptor table is something you * can not revert if any cleanup is needed, so the descriptor-returning * variants should only be used when you are past the last failure exit * and the only thing left is passing the new file descriptor to userland. * When all you need is the object itself or when you need to do something * else that might fail, use that one instead. 
*/ struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, uint32_t flags) { struct drm_gem_object *obj; int ret = 0; struct dma_buf *dmabuf; mutex_lock(&file_priv->prime.lock); obj = drm_gem_object_lookup(file_priv, handle); if (!obj) { dmabuf = ERR_PTR(-ENOENT); goto out_unlock; } dmabuf = drm_prime_lookup_buf_by_handle(&file_priv->prime, handle); if (dmabuf) { get_dma_buf(dmabuf); goto out; } mutex_lock(&dev->object_name_lock); /* re-export the original imported object */ if (obj->import_attach) { dmabuf = obj->import_attach->dmabuf; get_dma_buf(dmabuf); goto out_have_obj; } if (obj->dma_buf) { get_dma_buf(obj->dma_buf); dmabuf = obj->dma_buf; goto out_have_obj; } dmabuf = export_and_register_object(dev, obj, flags); if (IS_ERR(dmabuf)) { /* normally the created dma-buf takes ownership of the ref, * but if that fails then drop the ref */ mutex_unlock(&dev->object_name_lock); goto out; } out_have_obj: /* * If we've exported this buffer then cheat and add it to the import list * so we get the correct handle back. We must do this under the * protection of dev->object_name_lock to ensure that a racing gem close * ioctl doesn't miss to remove this buffer handle from the cache. */ ret = drm_prime_add_buf_handle(&file_priv->prime, dmabuf, handle); mutex_unlock(&dev->object_name_lock); if (ret) { dma_buf_put(dmabuf); dmabuf = ERR_PTR(ret); } out: drm_gem_object_put(obj); out_unlock: mutex_unlock(&file_priv->prime.lock); return dmabuf; } EXPORT_SYMBOL(drm_gem_prime_handle_to_dmabuf); /** * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers * @dev: dev to export the buffer from * @file_priv: drm file-private structure * @handle: buffer handle to export * @flags: flags like DRM_CLOEXEC * @prime_fd: pointer to storage for the fd id of the create dma-buf * * This is the PRIME export function which must be used mandatorily by GEM * drivers to ensure correct lifetime management of the underlying GEM object. * The actual exporting from GEM object to a dma-buf is done through the * &drm_gem_object_funcs.export callback. */ int drm_gem_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, uint32_t flags, int *prime_fd) { struct dma_buf *dmabuf; int fd = get_unused_fd_flags(flags); if (fd < 0) return fd; dmabuf = drm_gem_prime_handle_to_dmabuf(dev, file_priv, handle, flags); if (IS_ERR(dmabuf)) { put_unused_fd(fd); return PTR_ERR(dmabuf); } fd_install(fd, dmabuf->file); *prime_fd = fd; return 0; } EXPORT_SYMBOL(drm_gem_prime_handle_to_fd); int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_prime_handle *args = data; /* check flags are valid */ if (args->flags & ~(DRM_CLOEXEC | DRM_RDWR)) return -EINVAL; if (dev->driver->prime_handle_to_fd) { return dev->driver->prime_handle_to_fd(dev, file_priv, args->handle, args->flags, &args->fd); } return drm_gem_prime_handle_to_fd(dev, file_priv, args->handle, args->flags, &args->fd); } /** * DOC: PRIME Helpers * * Drivers can implement &drm_gem_object_funcs.export and * &drm_driver.gem_prime_import in terms of simpler APIs by using the helper * functions drm_gem_prime_export() and drm_gem_prime_import(). 
These functions * implement dma-buf support in terms of some lower-level helpers, which are * again exported for drivers to use individually: * * Exporting buffers * ~~~~~~~~~~~~~~~~~ * * Optional pinning of buffers is handled at dma-buf attach and detach time in * drm_gem_map_attach() and drm_gem_map_detach(). Backing storage itself is * handled by drm_gem_map_dma_buf() and drm_gem_unmap_dma_buf(), which relies on * &drm_gem_object_funcs.get_sg_table. If &drm_gem_object_funcs.get_sg_table is * unimplemented, exports into another device are rejected. * * For kernel-internal access there's drm_gem_dmabuf_vmap() and * drm_gem_dmabuf_vunmap(). Userspace mmap support is provided by * drm_gem_dmabuf_mmap(). * * Note that these export helpers can only be used if the underlying backing * storage is fully coherent and either permanently pinned, or it is safe to pin * it indefinitely. * * FIXME: The underlying helper functions are named rather inconsistently. * * Importing buffers * ~~~~~~~~~~~~~~~~~ * * Importing dma-bufs using drm_gem_prime_import() relies on * &drm_driver.gem_prime_import_sg_table. * * Note that similarly to the export helpers this permanently pins the * underlying backing storage. Which is ok for scanout, but is not the best * option for sharing lots of buffers for rendering. */ /** * drm_gem_map_attach - dma_buf attach implementation for GEM * @dma_buf: buffer to attach device to * @attach: buffer attachment data * * Calls &drm_gem_object_funcs.pin for device specific handling. This can be * used as the &dma_buf_ops.attach callback. Must be used together with * drm_gem_map_detach(). * * Returns 0 on success, negative error code on failure. */ int drm_gem_map_attach(struct dma_buf *dma_buf, struct dma_buf_attachment *attach) { struct drm_gem_object *obj = dma_buf->priv; /* * drm_gem_map_dma_buf() requires obj->get_sg_table(), but drivers * that implement their own ->map_dma_buf() do not. */ if (dma_buf->ops->map_dma_buf == drm_gem_map_dma_buf && !obj->funcs->get_sg_table) return -ENOSYS; return drm_gem_pin(obj); } EXPORT_SYMBOL(drm_gem_map_attach); /** * drm_gem_map_detach - dma_buf detach implementation for GEM * @dma_buf: buffer to detach from * @attach: attachment to be detached * * Calls &drm_gem_object_funcs.pin for device specific handling. Cleans up * &dma_buf_attachment from drm_gem_map_attach(). This can be used as the * &dma_buf_ops.detach callback. */ void drm_gem_map_detach(struct dma_buf *dma_buf, struct dma_buf_attachment *attach) { struct drm_gem_object *obj = dma_buf->priv; drm_gem_unpin(obj); } EXPORT_SYMBOL(drm_gem_map_detach); /** * drm_gem_map_dma_buf - map_dma_buf implementation for GEM * @attach: attachment whose scatterlist is to be returned * @dir: direction of DMA transfer * * Calls &drm_gem_object_funcs.get_sg_table and then maps the scatterlist. This * can be used as the &dma_buf_ops.map_dma_buf callback. Should be used together * with drm_gem_unmap_dma_buf(). * * Returns:sg_table containing the scatterlist to be returned; returns ERR_PTR * on error. May return -EINTR if it is interrupted by a signal. 
*/ struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach, enum dma_data_direction dir) { struct drm_gem_object *obj = attach->dmabuf->priv; struct sg_table *sgt; int ret; if (WARN_ON(dir == DMA_NONE)) return ERR_PTR(-EINVAL); if (WARN_ON(!obj->funcs->get_sg_table)) return ERR_PTR(-ENOSYS); sgt = obj->funcs->get_sg_table(obj); if (IS_ERR(sgt)) return sgt; ret = dma_map_sgtable(attach->dev, sgt, dir, DMA_ATTR_SKIP_CPU_SYNC); if (ret) { sg_free_table(sgt); kfree(sgt); sgt = ERR_PTR(ret); } return sgt; } EXPORT_SYMBOL(drm_gem_map_dma_buf); /** * drm_gem_unmap_dma_buf - unmap_dma_buf implementation for GEM * @attach: attachment to unmap buffer from * @sgt: scatterlist info of the buffer to unmap * @dir: direction of DMA transfer * * This can be used as the &dma_buf_ops.unmap_dma_buf callback. */ void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach, struct sg_table *sgt, enum dma_data_direction dir) { if (!sgt) return; dma_unmap_sgtable(attach->dev, sgt, dir, DMA_ATTR_SKIP_CPU_SYNC); sg_free_table(sgt); kfree(sgt); } EXPORT_SYMBOL(drm_gem_unmap_dma_buf); /** * drm_gem_dmabuf_vmap - dma_buf vmap implementation for GEM * @dma_buf: buffer to be mapped * @map: the virtual address of the buffer * * Sets up a kernel virtual mapping. This can be used as the &dma_buf_ops.vmap * callback. Calls into &drm_gem_object_funcs.vmap for device specific handling. * The kernel virtual address is returned in map. * * Returns 0 on success or a negative errno code otherwise. */ int drm_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct iosys_map *map) { struct drm_gem_object *obj = dma_buf->priv; return drm_gem_vmap(obj, map); } EXPORT_SYMBOL(drm_gem_dmabuf_vmap); /** * drm_gem_dmabuf_vunmap - dma_buf vunmap implementation for GEM * @dma_buf: buffer to be unmapped * @map: the virtual address of the buffer * * Releases a kernel virtual mapping. This can be used as the * &dma_buf_ops.vunmap callback. Calls into &drm_gem_object_funcs.vunmap for device specific handling. */ void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map) { struct drm_gem_object *obj = dma_buf->priv; drm_gem_vunmap(obj, map); } EXPORT_SYMBOL(drm_gem_dmabuf_vunmap); /** * drm_gem_prime_mmap - PRIME mmap function for GEM drivers * @obj: GEM object * @vma: Virtual address range * * This function sets up a userspace mapping for PRIME exported buffers using * the same codepath that is used for regular GEM buffer mapping on the DRM fd. * The fake GEM offset is added to vma->vm_pgoff and &drm_driver->fops->mmap is * called to set up the mapping. 
*/ int drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { struct drm_file *priv; struct file *fil; int ret; /* Add the fake offset */ vma->vm_pgoff += drm_vma_node_start(&obj->vma_node); if (obj->funcs && obj->funcs->mmap) { vma->vm_ops = obj->funcs->vm_ops; drm_gem_object_get(obj); ret = obj->funcs->mmap(obj, vma); if (ret) { drm_gem_object_put(obj); return ret; } vma->vm_private_data = obj; return 0; } priv = kzalloc(sizeof(*priv), GFP_KERNEL); fil = kzalloc(sizeof(*fil), GFP_KERNEL); if (!priv || !fil) { ret = -ENOMEM; goto out; } /* Used by drm_gem_mmap() to lookup the GEM object */ priv->minor = obj->dev->primary; fil->private_data = priv; ret = drm_vma_node_allow(&obj->vma_node, priv); if (ret) goto out; ret = obj->dev->driver->fops->mmap(fil, vma); drm_vma_node_revoke(&obj->vma_node, priv); out: kfree(priv); kfree(fil); return ret; } EXPORT_SYMBOL(drm_gem_prime_mmap); /** * drm_gem_dmabuf_mmap - dma_buf mmap implementation for GEM * @dma_buf: buffer to be mapped * @vma: virtual address range * * Provides memory mapping for the buffer. This can be used as the * &dma_buf_ops.mmap callback. It just forwards to drm_gem_prime_mmap(). * * Returns 0 on success or a negative error code on failure. */ int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) { struct drm_gem_object *obj = dma_buf->priv; return drm_gem_prime_mmap(obj, vma); } EXPORT_SYMBOL(drm_gem_dmabuf_mmap); static const struct dma_buf_ops drm_gem_prime_dmabuf_ops = { .cache_sgt_mapping = true, .attach = drm_gem_map_attach, .detach = drm_gem_map_detach, .map_dma_buf = drm_gem_map_dma_buf, .unmap_dma_buf = drm_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, .mmap = drm_gem_dmabuf_mmap, .vmap = drm_gem_dmabuf_vmap, .vunmap = drm_gem_dmabuf_vunmap, }; /** * drm_prime_pages_to_sg - converts a page array into an sg list * @dev: DRM device * @pages: pointer to the array of page pointers to convert * @nr_pages: length of the page vector * * This helper creates an sg table object from a set of pages * the driver is responsible for mapping the pages into the * importers address space for use with dma_buf itself. * * This is useful for implementing &drm_gem_object_funcs.get_sg_table. */ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev, struct page **pages, unsigned int nr_pages) { struct sg_table *sg; size_t max_segment = 0; int err; sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL); if (!sg) return ERR_PTR(-ENOMEM); if (dev) max_segment = dma_max_mapping_size(dev->dev); if (max_segment == 0) max_segment = UINT_MAX; err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0, (unsigned long)nr_pages << PAGE_SHIFT, max_segment, GFP_KERNEL); if (err) { kfree(sg); sg = ERR_PTR(err); } return sg; } EXPORT_SYMBOL(drm_prime_pages_to_sg); /** * drm_prime_get_contiguous_size - returns the contiguous size of the buffer * @sgt: sg_table describing the buffer to check * * This helper calculates the contiguous size in the DMA address space * of the buffer described by the provided sg_table. * * This is useful for implementing * &drm_gem_object_funcs.gem_prime_import_sg_table. 
*/ unsigned long drm_prime_get_contiguous_size(struct sg_table *sgt) { dma_addr_t expected = sg_dma_address(sgt->sgl); struct scatterlist *sg; unsigned long size = 0; int i; for_each_sgtable_dma_sg(sgt, sg, i) { unsigned int len = sg_dma_len(sg); if (!len) break; if (sg_dma_address(sg) != expected) break; expected += len; size += len; } return size; } EXPORT_SYMBOL(drm_prime_get_contiguous_size); /** * drm_gem_prime_export - helper library implementation of the export callback * @obj: GEM object to export * @flags: flags like DRM_CLOEXEC and DRM_RDWR * * This is the implementation of the &drm_gem_object_funcs.export functions for GEM drivers * using the PRIME helpers. It is used as the default in * drm_gem_prime_handle_to_fd(). */ struct dma_buf *drm_gem_prime_export(struct drm_gem_object *obj, int flags) { struct drm_device *dev = obj->dev; struct dma_buf_export_info exp_info = { .exp_name = KBUILD_MODNAME, /* white lie for debug */ .owner = dev->driver->fops->owner, .ops = &drm_gem_prime_dmabuf_ops, .size = obj->size, .flags = flags, .priv = obj, .resv = obj->resv, }; return drm_gem_dmabuf_export(dev, &exp_info); } EXPORT_SYMBOL(drm_gem_prime_export); /** * drm_gem_prime_import_dev - core implementation of the import callback * @dev: drm_device to import into * @dma_buf: dma-buf object to import * @attach_dev: struct device to dma_buf attach * * This is the core of drm_gem_prime_import(). It's designed to be called by * drivers who want to use a different device structure than &drm_device.dev for * attaching via dma_buf. This function calls * &drm_driver.gem_prime_import_sg_table internally. * * Drivers must arrange to call drm_prime_gem_destroy() from their * &drm_gem_object_funcs.free hook when using this function. */ struct drm_gem_object *drm_gem_prime_import_dev(struct drm_device *dev, struct dma_buf *dma_buf, struct device *attach_dev) { struct dma_buf_attachment *attach; struct sg_table *sgt; struct drm_gem_object *obj; int ret; if (dma_buf->ops == &drm_gem_prime_dmabuf_ops) { obj = dma_buf->priv; if (obj->dev == dev) { /* * Importing dmabuf exported from our own gem increases * refcount on gem itself instead of f_count of dmabuf. */ drm_gem_object_get(obj); return obj; } } if (!dev->driver->gem_prime_import_sg_table) return ERR_PTR(-EINVAL); attach = dma_buf_attach(dma_buf, attach_dev); if (IS_ERR(attach)) return ERR_CAST(attach); get_dma_buf(dma_buf); sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL); if (IS_ERR(sgt)) { ret = PTR_ERR(sgt); goto fail_detach; } obj = dev->driver->gem_prime_import_sg_table(dev, attach, sgt); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto fail_unmap; } obj->import_attach = attach; obj->resv = dma_buf->resv; return obj; fail_unmap: dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL); fail_detach: dma_buf_detach(dma_buf, attach); dma_buf_put(dma_buf); return ERR_PTR(ret); } EXPORT_SYMBOL(drm_gem_prime_import_dev); /** * drm_gem_prime_import - helper library implementation of the import callback * @dev: drm_device to import into * @dma_buf: dma-buf object to import * * This is the implementation of the gem_prime_import functions for GEM drivers * using the PRIME helpers. Drivers can use this as their * &drm_driver.gem_prime_import implementation. It is used as the default * implementation in drm_gem_prime_fd_to_handle(). * * Drivers must arrange to call drm_prime_gem_destroy() from their * &drm_gem_object_funcs.free hook when using this function. 
*/ struct drm_gem_object *drm_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) { return drm_gem_prime_import_dev(dev, dma_buf, dev->dev); } EXPORT_SYMBOL(drm_gem_prime_import); /** * drm_prime_sg_to_page_array - convert an sg table into a page array * @sgt: scatter-gather table to convert * @pages: array of page pointers to store the pages in * @max_entries: size of the passed-in array * * Exports an sg table into an array of pages. * * This function is deprecated and strongly discouraged to be used. * The page array is only useful for page faults and those can corrupt fields * in the struct page if they are not handled by the exporting driver. */ int __deprecated drm_prime_sg_to_page_array(struct sg_table *sgt, struct page **pages, int max_entries) { struct sg_page_iter page_iter; struct page **p = pages; for_each_sgtable_page(sgt, &page_iter, 0) { if (WARN_ON(p - pages >= max_entries)) return -1; *p++ = sg_page_iter_page(&page_iter); } return 0; } EXPORT_SYMBOL(drm_prime_sg_to_page_array); /** * drm_prime_sg_to_dma_addr_array - convert an sg table into a dma addr array * @sgt: scatter-gather table to convert * @addrs: array to store the dma bus address of each page * @max_entries: size of both the passed-in arrays * * Exports an sg table into an array of addresses. * * Drivers should use this in their &drm_driver.gem_prime_import_sg_table * implementation. */ int drm_prime_sg_to_dma_addr_array(struct sg_table *sgt, dma_addr_t *addrs, int max_entries) { struct sg_dma_page_iter dma_iter; dma_addr_t *a = addrs; for_each_sgtable_dma_page(sgt, &dma_iter, 0) { if (WARN_ON(a - addrs >= max_entries)) return -1; *a++ = sg_page_iter_dma_address(&dma_iter); } return 0; } EXPORT_SYMBOL(drm_prime_sg_to_dma_addr_array); /** * drm_prime_gem_destroy - helper to clean up a PRIME-imported GEM object * @obj: GEM object which was created from a dma-buf * @sg: the sg-table which was pinned at import time * * This is the cleanup functions which GEM drivers need to call when they use * drm_gem_prime_import() or drm_gem_prime_import_dev() to import dma-bufs. */ void drm_prime_gem_destroy(struct drm_gem_object *obj, struct sg_table *sg) { struct dma_buf_attachment *attach; struct dma_buf *dma_buf; attach = obj->import_attach; if (sg) dma_buf_unmap_attachment_unlocked(attach, sg, DMA_BIDIRECTIONAL); dma_buf = attach->dmabuf; dma_buf_detach(attach->dmabuf, attach); /* remove the reference */ dma_buf_put(dma_buf); } EXPORT_SYMBOL(drm_prime_gem_destroy);
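The DOC comments above describe the intended wiring: a GEM driver plugs the export side into &drm_gem_object_funcs.export and the import side into &drm_driver.gem_prime_import, with drm_prime_gem_destroy() called from its free hook for imported objects. The following is a minimal sketch of that wiring for a hypothetical foo_* driver; only the drm_gem_prime_* helpers and the DRIVER_GEM feature flag are taken from the code above, everything named foo_* is an assumed driver-side hook implemented elsewhere.

#include <linux/dma-buf.h>
#include <drm/drm_drv.h>
#include <drm/drm_gem.h>
#include <drm/drm_prime.h>

/* hypothetical driver hooks, implemented elsewhere in the driver */
extern void foo_gem_free(struct drm_gem_object *obj);
extern struct sg_table *foo_gem_get_sg_table(struct drm_gem_object *obj);
extern struct drm_gem_object *
foo_gem_prime_import_sg_table(struct drm_device *dev,
			      struct dma_buf_attachment *attach,
			      struct sg_table *sgt);

static const struct drm_gem_object_funcs foo_gem_funcs = {
	.free		= foo_gem_free,		/* calls drm_prime_gem_destroy() for imported objects */
	.export		= drm_gem_prime_export,	/* default export path built on drm_gem_prime_dmabuf_ops */
	.get_sg_table	= foo_gem_get_sg_table,	/* required by drm_gem_map_dma_buf() */
};

static const struct drm_driver foo_driver = {
	.driver_features	   = DRIVER_GEM,
	.gem_prime_import	   = drm_gem_prime_import,	/* default import path */
	.gem_prime_import_sg_table = foo_gem_prime_import_sg_table,
};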
69 69 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 /* SPDX-License-Identifier: GPL-2.0-only */ /* * kernfs.h - pseudo filesystem decoupled from vfs locking */ #ifndef __LINUX_KERNFS_H #define __LINUX_KERNFS_H #include <linux/err.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/idr.h> #include <linux/lockdep.h> #include <linux/rbtree.h> #include <linux/atomic.h> #include <linux/bug.h> #include <linux/types.h> #include <linux/uidgid.h> #include <linux/wait.h> #include <linux/rwsem.h> #include <linux/cache.h> struct file; struct dentry; struct iattr; struct seq_file; struct vm_area_struct; struct vm_operations_struct; struct super_block; struct file_system_type; struct poll_table_struct; struct fs_context; struct kernfs_fs_context; struct kernfs_open_node; struct kernfs_iattrs; /* * NR_KERNFS_LOCK_BITS determines size (NR_KERNFS_LOCKS) of hash * table of locks. * Having a small hash table would impact scalability, since * more and more kernfs_node objects will end up using same lock * and having a very large hash table would waste memory. 
* * At the moment size of hash table of locks is being set based on * the number of CPUs as follows: * * NR_CPU NR_KERNFS_LOCK_BITS NR_KERNFS_LOCKS * 1 1 2 * 2-3 2 4 * 4-7 4 16 * 8-15 6 64 * 16-31 8 256 * 32 and more 10 1024 * * The above relation between NR_CPU and number of locks is based * on some internal experimentation which involved booting qemu * with different values of smp, performing some sysfs operations * on all CPUs and observing how increase in number of locks impacts * completion time of these sysfs operations on each CPU. */ #ifdef CONFIG_SMP #define NR_KERNFS_LOCK_BITS (2 * (ilog2(NR_CPUS < 32 ? NR_CPUS : 32))) #else #define NR_KERNFS_LOCK_BITS 1 #endif #define NR_KERNFS_LOCKS (1 << NR_KERNFS_LOCK_BITS) /* * There's one kernfs_open_file for each open file and one kernfs_open_node * for each kernfs_node with one or more open files. * * filp->private_data points to seq_file whose ->private points to * kernfs_open_file. * * kernfs_open_files are chained at kernfs_open_node->files, which is * protected by kernfs_global_locks.open_file_mutex[i]. * * To reduce possible contention in sysfs access, arising due to single * locks, use an array of locks (e.g. open_file_mutex) and use kernfs_node * object address as hash keys to get the index of these locks. * * Hashed mutexes are safe to use here because operations using these don't * rely on global exclusion. * * In future we intend to replace other global locks with hashed ones as well. * kernfs_global_locks acts as a holder for all such hash tables. */ struct kernfs_global_locks { struct mutex open_file_mutex[NR_KERNFS_LOCKS]; }; enum kernfs_node_type { KERNFS_DIR = 0x0001, KERNFS_FILE = 0x0002, KERNFS_LINK = 0x0004, }; #define KERNFS_TYPE_MASK 0x000f #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK #define KERNFS_MAX_USER_XATTRS 128 #define KERNFS_USER_XATTR_SIZE_LIMIT (128 << 10) enum kernfs_node_flag { KERNFS_ACTIVATED = 0x0010, KERNFS_NS = 0x0020, KERNFS_HAS_SEQ_SHOW = 0x0040, KERNFS_HAS_MMAP = 0x0080, KERNFS_LOCKDEP = 0x0100, KERNFS_HIDDEN = 0x0200, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, KERNFS_EMPTY_DIR = 0x1000, KERNFS_HAS_RELEASE = 0x2000, KERNFS_REMOVING = 0x4000, }; /* @flags for kernfs_create_root() */ enum kernfs_root_flag { /* * kernfs_nodes are created in the deactivated state and invisible. * They require explicit kernfs_activate() to become visible. This * can be used to make related nodes become visible atomically * after all nodes are created successfully. */ KERNFS_ROOT_CREATE_DEACTIVATED = 0x0001, /* * For regular files, if the opener has CAP_DAC_OVERRIDE, open(2) * succeeds regardless of the RW permissions. sysfs had an extra * layer of enforcement where open(2) fails with -EACCES regardless * of CAP_DAC_OVERRIDE if the permission doesn't have the * respective read or write access at all (none of S_IRUGO or * S_IWUGO) or the respective operation isn't implemented. The * following flag enables that behavior. */ KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK = 0x0002, /* * The filesystem supports exportfs operation, so userspace can use * fhandle to access nodes of the fs. */ KERNFS_ROOT_SUPPORT_EXPORTOP = 0x0004, /* * Support user xattrs to be written to nodes rooted at this root. */ KERNFS_ROOT_SUPPORT_USER_XATTR = 0x0008, }; /* type-specific structures for kernfs_node union members */ struct kernfs_elem_dir { unsigned long subdirs; /* children rbtree starts here and goes through kn->rb */ struct rb_root children; /* * The kernfs hierarchy this directory belongs to. 
This fits * better directly in kernfs_node but is here to save space. */ struct kernfs_root *root; /* * Monotonic revision counter, used to identify if a directory * node has changed during negative dentry revalidation. */ unsigned long rev; }; struct kernfs_elem_symlink { struct kernfs_node *target_kn; }; struct kernfs_elem_attr { const struct kernfs_ops *ops; struct kernfs_open_node __rcu *open; loff_t size; struct kernfs_node *notify_next; /* for kernfs_notify() */ }; /* * kernfs_node - the building block of kernfs hierarchy. Each and every * kernfs node is represented by single kernfs_node. Most fields are * private to kernfs and shouldn't be accessed directly by kernfs users. * * As long as count reference is held, the kernfs_node itself is * accessible. Dereferencing elem or any other outer entity requires * active reference. */ struct kernfs_node { atomic_t count; atomic_t active; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif /* * Use kernfs_get_parent() and kernfs_name/path() instead of * accessing the following two fields directly. If the node is * never moved to a different parent, it is safe to access the * parent directly. */ struct kernfs_node *parent; const char *name; struct rb_node rb; const void *ns; /* namespace tag */ unsigned int hash; /* ns + name hash */ unsigned short flags; umode_t mode; union { struct kernfs_elem_dir dir; struct kernfs_elem_symlink symlink; struct kernfs_elem_attr attr; }; /* * 64bit unique ID. On 64bit ino setups, id is the ino. On 32bit, * the low 32bits are ino and upper generation. */ u64 id; void *priv; struct kernfs_iattrs *iattr; struct rcu_head rcu; }; /* * kernfs_syscall_ops may be specified on kernfs_create_root() to support * syscalls. These optional callbacks are invoked on the matching syscalls * and can perform any kernfs operations which don't necessarily have to be * the exact operation requested. An active reference is held for each * kernfs_node parameter. */ struct kernfs_syscall_ops { int (*show_options)(struct seq_file *sf, struct kernfs_root *root); int (*mkdir)(struct kernfs_node *parent, const char *name, umode_t mode); int (*rmdir)(struct kernfs_node *kn); int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name); int (*show_path)(struct seq_file *sf, struct kernfs_node *kn, struct kernfs_root *root); }; struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root); struct kernfs_open_file { /* published fields */ struct kernfs_node *kn; struct file *file; struct seq_file *seq_file; void *priv; /* private fields, do not use outside kernfs proper */ struct mutex mutex; struct mutex prealloc_mutex; int event; struct list_head list; char *prealloc_buf; size_t atomic_write_len; bool mmapped:1; bool released:1; const struct vm_operations_struct *vm_ops; }; struct kernfs_ops { /* * Optional open/release methods. Both are called with * @of->seq_file populated. */ int (*open)(struct kernfs_open_file *of); void (*release)(struct kernfs_open_file *of); /* * Read is handled by either seq_file or raw_read(). * * If seq_show() is present, seq_file path is active. Other seq * operations are optional and if not implemented, the behavior is * equivalent to single_open(). @sf->private points to the * associated kernfs_open_file. * * read() is bounced through kernel buffer and a read larger than * PAGE_SIZE results in partial operation of PAGE_SIZE. 
*/ int (*seq_show)(struct seq_file *sf, void *v); void *(*seq_start)(struct seq_file *sf, loff_t *ppos); void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); void (*seq_stop)(struct seq_file *sf, void *v); ssize_t (*read)(struct kernfs_open_file *of, char *buf, size_t bytes, loff_t off); /* * write() is bounced through kernel buffer. If atomic_write_len * is not set, a write larger than PAGE_SIZE results in partial * operations of PAGE_SIZE chunks. If atomic_write_len is set, * writes upto the specified size are executed atomically but * larger ones are rejected with -E2BIG. */ size_t atomic_write_len; /* * "prealloc" causes a buffer to be allocated at open for * all read/write requests. As ->seq_show uses seq_read() * which does its own allocation, it is incompatible with * ->prealloc. Provide ->read and ->write with ->prealloc. */ bool prealloc; ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes, loff_t off); __poll_t (*poll)(struct kernfs_open_file *of, struct poll_table_struct *pt); int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma); loff_t (*llseek)(struct kernfs_open_file *of, loff_t offset, int whence); }; /* * The kernfs superblock creation/mount parameter context. */ struct kernfs_fs_context { struct kernfs_root *root; /* Root of the hierarchy being mounted */ void *ns_tag; /* Namespace tag of the mount (or NULL) */ unsigned long magic; /* File system specific magic number */ /* The following are set/used by kernfs_mount() */ bool new_sb_created; /* Set to T if we allocated a new sb */ }; #ifdef CONFIG_KERNFS static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) { return kn->flags & KERNFS_TYPE_MASK; } static inline ino_t kernfs_id_ino(u64 id) { /* id is ino if ino_t is 64bit; otherwise, low 32bits */ if (sizeof(ino_t) >= sizeof(u64)) return id; else return (u32)id; } static inline u32 kernfs_id_gen(u64 id) { /* gen is fixed at 1 if ino_t is 64bit; otherwise, high 32bits */ if (sizeof(ino_t) >= sizeof(u64)) return 1; else return id >> 32; } static inline ino_t kernfs_ino(struct kernfs_node *kn) { return kernfs_id_ino(kn->id); } static inline ino_t kernfs_gen(struct kernfs_node *kn) { return kernfs_id_gen(kn->id); } /** * kernfs_enable_ns - enable namespace under a directory * @kn: directory of interest, should be empty * * This is to be called right after @kn is created to enable namespace * under it. All children of @kn must have non-NULL namespace tags and * only the ones which match the super_block's tag will be visible. */ static inline void kernfs_enable_ns(struct kernfs_node *kn) { WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR); WARN_ON_ONCE(!RB_EMPTY_ROOT(&kn->dir.children)); kn->flags |= KERNFS_NS; } /** * kernfs_ns_enabled - test whether namespace is enabled * @kn: the node to test * * Test whether namespace filtering is enabled for the children of @ns. 
*/ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) { return kn->flags & KERNFS_NS; } int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns); struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, const void *ns); void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry); struct kernfs_root *kernfs_root_from_sb(struct super_block *sb); struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn); struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct super_block *sb); struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, void *priv, const void *ns); struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key); struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target); void kernfs_activate(struct kernfs_node *kn); void kernfs_show(struct kernfs_node *kn, bool show); void kernfs_remove(struct kernfs_node *kn); void kernfs_break_active_protection(struct kernfs_node *kn); void kernfs_unbreak_active_protection(struct kernfs_node *kn); bool kernfs_remove_self(struct kernfs_node *kn); int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, const void *ns); int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns); int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr); __poll_t kernfs_generic_poll(struct kernfs_open_file *of, struct poll_table_struct *pt); void kernfs_notify(struct kernfs_node *kn); int kernfs_xattr_get(struct kernfs_node *kn, const char *name, void *value, size_t size); int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags); const void *kernfs_super_ns(struct super_block *sb); int kernfs_get_tree(struct fs_context *fc); void kernfs_free_fs_context(struct fs_context *fc); void kernfs_kill_sb(struct super_block *sb); void kernfs_init(void); struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, u64 id); #else /* CONFIG_KERNFS */ static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) { return 0; } /* whatever */ static inline void kernfs_enable_ns(struct kernfs_node *kn) { } static inline bool kernfs_ns_enabled(struct kernfs_node *kn) { return false; } static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) { return -ENOSYS; } static inline int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, char *buf, size_t buflen) { return -ENOSYS; } static inline void pr_cont_kernfs_name(struct 
kernfs_node *kn) { } static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { } static inline struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) { return NULL; } static inline struct kernfs_node * kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) { return NULL; } static inline struct kernfs_node * kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, const void *ns) { return NULL; } static inline void kernfs_get(struct kernfs_node *kn) { } static inline void kernfs_put(struct kernfs_node *kn) { } static inline struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) { return NULL; } static inline struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) { return NULL; } static inline struct inode * kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn) { return NULL; } static inline struct kernfs_root * kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv) { return ERR_PTR(-ENOSYS); } static inline void kernfs_destroy_root(struct kernfs_root *root) { } static inline struct kernfs_node * kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, void *priv, const void *ns) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * __kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target) { return ERR_PTR(-ENOSYS); } static inline void kernfs_activate(struct kernfs_node *kn) { } static inline void kernfs_remove(struct kernfs_node *kn) { } static inline bool kernfs_remove_self(struct kernfs_node *kn) { return false; } static inline int kernfs_remove_by_name_ns(struct kernfs_node *kn, const char *name, const void *ns) { return -ENOSYS; } static inline int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns) { return -ENOSYS; } static inline int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) { return -ENOSYS; } static inline __poll_t kernfs_generic_poll(struct kernfs_open_file *of, struct poll_table_struct *pt) { return -ENOSYS; } static inline void kernfs_notify(struct kernfs_node *kn) { } static inline int kernfs_xattr_get(struct kernfs_node *kn, const char *name, void *value, size_t size) { return -ENOSYS; } static inline int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags) { return -ENOSYS; } static inline const void *kernfs_super_ns(struct super_block *sb) { return NULL; } static inline int kernfs_get_tree(struct fs_context *fc) { return -ENOSYS; } static inline void kernfs_free_fs_context(struct fs_context *fc) { } static inline void kernfs_kill_sb(struct super_block *sb) { } static inline void kernfs_init(void) { } #endif /* CONFIG_KERNFS */ /** * kernfs_path - build full path of a given node * @kn: kernfs_node of interest * @buf: buffer to copy @kn's name into * @buflen: size of @buf * * If @kn is NULL result will be "(null)". * * Returns the length of the full path. If the full length is equal to or * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. 
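 *
 * Illustrative usage sketch (editor's addition, not part of the original
 * kerneldoc; assumes PATH_MAX from <linux/limits.h>):
 *
 *	char buf[PATH_MAX];
 *	int len = kernfs_path(kn, buf, sizeof(buf));
 *
 *	if (len >= 0 && len < PATH_MAX)
 *		pr_info("kernfs path: %s\n", buf);
 *	else if (len >= PATH_MAX)
 *		pr_warn("kernfs path was truncated\n");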
*/ static inline int kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) { return kernfs_path_from_node(kn, NULL, buf, buflen); } static inline struct kernfs_node * kernfs_find_and_get(struct kernfs_node *kn, const char *name) { return kernfs_find_and_get_ns(kn, name, NULL); } static inline struct kernfs_node * kernfs_walk_and_get(struct kernfs_node *kn, const char *path) { return kernfs_walk_and_get_ns(kn, path, NULL); } static inline struct kernfs_node * kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode, void *priv) { return kernfs_create_dir_ns(parent, name, mode, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, priv, NULL); } static inline int kernfs_remove_by_name(struct kernfs_node *parent, const char *name) { return kernfs_remove_by_name_ns(parent, name, NULL); } static inline int kernfs_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name) { return kernfs_rename_ns(kn, new_parent, new_name, NULL); } #endif /* __LINUX_KERNFS_H */
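/*
 * Illustrative sketch (editor's addition, not part of the header above): one
 * possible way to index the hashed open_file_mutex array described in the
 * comments above, using the kernfs_node address as the hash key. All
 * example_* names are hypothetical; hash_ptr() comes from <linux/hash.h>,
 * and each mutex in the array would be mutex_init()'d during setup.
 */
#include <linux/hash.h>
#include <linux/kernfs.h>
#include <linux/mutex.h>

static struct kernfs_global_locks example_kernfs_locks;

static struct mutex *example_open_file_mutex_ptr(struct kernfs_node *kn)
{
	/* Fold the node address down to NR_KERNFS_LOCK_BITS bits */
	int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);

	return &example_kernfs_locks.open_file_mutex[idx];
}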
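/*
 * Illustrative sketch (editor's addition): how the creation API declared in
 * the header above is typically wired together -- create the root and a
 * directory in the deactivated state, attach a file backed by ->seq_show(),
 * then call kernfs_activate() so the whole subtree becomes visible at once.
 * All example_* names are hypothetical.
 */
#include <linux/err.h>
#include <linux/kernfs.h>
#include <linux/seq_file.h>
#include <linux/uidgid.h>

static int example_show(struct seq_file *sf, void *v)
{
	/* For ->seq_show(), sf->private points at the kernfs_open_file */
	seq_puts(sf, "hello from kernfs\n");
	return 0;
}

static const struct kernfs_ops example_ops = {
	.seq_show = example_show,
};

static int example_kernfs_setup(void)
{
	struct kernfs_root *root;
	struct kernfs_node *dir, *file;

	/* Nodes start invisible so the subtree can be published atomically */
	root = kernfs_create_root(NULL, KERNFS_ROOT_CREATE_DEACTIVATED, NULL);
	if (IS_ERR(root))
		return PTR_ERR(root);

	dir = kernfs_create_dir(kernfs_root_to_node(root), "example", 0755, NULL);
	if (IS_ERR(dir)) {
		kernfs_destroy_root(root);
		return PTR_ERR(dir);
	}

	file = __kernfs_create_file(dir, "status", 0444, GLOBAL_ROOT_UID,
				    GLOBAL_ROOT_GID, 0, &example_ops,
				    NULL, NULL, NULL);
	if (IS_ERR(file)) {
		kernfs_destroy_root(root);
		return PTR_ERR(file);
	}

	kernfs_activate(kernfs_root_to_node(root));
	return 0;
}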
/* FUSE: Filesystem in Userspace Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. */ #include "fuse_i.h" #include <linux/pagemap.h> #include <linux/file.h> #include <linux/fs_context.h> #include <linux/moduleparam.h> #include <linux/sched.h> #include <linux/namei.h> #include <linux/slab.h> #include <linux/xattr.h> #include <linux/iversion.h> #include <linux/posix_acl.h> #include <linux/security.h> #include <linux/types.h> #include <linux/kernel.h> static bool __read_mostly allow_sys_admin_access; module_param(allow_sys_admin_access, bool, 0644); MODULE_PARM_DESC(allow_sys_admin_access, "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check"); static void fuse_advise_use_readdirplus(struct inode *dir) { struct fuse_inode *fi = get_fuse_inode(dir); set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); } #if BITS_PER_LONG >= 64 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time) { entry->d_fsdata = (void *) time; } static inline u64 fuse_dentry_time(const struct dentry *entry) { return (u64)entry->d_fsdata; } #else union fuse_dentry { u64 time; struct rcu_head rcu; }; static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time) { ((union fuse_dentry *) dentry->d_fsdata)->time = time; } static inline u64 fuse_dentry_time(const struct dentry *entry) { return ((union fuse_dentry *) entry->d_fsdata)->time; } #endif static void fuse_dentry_settime(struct dentry *dentry, u64 time) { struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); bool delete = !time && fc->delete_stale; /* * Mess with DCACHE_OP_DELETE because dput() will be faster without it. * Don't care about races, either way it's just an optimization */ if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) || (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) { spin_lock(&dentry->d_lock); if (!delete) dentry->d_flags &= ~DCACHE_OP_DELETE; else dentry->d_flags |= DCACHE_OP_DELETE; spin_unlock(&dentry->d_lock); } __fuse_dentry_settime(dentry, time); } /* * FUSE caches dentries and attributes with separate timeout. The * time in jiffies until the dentry/attributes are valid is stored in * dentry->d_fsdata and fuse_inode->i_time respectively. */ /* * Calculate the time in jiffies until a dentry/attributes are valid */ u64 fuse_time_to_jiffies(u64 sec, u32 nsec) { if (sec || nsec) { struct timespec64 ts = { sec, min_t(u32, nsec, NSEC_PER_SEC - 1) }; return get_jiffies_64() + timespec64_to_jiffies(&ts); } else return 0; } /* * Set dentry and possibly attribute timeouts from the lookup/mk* * replies */ void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o) { fuse_dentry_settime(entry, fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); } void fuse_invalidate_attr_mask(struct inode *inode, u32 mask) { set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask); } /* * Mark the attributes as stale, so that at the next call to * ->getattr() they will be fetched from userspace */ void fuse_invalidate_attr(struct inode *inode) { fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS); } static void fuse_dir_changed(struct inode *dir) { fuse_invalidate_attr(dir); inode_maybe_inc_iversion(dir, false); } /* * Mark the attributes as stale due to an atime change. Avoid the invalidate if * atime is not used.
*/ void fuse_invalidate_atime(struct inode *inode) { if (!IS_RDONLY(inode)) fuse_invalidate_attr_mask(inode, STATX_ATIME); } /* * Just mark the entry as stale, so that a next attempt to look it up * will result in a new lookup call to userspace * * This is called when a dentry is about to become negative and the * timeout is unknown (unlink, rmdir, rename and in some cases * lookup) */ void fuse_invalidate_entry_cache(struct dentry *entry) { fuse_dentry_settime(entry, 0); } /* * Same as fuse_invalidate_entry_cache(), but also try to remove the * dentry from the hash */ static void fuse_invalidate_entry(struct dentry *entry) { d_invalidate(entry); fuse_invalidate_entry_cache(entry); } static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, u64 nodeid, const struct qstr *name, struct fuse_entry_out *outarg) { memset(outarg, 0, sizeof(struct fuse_entry_out)); args->opcode = FUSE_LOOKUP; args->nodeid = nodeid; args->in_numargs = 3; fuse_set_zero_arg0(args); args->in_args[1].size = name->len; args->in_args[1].value = name->name; args->in_args[2].size = 1; args->in_args[2].value = ""; args->out_numargs = 1; args->out_args[0].size = sizeof(struct fuse_entry_out); args->out_args[0].value = outarg; } /* * Check whether the dentry is still valid * * If the entry validity timeout has expired and the dentry is * positive, try to redo the lookup. If the lookup results in a * different inode, then let the VFS invalidate the dentry and redo * the lookup once more. If the lookup results in the same inode, * then refresh the attributes, timeouts and mark the dentry valid. */ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name, struct dentry *entry, unsigned int flags) { struct inode *inode; struct fuse_mount *fm; struct fuse_inode *fi; int ret; inode = d_inode_rcu(entry); if (inode && fuse_is_bad(inode)) goto invalid; else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) { struct fuse_entry_out outarg; FUSE_ARGS(args); struct fuse_forget_link *forget; u64 attr_version; /* For negative dentries, always do a fresh lookup */ if (!inode) goto invalid; ret = -ECHILD; if (flags & LOOKUP_RCU) goto out; fm = get_fuse_mount(inode); forget = fuse_alloc_forget(); ret = -ENOMEM; if (!forget) goto out; attr_version = fuse_get_attr_version(fm->fc); fuse_lookup_init(fm->fc, &args, get_node_id(dir), name, &outarg); ret = fuse_simple_request(fm, &args); /* Zero nodeid is same as -ENOENT */ if (!ret && !outarg.nodeid) ret = -ENOENT; if (!ret) { fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode) || (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) { fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1); goto invalid; } spin_lock(&fi->lock); fi->nlookup++; spin_unlock(&fi->lock); } kfree(forget); if (ret == -ENOMEM || ret == -EINTR) goto out; if (ret || fuse_invalid_attr(&outarg.attr) || fuse_stale_inode(inode, outarg.generation, &outarg.attr)) goto invalid; forget_all_cached_acls(inode); fuse_change_attributes(inode, &outarg.attr, NULL, ATTR_TIMEOUT(&outarg), attr_version); fuse_change_entry_timeout(entry, &outarg); } else if (inode) { fi = get_fuse_inode(inode); if (flags & LOOKUP_RCU) { if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state)) return -ECHILD; } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) { fuse_advise_use_readdirplus(dir); } } ret = 1; out: return ret; invalid: ret = 0; goto out; } #if BITS_PER_LONG < 64 static int fuse_dentry_init(struct 
dentry *dentry) { dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); return dentry->d_fsdata ? 0 : -ENOMEM; } static void fuse_dentry_release(struct dentry *dentry) { union fuse_dentry *fd = dentry->d_fsdata; kfree_rcu(fd, rcu); } #endif static int fuse_dentry_delete(const struct dentry *dentry) { return time_before64(fuse_dentry_time(dentry), get_jiffies_64()); } /* * Create a fuse_mount object with a new superblock (with path->dentry * as the root), and return that mount so it can be auto-mounted on * @path. */ static struct vfsmount *fuse_dentry_automount(struct path *path) { struct fs_context *fsc; struct vfsmount *mnt; struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry)); fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry); if (IS_ERR(fsc)) return ERR_CAST(fsc); /* Pass the FUSE inode of the mount for fuse_get_tree_submount() */ fsc->fs_private = mp_fi; /* Create the submount */ mnt = fc_mount(fsc); if (!IS_ERR(mnt)) mntget(mnt); put_fs_context(fsc); return mnt; } const struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, .d_delete = fuse_dentry_delete, #if BITS_PER_LONG < 64 .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, #endif .d_automount = fuse_dentry_automount, }; const struct dentry_operations fuse_root_dentry_operations = { #if BITS_PER_LONG < 64 .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, #endif }; int fuse_valid_type(int m) { return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); } static bool fuse_valid_size(u64 size) { return size <= LLONG_MAX; } bool fuse_invalid_attr(struct fuse_attr *attr) { return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size); } int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, struct fuse_entry_out *outarg, struct inode **inode) { struct fuse_mount *fm = get_fuse_mount_super(sb); FUSE_ARGS(args); struct fuse_forget_link *forget; u64 attr_version, evict_ctr; int err; *inode = NULL; err = -ENAMETOOLONG; if (name->len > FUSE_NAME_MAX) goto out; forget = fuse_alloc_forget(); err = -ENOMEM; if (!forget) goto out; attr_version = fuse_get_attr_version(fm->fc); evict_ctr = fuse_get_evict_ctr(fm->fc); fuse_lookup_init(fm->fc, &args, nodeid, name, outarg); err = fuse_simple_request(fm, &args); /* Zero nodeid is same as -ENOENT, but with valid timeout */ if (err || !outarg->nodeid) goto out_put_forget; err = -EIO; if (fuse_invalid_attr(&outarg->attr)) goto out_put_forget; if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) { pr_warn_once("root generation should be zero\n"); outarg->generation = 0; } *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, &outarg->attr, ATTR_TIMEOUT(outarg), attr_version, evict_ctr); err = -ENOMEM; if (!*inode) { fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1); goto out; } err = 0; out_put_forget: kfree(forget); out: return err; } static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, unsigned int flags) { int err; struct fuse_entry_out outarg; struct inode *inode; struct dentry *newent; bool outarg_valid = true; bool locked; if (fuse_is_bad(dir)) return ERR_PTR(-EIO); locked = fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); fuse_unlock_inode(dir, locked); if (err == -ENOENT) { outarg_valid = false; err = 0; } if (err) goto out_err; err = -EIO; if (inode && get_node_id(inode) == FUSE_ROOT_ID) 
goto out_iput; newent = d_splice_alias(inode, entry); err = PTR_ERR(newent); if (IS_ERR(newent)) goto out_err; entry = newent ? newent : entry; if (outarg_valid) fuse_change_entry_timeout(entry, &outarg); else fuse_invalidate_entry_cache(entry); if (inode) fuse_advise_use_readdirplus(dir); return newent; out_iput: iput(inode); out_err: return ERR_PTR(err); } static int get_security_context(struct dentry *entry, umode_t mode, struct fuse_in_arg *ext) { struct fuse_secctx *fctx; struct fuse_secctx_header *header; struct lsm_context lsmctx = { }; void *ptr; u32 total_len = sizeof(*header); int err, nr_ctx = 0; const char *name = NULL; size_t namelen; err = security_dentry_init_security(entry, mode, &entry->d_name, &name, &lsmctx); /* If no LSM is supporting this security hook ignore error */ if (err && err != -EOPNOTSUPP) goto out_err; if (lsmctx.len) { nr_ctx = 1; namelen = strlen(name) + 1; err = -EIO; if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || lsmctx.len > S32_MAX)) goto out_err; total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + lsmctx.len); } err = -ENOMEM; header = ptr = kzalloc(total_len, GFP_KERNEL); if (!ptr) goto out_err; header->nr_secctx = nr_ctx; header->size = total_len; ptr += sizeof(*header); if (nr_ctx) { fctx = ptr; fctx->size = lsmctx.len; ptr += sizeof(*fctx); strcpy(ptr, name); ptr += namelen; memcpy(ptr, lsmctx.context, lsmctx.len); } ext->size = total_len; ext->value = header; err = 0; out_err: if (nr_ctx) security_release_secctx(&lsmctx); return err; } static void *extend_arg(struct fuse_in_arg *buf, u32 bytes) { void *p; u32 newlen = buf->size + bytes; p = krealloc(buf->value, newlen, GFP_KERNEL); if (!p) { kfree(buf->value); buf->size = 0; buf->value = NULL; return NULL; } memset(p + buf->size, 0, bytes); buf->value = p; buf->size = newlen; return p + newlen - bytes; } static u32 fuse_ext_size(size_t size) { return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size); } /* * This adds just a single supplementary group that matches the parent's group. 
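 *
 * Illustrative note (editor's addition): the extension record appended by
 * this helper is a fuse_ext_header immediately followed by a
 * fuse_supp_groups payload, roughly
 *
 *	header:  { .size = <record length>, .type = FUSE_EXT_GROUPS }
 *	payload: { .nr_groups = 1, .groups[0] = <parent directory's gid in
 *	           the connection's user namespace> }
 *
 * with the record length rounded up by FUSE_REC_ALIGN() via fuse_ext_size().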
*/ static int get_create_supp_group(struct mnt_idmap *idmap, struct inode *dir, struct fuse_in_arg *ext) { struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_ext_header *xh; struct fuse_supp_groups *sg; kgid_t kgid = dir->i_gid; vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid); gid_t parent_gid = from_kgid(fc->user_ns, kgid); u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0])); if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) || !vfsgid_in_group_p(vfsgid)) return 0; xh = extend_arg(ext, sg_len); if (!xh) return -ENOMEM; xh->size = sg_len; xh->type = FUSE_EXT_GROUPS; sg = (struct fuse_supp_groups *) &xh[1]; sg->nr_groups = 1; sg->groups[0] = parent_gid; return 0; } static int get_create_ext(struct mnt_idmap *idmap, struct fuse_args *args, struct inode *dir, struct dentry *dentry, umode_t mode) { struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); struct fuse_in_arg ext = { .size = 0, .value = NULL }; int err = 0; if (fc->init_security) err = get_security_context(dentry, mode, &ext); if (!err && fc->create_supp_group) err = get_create_supp_group(idmap, dir, &ext); if (!err && ext.size) { WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args)); args->is_ext = true; args->ext_idx = args->in_numargs++; args->in_args[args->ext_idx] = ext; } else { kfree(ext.value); } return err; } static void free_ext_value(struct fuse_args *args) { if (args->is_ext) kfree(args->in_args[args->ext_idx].value); } /* * Atomic create+open operation * * If the filesystem doesn't support this, then fall back to separate * 'mknod' + 'open' requests. */ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir, struct dentry *entry, struct file *file, unsigned int flags, umode_t mode, u32 opcode) { int err; struct inode *inode; struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); struct fuse_forget_link *forget; struct fuse_create_in inarg; struct fuse_open_out *outopenp; struct fuse_entry_out outentry; struct fuse_inode *fi; struct fuse_file *ff; bool trunc = flags & O_TRUNC; /* Userspace expects S_IFREG in create mode */ BUG_ON((mode & S_IFMT) != S_IFREG); forget = fuse_alloc_forget(); err = -ENOMEM; if (!forget) goto out_err; err = -ENOMEM; ff = fuse_file_alloc(fm, true); if (!ff) goto out_put_forget_req; if (!fm->fc->dont_mask) mode &= ~current_umask(); flags &= ~O_NOCTTY; memset(&inarg, 0, sizeof(inarg)); memset(&outentry, 0, sizeof(outentry)); inarg.flags = flags; inarg.mode = mode; inarg.umask = current_umask(); if (fm->fc->handle_killpriv_v2 && trunc && !(flags & O_EXCL) && !capable(CAP_FSETID)) { inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; } args.opcode = opcode; args.nodeid = get_node_id(dir); args.in_numargs = 2; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; args.in_args[1].size = entry->d_name.len + 1; args.in_args[1].value = entry->d_name.name; args.out_numargs = 2; args.out_args[0].size = sizeof(outentry); args.out_args[0].value = &outentry; /* Store outarg for fuse_finish_open() */ outopenp = &ff->args->open_outarg; args.out_args[1].size = sizeof(*outopenp); args.out_args[1].value = outopenp; err = get_create_ext(idmap, &args, dir, entry, mode); if (err) goto out_free_ff; err = fuse_simple_idmap_request(idmap, fm, &args); free_ext_value(&args); if (err) goto out_free_ff; err = -EIO; if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) || fuse_invalid_attr(&outentry.attr)) goto out_free_ff; ff->fh = outopenp->fh; ff->nodeid = outentry.nodeid; ff->open_flags = outopenp->open_flags; inode = 
fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0); if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); fuse_sync_release(NULL, ff, flags); fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); err = -ENOMEM; goto out_err; } kfree(forget); d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outentry); fuse_dir_changed(dir); err = generic_file_open(inode, file); if (!err) { file->private_data = ff; err = finish_open(file, entry, fuse_finish_open); } if (err) { fi = get_fuse_inode(inode); fuse_sync_release(fi, ff, flags); } else { if (fm->fc->atomic_o_trunc && trunc) truncate_pagecache(inode, 0); else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) invalidate_inode_pages2(inode->i_mapping); } return err; out_free_ff: fuse_file_free(ff); out_put_forget_req: kfree(forget); out_err: return err; } static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct file *file, unsigned flags, umode_t mode) { int err; struct mnt_idmap *idmap = file_mnt_idmap(file); struct fuse_conn *fc = get_fuse_conn(dir); struct dentry *res = NULL; if (fuse_is_bad(dir)) return -EIO; if (d_in_lookup(entry)) { res = fuse_lookup(dir, entry, 0); if (IS_ERR(res)) return PTR_ERR(res); if (res) entry = res; } if (!(flags & O_CREAT) || d_really_is_positive(entry)) goto no_open; /* Only creates */ file->f_mode |= FMODE_CREATED; if (fc->no_create) goto mknod; err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE); if (err == -ENOSYS) { fc->no_create = 1; goto mknod; } else if (err == -EEXIST) fuse_invalidate_entry(entry); out_dput: dput(res); return err; mknod: err = fuse_mknod(idmap, dir, entry, mode, 0); if (err) goto out_dput; no_open: return finish_no_open(file, res); } /* * Code shared between mknod, mkdir, symlink and link */ static int create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm, struct fuse_args *args, struct inode *dir, struct dentry *entry, umode_t mode) { struct fuse_entry_out outarg; struct inode *inode; struct dentry *d; int err; struct fuse_forget_link *forget; if (fuse_is_bad(dir)) return -EIO; forget = fuse_alloc_forget(); if (!forget) return -ENOMEM; memset(&outarg, 0, sizeof(outarg)); args->nodeid = get_node_id(dir); args->out_numargs = 1; args->out_args[0].size = sizeof(outarg); args->out_args[0].value = &outarg; if (args->opcode != FUSE_LINK) { err = get_create_ext(idmap, args, dir, entry, mode); if (err) goto out_put_forget_req; } err = fuse_simple_idmap_request(idmap, fm, args); free_ext_value(args); if (err) goto out_put_forget_req; err = -EIO; if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr)) goto out_put_forget_req; if ((outarg.attr.mode ^ mode) & S_IFMT) goto out_put_forget_req; inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0); if (!inode) { fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1); return -ENOMEM; } kfree(forget); d_drop(entry); d = d_splice_alias(inode, entry); if (IS_ERR(d)) return PTR_ERR(d); if (d) { fuse_change_entry_timeout(d, &outarg); dput(d); } else { fuse_change_entry_timeout(entry, &outarg); } fuse_dir_changed(dir); return 0; out_put_forget_req: if (err == -EEXIST) fuse_invalidate_entry(entry); kfree(forget); return err; } static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *entry, umode_t mode, dev_t rdev) { struct fuse_mknod_in inarg; struct fuse_mount *fm = 
get_fuse_mount(dir); FUSE_ARGS(args); if (!fm->fc->dont_mask) mode &= ~current_umask(); memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; inarg.rdev = new_encode_dev(rdev); inarg.umask = current_umask(); args.opcode = FUSE_MKNOD; args.in_numargs = 2; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; args.in_args[1].size = entry->d_name.len + 1; args.in_args[1].value = entry->d_name.name; return create_new_entry(idmap, fm, &args, dir, entry, mode); } static int fuse_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *entry, umode_t mode, bool excl) { return fuse_mknod(idmap, dir, entry, mode, 0); } static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct fuse_conn *fc = get_fuse_conn(dir); int err; if (fc->no_tmpfile) return -EOPNOTSUPP; err = fuse_create_open(idmap, dir, file->f_path.dentry, file, file->f_flags, mode, FUSE_TMPFILE); if (err == -ENOSYS) { fc->no_tmpfile = 1; err = -EOPNOTSUPP; } return err; } static int fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *entry, umode_t mode) { struct fuse_mkdir_in inarg; struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); if (!fm->fc->dont_mask) mode &= ~current_umask(); memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; inarg.umask = current_umask(); args.opcode = FUSE_MKDIR; args.in_numargs = 2; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; args.in_args[1].size = entry->d_name.len + 1; args.in_args[1].value = entry->d_name.name; return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR); } static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *entry, const char *link) { struct fuse_mount *fm = get_fuse_mount(dir); unsigned len = strlen(link) + 1; FUSE_ARGS(args); args.opcode = FUSE_SYMLINK; args.in_numargs = 3; fuse_set_zero_arg0(&args); args.in_args[1].size = entry->d_name.len + 1; args.in_args[1].value = entry->d_name.name; args.in_args[2].size = len; args.in_args[2].value = link; return create_new_entry(idmap, fm, &args, dir, entry, S_IFLNK); } void fuse_flush_time_update(struct inode *inode) { int err = sync_inode_metadata(inode, 1); mapping_set_error(inode->i_mapping, err); } static void fuse_update_ctime_in_cache(struct inode *inode) { if (!IS_NOCMTIME(inode)) { inode_set_ctime_current(inode); mark_inode_dirty_sync(inode); fuse_flush_time_update(inode); } } void fuse_update_ctime(struct inode *inode) { fuse_invalidate_attr_mask(inode, STATX_CTIME); fuse_update_ctime_in_cache(inode); } static void fuse_entry_unlinked(struct dentry *entry) { struct inode *inode = d_inode(entry); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); spin_lock(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); /* * If i_nlink == 0 then unlink doesn't make sense, yet this can * happen if userspace filesystem is careless. 
It would be * difficult to enforce correct nlink usage so just ignore this * condition here */ if (S_ISDIR(inode->i_mode)) clear_nlink(inode); else if (inode->i_nlink > 0) drop_nlink(inode); spin_unlock(&fi->lock); fuse_invalidate_entry_cache(entry); fuse_update_ctime(inode); } static int fuse_unlink(struct inode *dir, struct dentry *entry) { int err; struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); if (fuse_is_bad(dir)) return -EIO; args.opcode = FUSE_UNLINK; args.nodeid = get_node_id(dir); args.in_numargs = 2; fuse_set_zero_arg0(&args); args.in_args[1].size = entry->d_name.len + 1; args.in_args[1].value = entry->d_name.name; err = fuse_simple_request(fm, &args); if (!err) { fuse_dir_changed(dir); fuse_entry_unlinked(entry); } else if (err == -EINTR || err == -ENOENT) fuse_invalidate_entry(entry); return err; } static int fuse_rmdir(struct inode *dir, struct dentry *entry) { int err; struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); if (fuse_is_bad(dir)) return -EIO; args.opcode = FUSE_RMDIR; args.nodeid = get_node_id(dir); args.in_numargs = 2; fuse_set_zero_arg0(&args); args.in_args[1].size = entry->d_name.len + 1; args.in_args[1].value = entry->d_name.name; err = fuse_simple_request(fm, &args); if (!err) { fuse_dir_changed(dir); fuse_entry_unlinked(entry); } else if (err == -EINTR || err == -ENOENT) fuse_invalidate_entry(entry); return err; } static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent, struct inode *newdir, struct dentry *newent, unsigned int flags, int opcode, size_t argsize) { int err; struct fuse_rename2_in inarg; struct fuse_mount *fm = get_fuse_mount(olddir); FUSE_ARGS(args); memset(&inarg, 0, argsize); inarg.newdir = get_node_id(newdir); inarg.flags = flags; args.opcode = opcode; args.nodeid = get_node_id(olddir); args.in_numargs = 3; args.in_args[0].size = argsize; args.in_args[0].value = &inarg; args.in_args[1].size = oldent->d_name.len + 1; args.in_args[1].value = oldent->d_name.name; args.in_args[2].size = newent->d_name.len + 1; args.in_args[2].value = newent->d_name.name; err = fuse_simple_idmap_request(idmap, fm, &args); if (!err) { /* ctime changes */ fuse_update_ctime(d_inode(oldent)); if (flags & RENAME_EXCHANGE) fuse_update_ctime(d_inode(newent)); fuse_dir_changed(olddir); if (olddir != newdir) fuse_dir_changed(newdir); /* newent will end up negative */ if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) fuse_entry_unlinked(newent); } else if (err == -EINTR || err == -ENOENT) { /* If request was interrupted, DEITY only knows if the rename actually took place. If the invalidation fails (e.g. some process has CWD under the renamed directory), then there can be inconsistency between the dcache and the real filesystem. Tough luck. */ fuse_invalidate_entry(oldent); if (d_really_is_positive(newent)) fuse_invalidate_entry(newent); } return err; } static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent, struct inode *newdir, struct dentry *newent, unsigned int flags) { struct fuse_conn *fc = get_fuse_conn(olddir); int err; if (fuse_is_bad(olddir)) return -EIO; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; if (flags) { if (fc->no_rename2 || fc->minor < 23) return -EINVAL; err = fuse_rename_common((flags & RENAME_WHITEOUT) ? 
idmap : &invalid_mnt_idmap, olddir, oldent, newdir, newent, flags, FUSE_RENAME2, sizeof(struct fuse_rename2_in)); if (err == -ENOSYS) { fc->no_rename2 = 1; err = -EINVAL; } } else { err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0, FUSE_RENAME, sizeof(struct fuse_rename_in)); } return err; } static int fuse_link(struct dentry *entry, struct inode *newdir, struct dentry *newent) { int err; struct fuse_link_in inarg; struct inode *inode = d_inode(entry); struct fuse_mount *fm = get_fuse_mount(inode); FUSE_ARGS(args); memset(&inarg, 0, sizeof(inarg)); inarg.oldnodeid = get_node_id(inode); args.opcode = FUSE_LINK; args.in_numargs = 2; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; args.in_args[1].size = newent->d_name.len + 1; args.in_args[1].value = newent->d_name.name; err = create_new_entry(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode); if (!err) fuse_update_ctime_in_cache(inode); else if (err == -EINTR) fuse_invalidate_attr(inode); return err; } static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode, struct fuse_attr *attr, struct kstat *stat) { unsigned int blkbits; struct fuse_conn *fc = get_fuse_conn(inode); vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns, make_kuid(fc->user_ns, attr->uid)); vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, make_kgid(fc->user_ns, attr->gid)); stat->dev = inode->i_sb->s_dev; stat->ino = attr->ino; stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); stat->nlink = attr->nlink; stat->uid = vfsuid_into_kuid(vfsuid); stat->gid = vfsgid_into_kgid(vfsgid); stat->rdev = inode->i_rdev; stat->atime.tv_sec = attr->atime; stat->atime.tv_nsec = attr->atimensec; stat->mtime.tv_sec = attr->mtime; stat->mtime.tv_nsec = attr->mtimensec; stat->ctime.tv_sec = attr->ctime; stat->ctime.tv_nsec = attr->ctimensec; stat->size = attr->size; stat->blocks = attr->blocks; if (attr->blksize != 0) blkbits = ilog2(attr->blksize); else blkbits = inode->i_sb->s_blocksize_bits; stat->blksize = 1 << blkbits; } static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr) { memset(attr, 0, sizeof(*attr)); attr->ino = sx->ino; attr->size = sx->size; attr->blocks = sx->blocks; attr->atime = sx->atime.tv_sec; attr->mtime = sx->mtime.tv_sec; attr->ctime = sx->ctime.tv_sec; attr->atimensec = sx->atime.tv_nsec; attr->mtimensec = sx->mtime.tv_nsec; attr->ctimensec = sx->ctime.tv_nsec; attr->mode = sx->mode; attr->nlink = sx->nlink; attr->uid = sx->uid; attr->gid = sx->gid; attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor)); attr->blksize = sx->blksize; } static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode, struct file *file, struct kstat *stat) { int err; struct fuse_attr attr; struct fuse_statx *sx; struct fuse_statx_in inarg; struct fuse_statx_out outarg; struct fuse_mount *fm = get_fuse_mount(inode); u64 attr_version = fuse_get_attr_version(fm->fc); FUSE_ARGS(args); memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); /* Directories have separate file-handle space */ if (file && S_ISREG(inode->i_mode)) { struct fuse_file *ff = file->private_data; inarg.getattr_flags |= FUSE_GETATTR_FH; inarg.fh = ff->fh; } /* For now leave sync hints as the default, request all stats. 
*/ inarg.sx_flags = 0; inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME; args.opcode = FUSE_STATX; args.nodeid = get_node_id(inode); args.in_numargs = 1; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; args.out_numargs = 1; args.out_args[0].size = sizeof(outarg); args.out_args[0].value = &outarg; err = fuse_simple_request(fm, &args); if (err) return err; sx = &outarg.stat; if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) || ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) || inode_wrong_type(inode, sx->mode)))) { fuse_make_bad(inode); return -EIO; } fuse_statx_to_attr(&outarg.stat, &attr); if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) { fuse_change_attributes(inode, &attr, &outarg.stat, ATTR_TIMEOUT(&outarg), attr_version); } if (stat) { stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME); stat->btime.tv_sec = sx->btime.tv_sec; stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); fuse_fillattr(idmap, inode, &attr, stat); stat->result_mask |= STATX_TYPE; } return 0; } static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode, struct kstat *stat, struct file *file) { int err; struct fuse_getattr_in inarg; struct fuse_attr_out outarg; struct fuse_mount *fm = get_fuse_mount(inode); FUSE_ARGS(args); u64 attr_version; attr_version = fuse_get_attr_version(fm->fc); memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); /* Directories have separate file-handle space */ if (file && S_ISREG(inode->i_mode)) { struct fuse_file *ff = file->private_data; inarg.getattr_flags |= FUSE_GETATTR_FH; inarg.fh = ff->fh; } args.opcode = FUSE_GETATTR; args.nodeid = get_node_id(inode); args.in_numargs = 1; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; args.out_numargs = 1; args.out_args[0].size = sizeof(outarg); args.out_args[0].value = &outarg; err = fuse_simple_request(fm, &args); if (!err) { if (fuse_invalid_attr(&outarg.attr) || inode_wrong_type(inode, outarg.attr.mode)) { fuse_make_bad(inode); err = -EIO; } else { fuse_change_attributes(inode, &outarg.attr, NULL, ATTR_TIMEOUT(&outarg), attr_version); if (stat) fuse_fillattr(idmap, inode, &outarg.attr, stat); } } return err; } static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode, struct file *file, struct kstat *stat, u32 request_mask, unsigned int flags) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); int err = 0; bool sync; u32 inval_mask = READ_ONCE(fi->inval_mask); u32 cache_mask = fuse_get_cache_mask(inode); /* FUSE only supports basic stats and possibly btime */ request_mask &= STATX_BASIC_STATS | STATX_BTIME; retry: if (fc->no_statx) request_mask &= STATX_BASIC_STATS; if (!request_mask) sync = false; else if (flags & AT_STATX_FORCE_SYNC) sync = true; else if (flags & AT_STATX_DONT_SYNC) sync = false; else if (request_mask & inval_mask & ~cache_mask) sync = true; else sync = time_before64(fi->i_time, get_jiffies_64()); if (sync) { forget_all_cached_acls(inode); /* Try statx if BTIME is requested */ if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) { err = fuse_do_statx(idmap, inode, file, stat); if (err == -ENOSYS) { fc->no_statx = 1; err = 0; goto retry; } } else { err = fuse_do_getattr(idmap, inode, stat, file); } } else if (stat) { generic_fillattr(idmap, request_mask, inode, stat); stat->mode = fi->orig_i_mode; stat->ino = fi->orig_ino; if (test_bit(FUSE_I_BTIME, &fi->state)) { stat->btime = fi->i_btime; stat->result_mask |= STATX_BTIME; } } 
return err; } int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask) { return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0); } int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, u64 child_nodeid, struct qstr *name, u32 flags) { int err = -ENOTDIR; struct inode *parent; struct dentry *dir; struct dentry *entry; parent = fuse_ilookup(fc, parent_nodeid, NULL); if (!parent) return -ENOENT; inode_lock_nested(parent, I_MUTEX_PARENT); if (!S_ISDIR(parent->i_mode)) goto unlock; err = -ENOENT; dir = d_find_alias(parent); if (!dir) goto unlock; name->hash = full_name_hash(dir, name->name, name->len); entry = d_lookup(dir, name); dput(dir); if (!entry) goto unlock; fuse_dir_changed(parent); if (!(flags & FUSE_EXPIRE_ONLY)) d_invalidate(entry); fuse_invalidate_entry_cache(entry); if (child_nodeid != 0 && d_really_is_positive(entry)) { inode_lock(d_inode(entry)); if (get_node_id(d_inode(entry)) != child_nodeid) { err = -ENOENT; goto badentry; } if (d_mountpoint(entry)) { err = -EBUSY; goto badentry; } if (d_is_dir(entry)) { shrink_dcache_parent(entry); if (!simple_empty(entry)) { err = -ENOTEMPTY; goto badentry; } d_inode(entry)->i_flags |= S_DEAD; } dont_mount(entry); clear_nlink(d_inode(entry)); err = 0; badentry: inode_unlock(d_inode(entry)); if (!err) d_delete(entry); } else { err = 0; } dput(entry); unlock: inode_unlock(parent); iput(parent); return err; } static inline bool fuse_permissible_uidgid(struct fuse_conn *fc) { const struct cred *cred = current_cred(); return (uid_eq(cred->euid, fc->user_id) && uid_eq(cred->suid, fc->user_id) && uid_eq(cred->uid, fc->user_id) && gid_eq(cred->egid, fc->group_id) && gid_eq(cred->sgid, fc->group_id) && gid_eq(cred->gid, fc->group_id)); } /* * Calling into a user-controlled filesystem gives the filesystem * daemon ptrace-like capabilities over the current process. This * means, that the filesystem daemon is able to record the exact * filesystem operations performed, and can also control the behavior * of the requester process in otherwise impossible ways. For example * it can delay the operation for arbitrary length of time allowing * DoS against the requester. * * For this reason only those processes can call into the filesystem, * for which the owner of the mount has ptrace privilege. This * excludes processes started by other users, suid or sgid processes. */ bool fuse_allow_current_process(struct fuse_conn *fc) { bool allow; if (fc->allow_other) allow = current_in_userns(fc->user_ns); else allow = fuse_permissible_uidgid(fc); if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN)) allow = true; return allow; } static int fuse_access(struct inode *inode, int mask) { struct fuse_mount *fm = get_fuse_mount(inode); FUSE_ARGS(args); struct fuse_access_in inarg; int err; BUG_ON(mask & MAY_NOT_BLOCK); /* * We should not send FUSE_ACCESS to the userspace * when idmapped mounts are enabled as for this case * we have fc->default_permissions = 1 and access * permission checks are done on the kernel side. 
*/ WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP)); if (fm->fc->no_access) return 0; memset(&inarg, 0, sizeof(inarg)); inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); args.opcode = FUSE_ACCESS; args.nodeid = get_node_id(inode); args.in_numargs = 1; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; err = fuse_simple_request(fm, &args); if (err == -ENOSYS) { fm->fc->no_access = 1; err = 0; } return err; } static int fuse_perm_getattr(struct inode *inode, int mask) { if (mask & MAY_NOT_BLOCK) return -ECHILD; forget_all_cached_acls(inode); return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL); } /* * Check permission. The two basic access models of FUSE are: * * 1) Local access checking ('default_permissions' mount option) based * on file mode. This is the plain old disk filesystem permission * model. * * 2) "Remote" access checking, where server is responsible for * checking permission in each inode operation. An exception to this * is if ->permission() was invoked from sys_access() in which case an * access request is sent. Execute permission is still checked * locally based on file mode. */ static int fuse_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct fuse_conn *fc = get_fuse_conn(inode); bool refreshed = false; int err = 0; if (fuse_is_bad(inode)) return -EIO; if (!fuse_allow_current_process(fc)) return -EACCES; /* * If attributes are needed, refresh them before proceeding */ if (fc->default_permissions || ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { struct fuse_inode *fi = get_fuse_inode(inode); u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID; if (perm_mask & READ_ONCE(fi->inval_mask) || time_before64(fi->i_time, get_jiffies_64())) { refreshed = true; err = fuse_perm_getattr(inode, mask); if (err) return err; } } if (fc->default_permissions) { err = generic_permission(idmap, inode, mask); /* If permission is denied, try to refresh file attributes. This is also needed, because the root node will at first have no permissions */ if (err == -EACCES && !refreshed) { err = fuse_perm_getattr(inode, mask); if (!err) err = generic_permission(idmap, inode, mask); } /* Note: the opposite of the above test does not exist. 
So if permissions are revoked this won't be noticed immediately, only after the attribute timeout has expired */ } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { err = fuse_access(inode, mask); } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { if (!(inode->i_mode & S_IXUGO)) { if (refreshed) return -EACCES; err = fuse_perm_getattr(inode, mask); if (!err && !(inode->i_mode & S_IXUGO)) return -EACCES; } } return err; } static int fuse_readlink_page(struct inode *inode, struct folio *folio) { struct fuse_mount *fm = get_fuse_mount(inode); struct fuse_folio_desc desc = { .length = PAGE_SIZE - 1 }; struct fuse_args_pages ap = { .num_folios = 1, .folios = &folio, .descs = &desc, }; char *link; ssize_t res; ap.args.opcode = FUSE_READLINK; ap.args.nodeid = get_node_id(inode); ap.args.out_pages = true; ap.args.out_argvar = true; ap.args.page_zeroing = true; ap.args.out_numargs = 1; ap.args.out_args[0].size = desc.length; res = fuse_simple_request(fm, &ap.args); fuse_invalidate_atime(inode); if (res < 0) return res; if (WARN_ON(res >= PAGE_SIZE)) return -EIO; link = folio_address(folio); link[res] = '\0'; return 0; } static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *callback) { struct fuse_conn *fc = get_fuse_conn(inode); struct folio *folio; int err; err = -EIO; if (fuse_is_bad(inode)) goto out_err; if (fc->cache_symlinks) return page_get_link(dentry, inode, callback); err = -ECHILD; if (!dentry) goto out_err; folio = folio_alloc(GFP_KERNEL, 0); err = -ENOMEM; if (!folio) goto out_err; err = fuse_readlink_page(inode, folio); if (err) { folio_put(folio); goto out_err; } set_delayed_call(callback, page_put_link, &folio->page); return folio_address(folio); out_err: return ERR_PTR(err); } static int fuse_dir_open(struct inode *inode, struct file *file) { struct fuse_mount *fm = get_fuse_mount(inode); int err; if (fuse_is_bad(inode)) return -EIO; err = generic_file_open(inode, file); if (err) return err; err = fuse_do_open(fm, get_node_id(inode), file, true); if (!err) { struct fuse_file *ff = file->private_data; /* * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for * directories for backward compatibility, though it's unlikely * to be useful. 
*/ if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE)) nonseekable_open(inode, file); if (!(ff->open_flags & FOPEN_KEEP_CACHE)) invalidate_inode_pages2(inode->i_mapping); } return err; } static int fuse_dir_release(struct inode *inode, struct file *file) { fuse_release_common(file, true); return 0; } static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); int err; if (fuse_is_bad(inode)) return -EIO; if (fc->no_fsyncdir) return 0; inode_lock(inode); err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR); if (err == -ENOSYS) { fc->no_fsyncdir = 1; err = 0; } inode_unlock(inode); return err; } static long fuse_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */ if (fc->minor < 18) return -ENOTTY; return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR); } static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); if (fc->minor < 18) return -ENOTTY; return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); } static bool update_mtime(unsigned ivalid, bool trust_local_mtime) { /* Always update if mtime is explicitly set */ if (ivalid & ATTR_MTIME_SET) return true; /* Or if kernel i_mtime is the official one */ if (trust_local_mtime) return true; /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) return false; /* In all other cases update */ return true; } static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc, struct iattr *iattr, struct fuse_setattr_in *arg, bool trust_local_cmtime) { unsigned ivalid = iattr->ia_valid; if (ivalid & ATTR_MODE) arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; if (ivalid & ATTR_UID) { kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid); arg->valid |= FATTR_UID; arg->uid = from_kuid(fc->user_ns, fsuid); } if (ivalid & ATTR_GID) { kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid); arg->valid |= FATTR_GID; arg->gid = from_kgid(fc->user_ns, fsgid); } if (ivalid & ATTR_SIZE) arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; if (ivalid & ATTR_ATIME) { arg->valid |= FATTR_ATIME; arg->atime = iattr->ia_atime.tv_sec; arg->atimensec = iattr->ia_atime.tv_nsec; if (!(ivalid & ATTR_ATIME_SET)) arg->valid |= FATTR_ATIME_NOW; } if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) { arg->valid |= FATTR_MTIME; arg->mtime = iattr->ia_mtime.tv_sec; arg->mtimensec = iattr->ia_mtime.tv_nsec; if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime) arg->valid |= FATTR_MTIME_NOW; } if ((ivalid & ATTR_CTIME) && trust_local_cmtime) { arg->valid |= FATTR_CTIME; arg->ctime = iattr->ia_ctime.tv_sec; arg->ctimensec = iattr->ia_ctime.tv_nsec; } } /* * Prevent concurrent writepages on inode * * This is done by adding a negative bias to the inode write counter * and waiting for all pending writes to finish. 
*/ void fuse_set_nowrite(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); BUG_ON(!inode_is_locked(inode)); spin_lock(&fi->lock); BUG_ON(fi->writectr < 0); fi->writectr += FUSE_NOWRITE; spin_unlock(&fi->lock); wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE); } /* * Allow writepages on inode * * Remove the bias from the writecounter and send any queued * writepages. */ static void __fuse_release_nowrite(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); BUG_ON(fi->writectr != FUSE_NOWRITE); fi->writectr = 0; fuse_flush_writepages(inode); } void fuse_release_nowrite(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); spin_lock(&fi->lock); __fuse_release_nowrite(inode); spin_unlock(&fi->lock); } static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args, struct inode *inode, struct fuse_setattr_in *inarg_p, struct fuse_attr_out *outarg_p) { args->opcode = FUSE_SETATTR; args->nodeid = get_node_id(inode); args->in_numargs = 1; args->in_args[0].size = sizeof(*inarg_p); args->in_args[0].value = inarg_p; args->out_numargs = 1; args->out_args[0].size = sizeof(*outarg_p); args->out_args[0].value = outarg_p; } /* * Flush inode->i_mtime to the server */ int fuse_flush_times(struct inode *inode, struct fuse_file *ff) { struct fuse_mount *fm = get_fuse_mount(inode); FUSE_ARGS(args); struct fuse_setattr_in inarg; struct fuse_attr_out outarg; memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); inarg.valid = FATTR_MTIME; inarg.mtime = inode_get_mtime_sec(inode); inarg.mtimensec = inode_get_mtime_nsec(inode); if (fm->fc->minor >= 23) { inarg.valid |= FATTR_CTIME; inarg.ctime = inode_get_ctime_sec(inode); inarg.ctimensec = inode_get_ctime_nsec(inode); } if (ff) { inarg.valid |= FATTR_FH; inarg.fh = ff->fh; } fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg); return fuse_simple_request(fm, &args); } /* * Set attributes, and at the same time refresh them. * * Truncation is slightly complicated, because the 'truncate' request * may fail, in which case we don't want to touch the mapping. * vmtruncate() doesn't allow for this case, so do the rlimit checking * and the actual truncation by hand. */ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr, struct file *file) { struct inode *inode = d_inode(dentry); struct fuse_mount *fm = get_fuse_mount(inode); struct fuse_conn *fc = fm->fc; struct fuse_inode *fi = get_fuse_inode(inode); struct address_space *mapping = inode->i_mapping; FUSE_ARGS(args); struct fuse_setattr_in inarg; struct fuse_attr_out outarg; bool is_truncate = false; bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode); loff_t oldsize; int err; bool trust_local_cmtime = is_wb; bool fault_blocked = false; if (!fc->default_permissions) attr->ia_valid |= ATTR_FORCE; err = setattr_prepare(idmap, dentry, attr); if (err) return err; if (attr->ia_valid & ATTR_SIZE) { if (WARN_ON(!S_ISREG(inode->i_mode))) return -EIO; is_truncate = true; } if (FUSE_IS_DAX(inode) && is_truncate) { filemap_invalidate_lock(mapping); fault_blocked = true; err = fuse_dax_break_layouts(inode, 0, 0); if (err) { filemap_invalidate_unlock(mapping); return err; } } if (attr->ia_valid & ATTR_OPEN) { /* This is coming from open(..., ... | O_TRUNC); */ WARN_ON(!(attr->ia_valid & ATTR_SIZE)); WARN_ON(attr->ia_size != 0); if (fc->atomic_o_trunc) { /* * No need to send request to userspace, since actual * truncation has already been done by OPEN. But still * need to truncate page cache. 
*/ i_size_write(inode, 0); truncate_pagecache(inode, 0); goto out; } file = NULL; } /* Flush dirty data/metadata before non-truncate SETATTR */ if (is_wb && attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET | ATTR_TIMES_SET)) { err = write_inode_now(inode, true); if (err) return err; fuse_set_nowrite(inode); fuse_release_nowrite(inode); } if (is_truncate) { fuse_set_nowrite(inode); set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); if (trust_local_cmtime && attr->ia_size != inode->i_size) attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; } memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime); if (file) { struct fuse_file *ff = file->private_data; inarg.valid |= FATTR_FH; inarg.fh = ff->fh; } /* Kill suid/sgid for non-directory chown unconditionally */ if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) && attr->ia_valid & (ATTR_UID | ATTR_GID)) inarg.valid |= FATTR_KILL_SUIDGID; if (attr->ia_valid & ATTR_SIZE) { /* For mandatory locking in truncate */ inarg.valid |= FATTR_LOCKOWNER; inarg.lock_owner = fuse_lock_owner_id(fc, current->files); /* Kill suid/sgid for truncate only if no CAP_FSETID */ if (fc->handle_killpriv_v2 && !capable(CAP_FSETID)) inarg.valid |= FATTR_KILL_SUIDGID; } fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); err = fuse_simple_request(fm, &args); if (err) { if (err == -EINTR) fuse_invalidate_attr(inode); goto error; } if (fuse_invalid_attr(&outarg.attr) || inode_wrong_type(inode, outarg.attr.mode)) { fuse_make_bad(inode); err = -EIO; goto error; } spin_lock(&fi->lock); /* the kernel maintains i_mtime locally */ if (trust_local_cmtime) { if (attr->ia_valid & ATTR_MTIME) inode_set_mtime_to_ts(inode, attr->ia_mtime); if (attr->ia_valid & ATTR_CTIME) inode_set_ctime_to_ts(inode, attr->ia_ctime); /* FIXME: clear I_DIRTY_SYNC? */ } fuse_change_attributes_common(inode, &outarg.attr, NULL, ATTR_TIMEOUT(&outarg), fuse_get_cache_mask(inode), 0); oldsize = inode->i_size; /* see the comment in fuse_change_attributes() */ if (!is_wb || is_truncate) i_size_write(inode, outarg.attr.size); if (is_truncate) { /* NOTE: this may release/reacquire fi->lock */ __fuse_release_nowrite(inode); } spin_unlock(&fi->lock); /* * Only call invalidate_inode_pages2() after removing * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock. */ if ((is_truncate || !is_wb) && S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { truncate_pagecache(inode, outarg.attr.size); invalidate_inode_pages2(mapping); } clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); out: if (fault_blocked) filemap_invalidate_unlock(mapping); return 0; error: if (is_truncate) fuse_release_nowrite(inode); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); if (fault_blocked) filemap_invalidate_unlock(mapping); return err; } static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry, struct iattr *attr) { struct inode *inode = d_inode(entry); struct fuse_conn *fc = get_fuse_conn(inode); struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL; int ret; if (fuse_is_bad(inode)) return -EIO; if (!fuse_allow_current_process(get_fuse_conn(inode))) return -EACCES; if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) { attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_MODE); /* * The only sane way to reliably kill suid/sgid is to do it in * the userspace filesystem * * This should be done on write(), truncate() and chown(). 
*/ if (!fc->handle_killpriv && !fc->handle_killpriv_v2) { /* * ia_mode calculation may have used stale i_mode. * Refresh and recalculate. */ ret = fuse_do_getattr(idmap, inode, NULL, file); if (ret) return ret; attr->ia_mode = inode->i_mode; if (inode->i_mode & S_ISUID) { attr->ia_valid |= ATTR_MODE; attr->ia_mode &= ~S_ISUID; } if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { attr->ia_valid |= ATTR_MODE; attr->ia_mode &= ~S_ISGID; } } } if (!attr->ia_valid) return 0; ret = fuse_do_setattr(idmap, entry, attr, file); if (!ret) { /* * If filesystem supports acls it may have updated acl xattrs in * the filesystem, so forget cached acls for the inode. */ if (fc->posix_acl) forget_all_cached_acls(inode); /* Directory mode changed, may need to revalidate access */ if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE)) fuse_invalidate_entry_cache(entry); } return ret; } static int fuse_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct inode *inode = d_inode(path->dentry); struct fuse_conn *fc = get_fuse_conn(inode); if (fuse_is_bad(inode)) return -EIO; if (!fuse_allow_current_process(fc)) { if (!request_mask) { /* * If user explicitly requested *nothing* then don't * error out, but return st_dev only. */ stat->result_mask = 0; stat->dev = inode->i_sb->s_dev; return 0; } return -EACCES; } return fuse_update_get_attr(idmap, inode, NULL, stat, request_mask, flags); } static const struct inode_operations fuse_dir_inode_operations = { .lookup = fuse_lookup, .mkdir = fuse_mkdir, .symlink = fuse_symlink, .unlink = fuse_unlink, .rmdir = fuse_rmdir, .rename = fuse_rename2, .link = fuse_link, .setattr = fuse_setattr, .create = fuse_create, .atomic_open = fuse_atomic_open, .tmpfile = fuse_tmpfile, .mknod = fuse_mknod, .permission = fuse_permission, .getattr = fuse_getattr, .listxattr = fuse_listxattr, .get_inode_acl = fuse_get_inode_acl, .get_acl = fuse_get_acl, .set_acl = fuse_set_acl, .fileattr_get = fuse_fileattr_get, .fileattr_set = fuse_fileattr_set, }; static const struct file_operations fuse_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = fuse_readdir, .open = fuse_dir_open, .release = fuse_dir_release, .fsync = fuse_dir_fsync, .unlocked_ioctl = fuse_dir_ioctl, .compat_ioctl = fuse_dir_compat_ioctl, }; static const struct inode_operations fuse_common_inode_operations = { .setattr = fuse_setattr, .permission = fuse_permission, .getattr = fuse_getattr, .listxattr = fuse_listxattr, .get_inode_acl = fuse_get_inode_acl, .get_acl = fuse_get_acl, .set_acl = fuse_set_acl, .fileattr_get = fuse_fileattr_get, .fileattr_set = fuse_fileattr_set, }; static const struct inode_operations fuse_symlink_inode_operations = { .setattr = fuse_setattr, .get_link = fuse_get_link, .getattr = fuse_getattr, .listxattr = fuse_listxattr, }; void fuse_init_common(struct inode *inode) { inode->i_op = &fuse_common_inode_operations; } void fuse_init_dir(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); inode->i_op = &fuse_dir_inode_operations; inode->i_fop = &fuse_dir_operations; spin_lock_init(&fi->rdc.lock); fi->rdc.cached = false; fi->rdc.size = 0; fi->rdc.pos = 0; fi->rdc.version = 0; } static int fuse_symlink_read_folio(struct file *null, struct folio *folio) { int err = fuse_readlink_page(folio->mapping->host, folio); if (!err) folio_mark_uptodate(folio); folio_unlock(folio); return err; } static const struct address_space_operations fuse_symlink_aops = { .read_folio = 
	fuse_symlink_read_folio,
};

void fuse_init_symlink(struct inode *inode)
{
	inode->i_op = &fuse_symlink_inode_operations;
	inode->i_data.a_ops = &fuse_symlink_aops;
	inode_nohighmem(inode);
}
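The attribute paths above split three ways: fuse_do_statx() when the server implements FUSE_STATX, fuse_do_getattr() otherwise, and generic_fillattr() straight from the cached attributes when no sync is needed. The fragment below is a minimal userspace sketch of how those paths get exercised; it is not part of the kernel sources, the path /mnt/fuse/file is a placeholder, and a glibc new enough to provide the statx() wrapper is assumed.

#define _GNU_SOURCE		/* statx() and AT_STATX_* in glibc headers */
#include <fcntl.h>		/* AT_FDCWD, AT_STATX_FORCE_SYNC */
#include <stdio.h>
#include <sys/stat.h>		/* statx(), struct statx, STATX_* */

int main(void)
{
	struct statx stx;

	/*
	 * Requesting STATX_BTIME steers a FUSE mount towards FUSE_STATX
	 * (unless the server already failed it with ENOSYS), and
	 * AT_STATX_FORCE_SYNC forces a round trip to the daemon instead
	 * of answering from cached attributes.  /mnt/fuse/file is only a
	 * placeholder path for this sketch.
	 */
	if (statx(AT_FDCWD, "/mnt/fuse/file", AT_STATX_FORCE_SYNC,
		  STATX_BASIC_STATS | STATX_BTIME, &stx) != 0) {
		perror("statx");
		return 1;
	}

	if (stx.stx_mask & STATX_BTIME)
		printf("btime: %lld\n", (long long)stx.stx_btime.tv_sec);
	else
		printf("server did not report STATX_BTIME\n");

	return 0;
}

Passing AT_STATX_DONT_SYNC instead would prefer the cached branch in fuse_update_get_attr(), subject to the attribute timeout.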
// SPDX-License-Identifier: GPL-2.0-only
#include <net/ip.h>
#include <net/tcp.h>
#include <net/netfilter/nf_tables.h>
#include <linux/netfilter/nfnetlink_osf.h>

struct nft_osf {
	u8	dreg;
	u8	ttl;
	u32	flags;
};

static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = {
	[NFTA_OSF_DREG]		= { .type = NLA_U32 },
	[NFTA_OSF_TTL]		= { .type = NLA_U8 },
	[NFTA_OSF_FLAGS]	= { .type = NLA_U32 },
};

static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
			 const struct nft_pktinfo *pkt)
{
	struct nft_osf *priv = nft_expr_priv(expr);
	u32 *dest = &regs->data[priv->dreg];
	struct sk_buff *skb = pkt->skb;
	char os_match[NFT_OSF_MAXGENRELEN];
	const struct tcphdr *tcp;
	struct nf_osf_data data;
	struct tcphdr _tcph;

	if (pkt->tprot != IPPROTO_TCP) {
		regs->verdict.code = NFT_BREAK;
		return;
	}

	tcp = skb_header_pointer(skb, ip_hdrlen(skb),
				 sizeof(struct tcphdr), &_tcph);
	if (!tcp) {
		regs->verdict.code = NFT_BREAK;
		return;
	}

	if (!tcp->syn) {
		regs->verdict.code = NFT_BREAK;
		return;
	}

	if (!nf_osf_find(skb, nf_osf_fingers, priv->ttl, &data)) {
		strscpy_pad((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
	} else {
		if (priv->flags & NFT_OSF_F_VERSION)
			snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s",
				 data.genre, data.version);
		else
			strscpy(os_match, data.genre, NFT_OSF_MAXGENRELEN);

		strscpy_pad((char *)dest, os_match, NFT_OSF_MAXGENRELEN);
	}
}

static int nft_osf_init(const struct nft_ctx *ctx,
			const struct nft_expr *expr,
			const struct nlattr * const tb[])
{
	struct nft_osf *priv = nft_expr_priv(expr);
	u32 flags;
	u8 ttl;

	if (!tb[NFTA_OSF_DREG])
		return -EINVAL;

	if (tb[NFTA_OSF_TTL]) {
		ttl = nla_get_u8(tb[NFTA_OSF_TTL]);
		if (ttl > 2)
			return -EINVAL;
		priv->ttl = ttl;
	}

	if (tb[NFTA_OSF_FLAGS]) {
		flags = ntohl(nla_get_be32(tb[NFTA_OSF_FLAGS]));
		if (flags != NFT_OSF_F_VERSION)
			return -EINVAL;
		priv->flags = flags;
	}

	return nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg,
					NULL, NFT_DATA_VALUE,
					NFT_OSF_MAXGENRELEN);
}

static int nft_osf_dump(struct sk_buff *skb,
			const struct nft_expr *expr, bool reset)
{
	const struct nft_osf *priv = nft_expr_priv(expr);

	if (nla_put_u8(skb, NFTA_OSF_TTL, priv->ttl))
		goto nla_put_failure;

	if (nla_put_u32(skb, NFTA_OSF_FLAGS, ntohl((__force __be32)priv->flags)))
		goto nla_put_failure;

	if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg))
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -1;
}

static int nft_osf_validate(const struct nft_ctx *ctx,
			    const struct nft_expr *expr)
{
	unsigned int hooks;

	switch (ctx->family) {
	case NFPROTO_IPV4:
	case NFPROTO_IPV6:
	case NFPROTO_INET:
		hooks = (1 << NF_INET_LOCAL_IN) |
			(1 << NF_INET_PRE_ROUTING) |
			(1 << NF_INET_FORWARD);
		break;
	default:
		return -EOPNOTSUPP;
	}

	return nft_chain_validate_hooks(ctx->chain, hooks);
}

static bool nft_osf_reduce(struct nft_regs_track *track,
			   const struct nft_expr *expr)
{
	struct nft_osf *priv = nft_expr_priv(expr);
	struct nft_osf *osf;

	if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
		nft_reg_track_update(track, expr, priv->dreg,
				     NFT_OSF_MAXGENRELEN);
		return false;
	}

	osf = nft_expr_priv(track->regs[priv->dreg].selector);
	if (priv->flags != osf->flags ||
	    priv->ttl != osf->ttl) {
		nft_reg_track_update(track, expr, priv->dreg,
				     NFT_OSF_MAXGENRELEN);
		return false;
	}

	if (!track->regs[priv->dreg].bitwise)
		return true;

	return false;
}

static struct nft_expr_type nft_osf_type;
static const struct nft_expr_ops nft_osf_op = {
	.eval		= nft_osf_eval,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_osf)),
	.init		= nft_osf_init,
	.dump		= nft_osf_dump,
	.type		= &nft_osf_type,
	.validate	= nft_osf_validate,
	.reduce		= nft_osf_reduce,
};

static struct nft_expr_type nft_osf_type __read_mostly = {
	.ops		= &nft_osf_op,
	.name		= "osf",
	.owner		= THIS_MODULE,
	.policy		= nft_osf_policy,
	.maxattr	= NFTA_OSF_MAX,
};

static int __init nft_osf_module_init(void)
{
	return nft_register_expr(&nft_osf_type);
}

static void __exit nft_osf_module_exit(void)
{
	return nft_unregister_expr(&nft_osf_type);
}

module_init(nft_osf_module_init);
module_exit(nft_osf_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>");
MODULE_ALIAS_NFT_EXPR("osf");
MODULE_DESCRIPTION("nftables passive OS fingerprint support");
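One detail worth spelling out in nft_osf_eval() above is the strscpy_pad() at the end: the destination is a fixed-size nft register of NFT_OSF_MAXGENRELEN bytes, so whatever consumes the result (typically a comparison against a constant genre string) can operate on the full NUL-padded value instead of a variable-length string. The stand-alone sketch below only illustrates that layout; it is not kernel code, and it assumes the uapi value of 16 for NFT_OSF_MAXGENRELEN.

#include <stdio.h>
#include <string.h>

#define NFT_OSF_MAXGENRELEN 16	/* assumed to match the uapi definition */

int main(void)
{
	/* Zero-initialised buffers mimic strscpy_pad()'s trailing NULs. */
	char reg[NFT_OSF_MAXGENRELEN] = { 0 };	/* what osf writes to its dreg */
	char want[NFT_OSF_MAXGENRELEN] = { 0 };	/* constant a comparison would carry */

	strncpy(reg, "Linux", sizeof(reg) - 1);
	strncpy(want, "Linux", sizeof(want) - 1);

	/* Fixed-width compare, independent of the genre string's length. */
	printf("match: %s\n",
	       memcmp(reg, want, sizeof(reg)) == 0 ? "yes" : "no");
	return 0;
}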
/* * IPv6 specific functions of netfilter core * * Rusty Russell (C) 2000 -- This code is GPL. * Patrick McHardy (C) 2006-2012 */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/ipv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/export.h> #include <net/addrconf.h> #include <net/dst.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/xfrm.h> #include <net/netfilter/nf_queue.h> #include <net/netfilter/nf_conntrack_bridge.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include "../bridge/br_private.h" int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); struct sock *sk = sk_to_full_sk(sk_partial); struct net_device *dev = skb_dst(skb)->dev; struct flow_keys flkeys; unsigned int hh_len; struct dst_entry *dst; int strict = (ipv6_addr_type(&iph->daddr) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct flowi6 fl6 = { .flowi6_l3mdev = l3mdev_master_ifindex(dev), .flowi6_mark = skb->mark, .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), }; int err; if (sk && sk->sk_bound_dev_if) fl6.flowi6_oif = sk->sk_bound_dev_if; else if (strict) fl6.flowi6_oif = dev->ifindex; fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys); dst = ip6_route_output(net, sk, &fl6); err = dst->error; if (err) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); net_dbg_ratelimited("ip6_route_me_harder: No more route\n"); dst_release(dst); return err; } /* Drop old route. */ skb_dst_drop(skb); skb_dst_set(skb, dst); #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); skb_dst_set(skb, dst); } #endif /* Change in oif may mean change in hh_len.
*/ hh_len = skb_dst(skb)->dev->hard_header_len; if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 0, GFP_ATOMIC)) return -ENOMEM; return 0; } EXPORT_SYMBOL(ip6_route_me_harder); static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); if (entry->state.hook == NF_INET_LOCAL_OUT) { const struct ipv6hdr *iph = ipv6_hdr(skb); if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || skb->mark != rt_info->mark) return ip6_route_me_harder(entry->state.net, entry->state.sk, skb); } return 0; } int __nf_ip6_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict) { static const struct ipv6_pinfo fake_pinfo; static const struct inet_sock fake_sk = { /* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */ .sk.sk_bound_dev_if = 1, .pinet6 = (struct ipv6_pinfo *) &fake_pinfo, }; const void *sk = strict ? &fake_sk : NULL; struct dst_entry *result; int err; result = ip6_route_output(net, sk, &fl->u.ip6); err = result->error; if (err) dst_release(result); else *dst = result; return err; } EXPORT_SYMBOL_GPL(__nf_ip6_route); int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct nf_bridge_frag_data *data, int (*output)(struct net *, struct sock *sk, const struct nf_bridge_frag_data *data, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; u8 tstamp_type = skb->tstamp_type; ktime_t tstamp = skb->tstamp; struct ip6_frag_state state; u8 *prevhdr, nexthdr = 0; unsigned int mtu, hlen; int hroom, err = 0; __be32 frag_id; err = ip6_find_1stfragopt(skb, &prevhdr); if (err < 0) goto blackhole; hlen = err; nexthdr = *prevhdr; mtu = skb->dev->mtu; if (frag_max_size > mtu || frag_max_size < IPV6_MIN_MTU) goto blackhole; mtu = frag_max_size; if (mtu < hlen + sizeof(struct frag_hdr) + 8) goto blackhole; mtu -= hlen + sizeof(struct frag_hdr); frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); if (skb->ip_summed == CHECKSUM_PARTIAL && (err = skb_checksum_help(skb))) goto blackhole; hroom = LL_RESERVED_SPACE(skb->dev); if (skb_has_frag_list(skb)) { unsigned int first_len = skb_pagelen(skb); struct ip6_fraglist_iter iter; struct sk_buff *frag2; if (first_len - hlen > mtu || skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) goto blackhole; if (skb_cloned(skb)) goto slow_path; skb_walk_frags(skb, frag2) { if (frag2->len > mtu || skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr))) goto blackhole; /* Partially cloned skb? */ if (skb_shared(frag2)) goto slow_path; } err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, &iter); if (err < 0) goto blackhole; for (;;) { /* Prepare header of the next frame, * before previous one went down. */ if (iter.frag) ip6_fraglist_prepare(skb, &iter); skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, data, skb); if (err || !iter.frag) break; skb = ip6_fraglist_next(&iter); } kfree(iter.tmp_hdr); if (!err) return 0; kfree_skb_list(iter.frag); return err; } slow_path: /* This is a linearized skbuff, the original geometry is lost for us. * This may also be a clone skbuff, we could preserve the geometry for * the copies but probably not worth the effort. 
*/ ip6_frag_init(skb, hlen, mtu, skb->dev->needed_tailroom, LL_RESERVED_SPACE(skb->dev), prevhdr, nexthdr, frag_id, &state); while (state.left > 0) { struct sk_buff *skb2; skb2 = ip6_frag_next(skb, &state); if (IS_ERR(skb2)) { err = PTR_ERR(skb2); goto blackhole; } skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, data, skb2); if (err) goto blackhole; } consume_skb(skb); return err; blackhole: kfree_skb(skb); return 0; } EXPORT_SYMBOL_GPL(br_ip6_fragment); static const struct nf_ipv6_ops ipv6ops = { #if IS_MODULE(CONFIG_IPV6) .chk_addr = ipv6_chk_addr, .route_me_harder = ip6_route_me_harder, .dev_get_saddr = ipv6_dev_get_saddr, .route = __nf_ip6_route, #if IS_ENABLED(CONFIG_SYN_COOKIES) .cookie_init_sequence = __cookie_v6_init_sequence, .cookie_v6_check = __cookie_v6_check, #endif #endif .route_input = ip6_route_input, .fragment = ip6_fragment, .reroute = nf_ip6_reroute, #if IS_MODULE(CONFIG_IPV6) .br_fragment = br_ip6_fragment, #endif }; int __init ipv6_netfilter_init(void) { RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops); return 0; } /* This can be called from inet6_init() on errors, so it cannot * be marked __exit. -DaveM */ void ipv6_netfilter_fini(void) { RCU_INIT_POINTER(nf_ipv6_ops, NULL); }
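In br_ip6_fragment() above the usable payload per fragment is frag_max_size minus the unfragmentable part (hlen) and the 8-byte fragment header, and IPv6 further requires every fragment except the last to carry a multiple of 8 bytes. The sketch below works through that arithmetic; ipv6_frag_count() is a hypothetical helper written for illustration, not a kernel function.

#include <stdio.h>

/*
 * How many fragments does a payload need?  Mirrors the sizing used by
 * br_ip6_fragment(): capacity = mtu - hlen - sizeof(struct frag_hdr),
 * rounded down to a multiple of 8 (required for all but the last fragment).
 */
static unsigned int ipv6_frag_count(unsigned int payload_len,
				    unsigned int mtu, unsigned int hlen)
{
	const unsigned int frag_hdr_len = 8;	/* sizeof(struct frag_hdr) */
	unsigned int per_frag = (mtu - hlen - frag_hdr_len) & ~7u;

	return (payload_len + per_frag - 1) / per_frag;
}

int main(void)
{
	/*
	 * 3000 bytes of payload behind a plain 40-byte IPv6 header at the
	 * minimum MTU of 1280: capacity is (1280 - 40 - 8) & ~7 = 1232
	 * bytes per fragment, so three fragments are needed.
	 */
	printf("%u fragments\n", ipv6_frag_count(3000, 1280, 40));
	return 0;
}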
// SPDX-License-Identifier: GPL-2.0-or-later /* * cgroups support for the BFQ I/O scheduler. */ #include <linux/module.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/cgroup.h> #include <linux/ktime.h> #include <linux/rbtree.h> #include <linux/ioprio.h> #include <linux/sbitmap.h> #include <linux/delay.h> #include "elevator.h" #include "bfq-iosched.h" #ifdef CONFIG_BFQ_CGROUP_DEBUG static int bfq_stat_init(struct bfq_stat *stat, gfp_t gfp) { int ret; ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp); if (ret) return ret; atomic64_set(&stat->aux_cnt, 0); return 0; } static void bfq_stat_exit(struct bfq_stat *stat) { percpu_counter_destroy(&stat->cpu_cnt); } /** * bfq_stat_add - add a value to a bfq_stat * @stat: target bfq_stat * @val: value to add * * Add @val to @stat. The caller must ensure that IRQ on the same CPU * don't re-enter this function for the same counter.
*/ static inline void bfq_stat_add(struct bfq_stat *stat, uint64_t val) { percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); } /** * bfq_stat_read - read the current value of a bfq_stat * @stat: bfq_stat to read */ static inline uint64_t bfq_stat_read(struct bfq_stat *stat) { return percpu_counter_sum_positive(&stat->cpu_cnt); } /** * bfq_stat_reset - reset a bfq_stat * @stat: bfq_stat to reset */ static inline void bfq_stat_reset(struct bfq_stat *stat) { percpu_counter_set(&stat->cpu_cnt, 0); atomic64_set(&stat->aux_cnt, 0); } /** * bfq_stat_add_aux - add a bfq_stat into another's aux count * @to: the destination bfq_stat * @from: the source * * Add @from's count including the aux one to @to's aux count. */ static inline void bfq_stat_add_aux(struct bfq_stat *to, struct bfq_stat *from) { atomic64_add(bfq_stat_read(from) + atomic64_read(&from->aux_cnt), &to->aux_cnt); } /** * blkg_prfill_stat - prfill callback for bfq_stat * @sf: seq_file to print to * @pd: policy private data of interest * @off: offset to the bfq_stat in @pd * * prfill callback for printing a bfq_stat. */ static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off) { return __blkg_prfill_u64(sf, pd, bfq_stat_read((void *)pd + off)); } /* bfqg stats flags */ enum bfqg_stats_flags { BFQG_stats_waiting = 0, BFQG_stats_idling, BFQG_stats_empty, }; #define BFQG_FLAG_FNS(name) \ static void bfqg_stats_mark_##name(struct bfqg_stats *stats) \ { \ stats->flags |= (1 << BFQG_stats_##name); \ } \ static void bfqg_stats_clear_##name(struct bfqg_stats *stats) \ { \ stats->flags &= ~(1 << BFQG_stats_##name); \ } \ static int bfqg_stats_##name(struct bfqg_stats *stats) \ { \ return (stats->flags & (1 << BFQG_stats_##name)) != 0; \ } \ BFQG_FLAG_FNS(waiting) BFQG_FLAG_FNS(idling) BFQG_FLAG_FNS(empty) #undef BFQG_FLAG_FNS /* This should be called with the scheduler lock held. */ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats) { u64 now; if (!bfqg_stats_waiting(stats)) return; now = blk_time_get_ns(); if (now > stats->start_group_wait_time) bfq_stat_add(&stats->group_wait_time, now - stats->start_group_wait_time); bfqg_stats_clear_waiting(stats); } /* This should be called with the scheduler lock held. */ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, struct bfq_group *curr_bfqg) { struct bfqg_stats *stats = &bfqg->stats; if (bfqg_stats_waiting(stats)) return; if (bfqg == curr_bfqg) return; stats->start_group_wait_time = blk_time_get_ns(); bfqg_stats_mark_waiting(stats); } /* This should be called with the scheduler lock held. */ static void bfqg_stats_end_empty_time(struct bfqg_stats *stats) { u64 now; if (!bfqg_stats_empty(stats)) return; now = blk_time_get_ns(); if (now > stats->start_empty_time) bfq_stat_add(&stats->empty_time, now - stats->start_empty_time); bfqg_stats_clear_empty(stats); } void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { bfq_stat_add(&bfqg->stats.dequeue, 1); } void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; if (blkg_rwstat_total(&stats->queued)) return; /* * group is already marked empty. This can happen if bfqq got new * request in parent group and moved to this group while being added * to service tree. Just ignore the event and move on. 
*/ if (bfqg_stats_empty(stats)) return; stats->start_empty_time = blk_time_get_ns(); bfqg_stats_mark_empty(stats); } void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; if (bfqg_stats_idling(stats)) { u64 now = blk_time_get_ns(); if (now > stats->start_idle_time) bfq_stat_add(&stats->idle_time, now - stats->start_idle_time); bfqg_stats_clear_idling(stats); } } void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; stats->start_idle_time = blk_time_get_ns(); bfqg_stats_mark_idling(stats); } void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; bfq_stat_add(&stats->avg_queue_size_sum, blkg_rwstat_total(&stats->queued)); bfq_stat_add(&stats->avg_queue_size_samples, 1); bfqg_stats_update_group_wait_time(stats); } void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, blk_opf_t opf) { blkg_rwstat_add(&bfqg->stats.queued, opf, 1); bfqg_stats_end_empty_time(&bfqg->stats); if (!(bfqq == bfqg->bfqd->in_service_queue)) bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq)); } void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { blkg_rwstat_add(&bfqg->stats.queued, opf, -1); } void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { blkg_rwstat_add(&bfqg->stats.merged, opf, 1); } void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, u64 io_start_time_ns, blk_opf_t opf) { struct bfqg_stats *stats = &bfqg->stats; u64 now = blk_time_get_ns(); if (now > io_start_time_ns) blkg_rwstat_add(&stats->service_time, opf, now - io_start_time_ns); if (io_start_time_ns > start_time_ns) blkg_rwstat_add(&stats->wait_time, opf, io_start_time_ns - start_time_ns); } #else /* CONFIG_BFQ_CGROUP_DEBUG */ void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { } void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { } void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, u64 io_start_time_ns, blk_opf_t opf) { } void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { } void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { } #endif /* CONFIG_BFQ_CGROUP_DEBUG */ #ifdef CONFIG_BFQ_GROUP_IOSCHED /* * blk-cgroup policy-related handlers * The following functions help in converting between blk-cgroup * internal structures and BFQ-specific structures. */ static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd) { return pd ? container_of(pd, struct bfq_group, pd) : NULL; } struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg) { return pd_to_blkg(&bfqg->pd); } static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg) { return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq)); } /* * bfq_group handlers * The following functions help in navigating the bfq_group hierarchy * by allowing to find the parent of a bfq_group or the bfq_group * associated to a bfq_queue. */ static struct bfq_group *bfqg_parent(struct bfq_group *bfqg) { struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent; return pblkg ? blkg_to_bfqg(pblkg) : NULL; } struct bfq_group *bfqq_group(struct bfq_queue *bfqq) { struct bfq_entity *group_entity = bfqq->entity.parent; return group_entity ? container_of(group_entity, struct bfq_group, entity) : bfqq->bfqd->root_group; } /* * The following two functions handle get and put of a bfq_group by * wrapping the related blk-cgroup hooks. 
*/ static void bfqg_get(struct bfq_group *bfqg) { refcount_inc(&bfqg->ref); } static void bfqg_put(struct bfq_group *bfqg) { if (refcount_dec_and_test(&bfqg->ref)) kfree(bfqg); } static void bfqg_and_blkg_get(struct bfq_group *bfqg) { /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ bfqg_get(bfqg); blkg_get(bfqg_to_blkg(bfqg)); } void bfqg_and_blkg_put(struct bfq_group *bfqg) { blkg_put(bfqg_to_blkg(bfqg)); bfqg_put(bfqg); } void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq) { struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg); if (!bfqg) return; blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq)); blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1); } /* @stats = 0 */ static void bfqg_stats_reset(struct bfqg_stats *stats) { #ifdef CONFIG_BFQ_CGROUP_DEBUG /* queued stats shouldn't be cleared */ blkg_rwstat_reset(&stats->merged); blkg_rwstat_reset(&stats->service_time); blkg_rwstat_reset(&stats->wait_time); bfq_stat_reset(&stats->time); bfq_stat_reset(&stats->avg_queue_size_sum); bfq_stat_reset(&stats->avg_queue_size_samples); bfq_stat_reset(&stats->dequeue); bfq_stat_reset(&stats->group_wait_time); bfq_stat_reset(&stats->idle_time); bfq_stat_reset(&stats->empty_time); #endif } /* @to += @from */ static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from) { if (!to || !from) return; #ifdef CONFIG_BFQ_CGROUP_DEBUG /* queued stats shouldn't be cleared */ blkg_rwstat_add_aux(&to->merged, &from->merged); blkg_rwstat_add_aux(&to->service_time, &from->service_time); blkg_rwstat_add_aux(&to->wait_time, &from->wait_time); bfq_stat_add_aux(&from->time, &from->time); bfq_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum); bfq_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples); bfq_stat_add_aux(&to->dequeue, &from->dequeue); bfq_stat_add_aux(&to->group_wait_time, &from->group_wait_time); bfq_stat_add_aux(&to->idle_time, &from->idle_time); bfq_stat_add_aux(&to->empty_time, &from->empty_time); #endif } /* * Transfer @bfqg's stats to its parent's aux counts so that the ancestors' * recursive stats can still account for the amount used by this bfqg after * it's gone. */ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg) { struct bfq_group *parent; if (!bfqg) /* root_group */ return; parent = bfqg_parent(bfqg); lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock); if (unlikely(!parent)) return; bfqg_stats_add_aux(&parent->stats, &bfqg->stats); bfqg_stats_reset(&bfqg->stats); } void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); entity->weight = entity->new_weight; entity->orig_weight = entity->new_weight; if (bfqq) { bfqq->ioprio = bfqq->new_ioprio; bfqq->ioprio_class = bfqq->new_ioprio_class; /* * Make sure that bfqg and its associated blkg do not * disappear before entity. 
*/ bfqg_and_blkg_get(bfqg); } entity->parent = bfqg->my_entity; /* NULL for root group */ entity->sched_data = &bfqg->sched_data; } static void bfqg_stats_exit(struct bfqg_stats *stats) { blkg_rwstat_exit(&stats->bytes); blkg_rwstat_exit(&stats->ios); #ifdef CONFIG_BFQ_CGROUP_DEBUG blkg_rwstat_exit(&stats->merged); blkg_rwstat_exit(&stats->service_time); blkg_rwstat_exit(&stats->wait_time); blkg_rwstat_exit(&stats->queued); bfq_stat_exit(&stats->time); bfq_stat_exit(&stats->avg_queue_size_sum); bfq_stat_exit(&stats->avg_queue_size_samples); bfq_stat_exit(&stats->dequeue); bfq_stat_exit(&stats->group_wait_time); bfq_stat_exit(&stats->idle_time); bfq_stat_exit(&stats->empty_time); #endif } static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp) { if (blkg_rwstat_init(&stats->bytes, gfp) || blkg_rwstat_init(&stats->ios, gfp)) goto error; #ifdef CONFIG_BFQ_CGROUP_DEBUG if (blkg_rwstat_init(&stats->merged, gfp) || blkg_rwstat_init(&stats->service_time, gfp) || blkg_rwstat_init(&stats->wait_time, gfp) || blkg_rwstat_init(&stats->queued, gfp) || bfq_stat_init(&stats->time, gfp) || bfq_stat_init(&stats->avg_queue_size_sum, gfp) || bfq_stat_init(&stats->avg_queue_size_samples, gfp) || bfq_stat_init(&stats->dequeue, gfp) || bfq_stat_init(&stats->group_wait_time, gfp) || bfq_stat_init(&stats->idle_time, gfp) || bfq_stat_init(&stats->empty_time, gfp)) goto error; #endif return 0; error: bfqg_stats_exit(stats); return -ENOMEM; } static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd) { return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL; } static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg) { return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq)); } static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp) { struct bfq_group_data *bgd; bgd = kzalloc(sizeof(*bgd), gfp); if (!bgd) return NULL; bgd->weight = CGROUP_WEIGHT_DFL; return &bgd->pd; } static void bfq_cpd_free(struct blkcg_policy_data *cpd) { kfree(cpd_to_bfqgd(cpd)); } static struct blkg_policy_data *bfq_pd_alloc(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp) { struct bfq_group *bfqg; bfqg = kzalloc_node(sizeof(*bfqg), gfp, disk->node_id); if (!bfqg) return NULL; if (bfqg_stats_init(&bfqg->stats, gfp)) { kfree(bfqg); return NULL; } /* see comments in bfq_bic_update_cgroup for why refcounting */ refcount_set(&bfqg->ref, 1); return &bfqg->pd; } static void bfq_pd_init(struct blkg_policy_data *pd) { struct blkcg_gq *blkg = pd_to_blkg(pd); struct bfq_group *bfqg = blkg_to_bfqg(blkg); struct bfq_data *bfqd = blkg->q->elevator->elevator_data; struct bfq_entity *entity = &bfqg->entity; struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg); entity->orig_weight = entity->weight = entity->new_weight = d->weight; entity->my_sched_data = &bfqg->sched_data; entity->last_bfqq_created = NULL; bfqg->my_entity = entity; /* * the root_group's will be set to NULL * in bfq_init_queue() */ bfqg->bfqd = bfqd; bfqg->active_entities = 0; bfqg->num_queues_with_pending_reqs = 0; bfqg->rq_pos_tree = RB_ROOT; } static void bfq_pd_free(struct blkg_policy_data *pd) { struct bfq_group *bfqg = pd_to_bfqg(pd); bfqg_stats_exit(&bfqg->stats); bfqg_put(bfqg); } static void bfq_pd_reset_stats(struct blkg_policy_data *pd) { struct bfq_group *bfqg = pd_to_bfqg(pd); bfqg_stats_reset(&bfqg->stats); } static void bfq_group_set_parent(struct bfq_group *bfqg, struct bfq_group *parent) { struct bfq_entity *entity; entity = &bfqg->entity; entity->parent = parent->my_entity; entity->sched_data = &parent->sched_data; } static 
void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg) { struct bfq_group *parent; struct bfq_entity *entity; /* * Update chain of bfq_groups as we might be handling a leaf group * which, along with some of its relatives, has not been hooked yet * to the private hierarchy of BFQ. */ entity = &bfqg->entity; for_each_entity(entity) { struct bfq_group *curr_bfqg = container_of(entity, struct bfq_group, entity); if (curr_bfqg != bfqd->root_group) { parent = bfqg_parent(curr_bfqg); if (!parent) parent = bfqd->root_group; bfq_group_set_parent(curr_bfqg, parent); } } } struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) { struct blkcg_gq *blkg = bio->bi_blkg; struct bfq_group *bfqg; while (blkg) { if (!blkg->online) { blkg = blkg->parent; continue; } bfqg = blkg_to_bfqg(blkg); if (bfqg->pd.online) { bio_associate_blkg_from_css(bio, &blkg->blkcg->css); return bfqg; } blkg = blkg->parent; } bio_associate_blkg_from_css(bio, &bfqg_to_blkg(bfqd->root_group)->blkcg->css); return bfqd->root_group; } /** * bfq_bfqq_move - migrate @bfqq to @bfqg. * @bfqd: queue descriptor. * @bfqq: the queue to move. * @bfqg: the group to move to. * * Move @bfqq to @bfqg, deactivating it from its old group and reactivating * it on the new one. Avoid putting the entity on the old group idle tree. * * Must be called under the scheduler lock, to make sure that the blkg * owning @bfqg does not disappear (see comments in * bfq_bic_update_cgroup on guaranteeing the consistency of blkg * objects). */ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_group *bfqg) { struct bfq_entity *entity = &bfqq->entity; struct bfq_group *old_parent = bfqq_group(bfqq); bool has_pending_reqs = false; /* * No point to move bfqq to the same group, which can happen when * root group is offlined */ if (old_parent == bfqg) return; /* * oom_bfqq is not allowed to move, oom_bfqq will hold ref to root_group * until elevator exit. */ if (bfqq == &bfqd->oom_bfqq) return; /* * Get extra reference to prevent bfqq from being freed in * next possible expire or deactivate. */ bfqq->ref++; if (entity->in_groups_with_pending_reqs) { has_pending_reqs = true; bfq_del_bfqq_in_groups_with_pending_reqs(bfqq); } /* If bfqq is empty, then bfq_bfqq_expire also invokes * bfq_del_bfqq_busy, thereby removing bfqq and its entity * from data structures related to current group. Otherwise we * need to remove bfqq explicitly with bfq_deactivate_bfqq, as * we do below. 
*/ if (bfqq == bfqd->in_service_queue) bfq_bfqq_expire(bfqd, bfqd->in_service_queue, false, BFQQE_PREEMPTED); if (bfq_bfqq_busy(bfqq)) bfq_deactivate_bfqq(bfqd, bfqq, false, false); else if (entity->on_st_or_in_serv) bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); bfqg_and_blkg_put(old_parent); bfq_reassign_last_bfqq(bfqq, NULL); entity->parent = bfqg->my_entity; entity->sched_data = &bfqg->sched_data; /* pin down bfqg and its associated blkg */ bfqg_and_blkg_get(bfqg); if (has_pending_reqs) bfq_add_bfqq_in_groups_with_pending_reqs(bfqq); if (bfq_bfqq_busy(bfqq)) { if (unlikely(!bfqd->nonrot_with_queueing)) bfq_pos_tree_add_move(bfqd, bfqq); bfq_activate_bfqq(bfqd, bfqq); } if (!bfqd->in_service_queue && !bfqd->tot_rq_in_driver) bfq_schedule_dispatch(bfqd); /* release extra ref taken above, bfqq may happen to be freed now */ bfq_put_queue(bfqq); } static void bfq_sync_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *sync_bfqq, struct bfq_io_cq *bic, struct bfq_group *bfqg, unsigned int act_idx) { struct bfq_queue *bfqq; if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { /* We are the only user of this bfqq, just move it */ if (sync_bfqq->entity.sched_data != &bfqg->sched_data) bfq_bfqq_move(bfqd, sync_bfqq, bfqg); return; } /* * The queue was merged to a different queue. Check * that the merge chain still belongs to the same * cgroup. */ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) if (bfqq->entity.sched_data != &bfqg->sched_data) break; if (bfqq) { /* * Some queue changed cgroup so the merge is not valid * anymore. We cannot easily just cancel the merge (by * clearing new_bfqq) as there may be other processes * using this queue and holding refs to all queues * below sync_bfqq->new_bfqq. Similarly if the merge * already happened, we need to detach from bfqq now * so that we cannot merge bio to a request from the * old cgroup. */ bfq_put_cooperator(sync_bfqq); bic_set_bfqq(bic, NULL, true, act_idx); bfq_release_process_ref(bfqd, sync_bfqq); } } /** * __bfq_bic_change_cgroup - move @bic to @bfqg. * @bfqd: the queue descriptor. * @bic: the bic to move. * @bfqg: the group to move to. * * Move bic to blkcg, assuming that bfqd->lock is held; which makes * sure that the reference to cgroup is valid across the call (see * comments in bfq_bic_update_cgroup on this issue) */ static void __bfq_bic_change_cgroup(struct bfq_data *bfqd, struct bfq_io_cq *bic, struct bfq_group *bfqg) { unsigned int act_idx; for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) { struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx); struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx); if (async_bfqq && async_bfqq->entity.sched_data != &bfqg->sched_data) { bic_set_bfqq(bic, NULL, false, act_idx); bfq_release_process_ref(bfqd, async_bfqq); } if (sync_bfqq) bfq_sync_bfqq_move(bfqd, sync_bfqq, bic, bfqg, act_idx); } } void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) { struct bfq_data *bfqd = bic_to_bfqd(bic); struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio); uint64_t serial_nr; serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger * spuriously on a newly created cic but there's no harm. */ if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) return; /* * New cgroup for this process. Make sure it is linked to bfq internal * cgroup hierarchy. 
*/ bfq_link_bfqg(bfqd, bfqg); __bfq_bic_change_cgroup(bfqd, bic, bfqg); bic->blkcg_serial_nr = serial_nr; } /** * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. * @st: the service tree being flushed. */ static void bfq_flush_idle_tree(struct bfq_service_tree *st) { struct bfq_entity *entity = st->first_idle; for (; entity ; entity = st->first_idle) __bfq_deactivate_entity(entity, false); } /** * bfq_reparent_leaf_entity - move leaf entity to the root_group. * @bfqd: the device data structure with the root group. * @entity: the entity to move, if entity is a leaf; or the parent entity * of an active leaf entity to move, if entity is not a leaf. * @ioprio_class: I/O priority class to reparent. */ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, struct bfq_entity *entity, int ioprio_class) { struct bfq_queue *bfqq; struct bfq_entity *child_entity = entity; while (child_entity->my_sched_data) { /* leaf not reached yet */ struct bfq_sched_data *child_sd = child_entity->my_sched_data; struct bfq_service_tree *child_st = child_sd->service_tree + ioprio_class; struct rb_root *child_active = &child_st->active; child_entity = bfq_entity_of(rb_first(child_active)); if (!child_entity) child_entity = child_sd->in_service_entity; } bfqq = bfq_entity_to_bfqq(child_entity); bfq_bfqq_move(bfqd, bfqq, bfqd->root_group); } /** * bfq_reparent_active_queues - move to the root group all active queues. * @bfqd: the device data structure with the root group. * @bfqg: the group to move from. * @st: the service tree to start the search from. * @ioprio_class: I/O priority class to reparent. */ static void bfq_reparent_active_queues(struct bfq_data *bfqd, struct bfq_group *bfqg, struct bfq_service_tree *st, int ioprio_class) { struct rb_root *active = &st->active; struct bfq_entity *entity; while ((entity = bfq_entity_of(rb_first(active)))) bfq_reparent_leaf_entity(bfqd, entity, ioprio_class); if (bfqg->sched_data.in_service_entity) bfq_reparent_leaf_entity(bfqd, bfqg->sched_data.in_service_entity, ioprio_class); } /** * bfq_pd_offline - deactivate the entity associated with @pd, * and reparent its children entities. * @pd: descriptor of the policy going offline. * * blkio already grabs the queue_lock for us, so no need to use * RCU-based magic */ static void bfq_pd_offline(struct blkg_policy_data *pd) { struct bfq_service_tree *st; struct bfq_group *bfqg = pd_to_bfqg(pd); struct bfq_data *bfqd = bfqg->bfqd; struct bfq_entity *entity = bfqg->my_entity; unsigned long flags; int i; spin_lock_irqsave(&bfqd->lock, flags); if (!entity) /* root group */ goto put_async_queues; /* * Empty all service_trees belonging to this group before * deactivating the group itself. */ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { st = bfqg->sched_data.service_tree + i; /* * It may happen that some queues are still active * (busy) upon group destruction (if the corresponding * processes have been forced to terminate). We move * all the leaf entities corresponding to these queues * to the root_group. * Also, it may happen that the group has an entity * in service, which is disconnected from the active * tree: it must be moved, too. * There is no need to put the sync queues, as the * scheduler has taken no reference. */ bfq_reparent_active_queues(bfqd, bfqg, st, i); /* * The idle tree may still contain bfq_queues * belonging to exited task because they never * migrated to a different cgroup from the one being * destroyed now. 
In addition, even * bfq_reparent_active_queues() may happen to add some * entities to the idle tree. It happens if, in some * of the calls to bfq_bfqq_move() performed by * bfq_reparent_active_queues(), the queue to move is * empty and gets expired. */ bfq_flush_idle_tree(st); } __bfq_deactivate_entity(entity, false); put_async_queues: bfq_put_async_queues(bfqd, bfqg); spin_unlock_irqrestore(&bfqd->lock, flags); /* * @blkg is going offline and will be ignored by * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so * that they don't get lost. If IOs complete after this point, the * stats for them will be lost. Oh well... */ bfqg_stats_xfer_dead(bfqg); } void bfq_end_wr_async(struct bfq_data *bfqd) { struct blkcg_gq *blkg; list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) { struct bfq_group *bfqg = blkg_to_bfqg(blkg); bfq_end_wr_async_queues(bfqd, bfqg); } bfq_end_wr_async_queues(bfqd, bfqd->root_group); } static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v) { struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); unsigned int val = 0; if (bfqgd) val = bfqgd->weight; seq_printf(sf, "%u\n", val); return 0; } static u64 bfqg_prfill_weight_device(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct bfq_group *bfqg = pd_to_bfqg(pd); if (!bfqg->entity.dev_weight) return 0; return __blkg_prfill_u64(sf, pd, bfqg->entity.dev_weight); } static int bfq_io_show_weight(struct seq_file *sf, void *v) { struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); seq_printf(sf, "default %u\n", bfqgd->weight); blkcg_print_blkgs(sf, blkcg, bfqg_prfill_weight_device, &blkcg_policy_bfq, 0, false); return 0; } static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 dev_weight) { weight = dev_weight ?: weight; bfqg->entity.dev_weight = dev_weight; /* * Setting the prio_changed flag of the entity * to 1 with new_weight == weight would re-set * the value of the weight to its ioprio mapping. * Set the flag only if necessary. */ if ((unsigned short)weight != bfqg->entity.new_weight) { bfqg->entity.new_weight = (unsigned short)weight; /* * Make sure that the above new value has been * stored in bfqg->entity.new_weight before * setting the prio_changed flag. In fact, * this flag may be read asynchronously (in * critical sections protected by a different * lock than that held here), and finding this * flag set may cause the execution of the code * for updating parameters whose value may * depend also on bfqg->entity.new_weight (in * __bfq_entity_update_weight_prio). * This barrier makes sure that the new value * of bfqg->entity.new_weight is correctly * seen in that code. 
*/ smp_wmb(); bfqg->entity.prio_changed = 1; } } static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css, struct cftype *cftype, u64 val) { struct blkcg *blkcg = css_to_blkcg(css); struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); struct blkcg_gq *blkg; int ret = -ERANGE; if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT) return ret; ret = 0; spin_lock_irq(&blkcg->lock); bfqgd->weight = (unsigned short)val; hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct bfq_group *bfqg = blkg_to_bfqg(blkg); if (bfqg) bfq_group_set_weight(bfqg, val, 0); } spin_unlock_irq(&blkcg->lock); return ret; } static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { int ret; struct blkg_conf_ctx ctx; struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct bfq_group *bfqg; u64 v; blkg_conf_init(&ctx, buf); ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx); if (ret) goto out; if (sscanf(ctx.body, "%llu", &v) == 1) { /* require "default" on dfl */ ret = -ERANGE; if (!v) goto out; } else if (!strcmp(strim(ctx.body), "default")) { v = 0; } else { ret = -EINVAL; goto out; } bfqg = blkg_to_bfqg(ctx.blkg); ret = -ERANGE; if (!v || (v >= BFQ_MIN_WEIGHT && v <= BFQ_MAX_WEIGHT)) { bfq_group_set_weight(bfqg, bfqg->entity.weight, v); ret = 0; } out: blkg_conf_exit(&ctx); return ret ?: nbytes; } static ssize_t bfq_io_set_weight(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { char *endp; int ret; u64 v; buf = strim(buf); /* "WEIGHT" or "default WEIGHT" sets the default weight */ v = simple_strtoull(buf, &endp, 0); if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) { ret = bfq_io_set_weight_legacy(of_css(of), NULL, v); return ret ?: nbytes; } return bfq_io_set_device_weight(of, buf, nbytes, off); } static int bfqg_print_rwstat(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat, &blkcg_policy_bfq, seq_cft(sf)->private, true); return 0; } static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct blkg_rwstat_sample sum; blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum); return __blkg_prfill_rwstat(sf, pd, &sum); } static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq, seq_cft(sf)->private, true); return 0; } #ifdef CONFIG_BFQ_CGROUP_DEBUG static int bfqg_print_stat(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat, &blkcg_policy_bfq, seq_cft(sf)->private, false); return 0; } static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct blkcg_gq *blkg = pd_to_blkg(pd); struct blkcg_gq *pos_blkg; struct cgroup_subsys_state *pos_css; u64 sum = 0; lockdep_assert_held(&blkg->q->queue_lock); rcu_read_lock(); blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { struct bfq_stat *stat; if (!pos_blkg->online) continue; stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off; sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt); } rcu_read_unlock(); return __blkg_prfill_u64(sf, pd, sum); } static int bfqg_print_stat_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_stat_recursive, &blkcg_policy_bfq, seq_cft(sf)->private, false); return 0; } static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct 
bfq_group *bfqg = blkg_to_bfqg(pd->blkg); u64 sum = blkg_rwstat_total(&bfqg->stats.bytes); return __blkg_prfill_u64(sf, pd, sum >> 9); } static int bfqg_print_stat_sectors(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false); return 0; } static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct blkg_rwstat_sample tmp; blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq, offsetof(struct bfq_group, stats.bytes), &tmp); return __blkg_prfill_u64(sf, pd, (tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9); } static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0, false); return 0; } static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct bfq_group *bfqg = pd_to_bfqg(pd); u64 samples = bfq_stat_read(&bfqg->stats.avg_queue_size_samples); u64 v = 0; if (samples) { v = bfq_stat_read(&bfqg->stats.avg_queue_size_sum); v = div64_u64(v, samples); } __blkg_prfill_u64(sf, pd, v); return 0; } /* print avg_queue_size */ static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_avg_queue_size, &blkcg_policy_bfq, 0, false); return 0; } #endif /* CONFIG_BFQ_CGROUP_DEBUG */ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { int ret; ret = blkcg_activate_policy(bfqd->queue->disk, &blkcg_policy_bfq); if (ret) return NULL; return blkg_to_bfqg(bfqd->queue->root_blkg); } struct blkcg_policy blkcg_policy_bfq = { .dfl_cftypes = bfq_blkg_files, .legacy_cftypes = bfq_blkcg_legacy_files, .cpd_alloc_fn = bfq_cpd_alloc, .cpd_free_fn = bfq_cpd_free, .pd_alloc_fn = bfq_pd_alloc, .pd_init_fn = bfq_pd_init, .pd_offline_fn = bfq_pd_offline, .pd_free_fn = bfq_pd_free, .pd_reset_stats_fn = bfq_pd_reset_stats, }; struct cftype bfq_blkcg_legacy_files[] = { { .name = "bfq.weight", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = bfq_io_show_weight_legacy, .write_u64 = bfq_io_set_weight_legacy, }, { .name = "bfq.weight_device", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = bfq_io_show_weight, .write = bfq_io_set_weight, }, /* statistics, covers only the tasks in the bfqg */ { .name = "bfq.io_service_bytes", .private = offsetof(struct bfq_group, stats.bytes), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_serviced", .private = offsetof(struct bfq_group, stats.ios), .seq_show = bfqg_print_rwstat, }, #ifdef CONFIG_BFQ_CGROUP_DEBUG { .name = "bfq.time", .private = offsetof(struct bfq_group, stats.time), .seq_show = bfqg_print_stat, }, { .name = "bfq.sectors", .seq_show = bfqg_print_stat_sectors, }, { .name = "bfq.io_service_time", .private = offsetof(struct bfq_group, stats.service_time), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_wait_time", .private = offsetof(struct bfq_group, stats.wait_time), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_merged", .private = offsetof(struct bfq_group, stats.merged), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_queued", .private = offsetof(struct bfq_group, stats.queued), .seq_show = bfqg_print_rwstat, }, #endif /* CONFIG_BFQ_CGROUP_DEBUG */ /* the same statistics which cover the bfqg and its descendants */ { .name = "bfq.io_service_bytes_recursive", .private = offsetof(struct bfq_group, stats.bytes), .seq_show = bfqg_print_rwstat_recursive, }, { .name = 
"bfq.io_serviced_recursive", .private = offsetof(struct bfq_group, stats.ios), .seq_show = bfqg_print_rwstat_recursive, }, #ifdef CONFIG_BFQ_CGROUP_DEBUG { .name = "bfq.time_recursive", .private = offsetof(struct bfq_group, stats.time), .seq_show = bfqg_print_stat_recursive, }, { .name = "bfq.sectors_recursive", .seq_show = bfqg_print_stat_sectors_recursive, }, { .name = "bfq.io_service_time_recursive", .private = offsetof(struct bfq_group, stats.service_time), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_wait_time_recursive", .private = offsetof(struct bfq_group, stats.wait_time), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_merged_recursive", .private = offsetof(struct bfq_group, stats.merged), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_queued_recursive", .private = offsetof(struct bfq_group, stats.queued), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.avg_queue_size", .seq_show = bfqg_print_avg_queue_size, }, { .name = "bfq.group_wait_time", .private = offsetof(struct bfq_group, stats.group_wait_time), .seq_show = bfqg_print_stat, }, { .name = "bfq.idle_time", .private = offsetof(struct bfq_group, stats.idle_time), .seq_show = bfqg_print_stat, }, { .name = "bfq.empty_time", .private = offsetof(struct bfq_group, stats.empty_time), .seq_show = bfqg_print_stat, }, { .name = "bfq.dequeue", .private = offsetof(struct bfq_group, stats.dequeue), .seq_show = bfqg_print_stat, }, #endif /* CONFIG_BFQ_CGROUP_DEBUG */ { } /* terminate */ }; struct cftype bfq_blkg_files[] = { { .name = "bfq.weight", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = bfq_io_show_weight, .write = bfq_io_set_weight, }, {} /* terminate */ }; #else /* CONFIG_BFQ_GROUP_IOSCHED */ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_group *bfqg) {} void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); entity->weight = entity->new_weight; entity->orig_weight = entity->new_weight; if (bfqq) { bfqq->ioprio = bfqq->new_ioprio; bfqq->ioprio_class = bfqq->new_ioprio_class; } entity->sched_data = &bfqg->sched_data; } void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {} void bfq_end_wr_async(struct bfq_data *bfqd) { bfq_end_wr_async_queues(bfqd, bfqd->root_group); } struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) { return bfqd->root_group; } struct bfq_group *bfqq_group(struct bfq_queue *bfqq) { return bfqq->bfqd->root_group; } void bfqg_and_blkg_put(struct bfq_group *bfqg) {} struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { struct bfq_group *bfqg; int i; bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); if (!bfqg) return NULL; for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; return bfqg; } #endif /* CONFIG_BFQ_GROUP_IOSCHED */
867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 // SPDX-License-Identifier: GPL-2.0-only #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/rtnetlink.h> #include <net/busy_poll.h> #include <net/net_namespace.h> #include <net/netdev_queues.h> #include <net/netdev_rx_queue.h> #include <net/sock.h> #include <net/xdp.h> #include <net/xdp_sock.h> #include "dev.h" #include "devmem.h" #include "netdev-genl-gen.h" struct netdev_nl_dump_ctx { unsigned long ifindex; unsigned int rxq_idx; unsigned int txq_idx; unsigned int napi_id; }; static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb) { NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx); return (struct netdev_nl_dump_ctx *)cb->ctx; } static int netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { u64 xsk_features = 0; u64 xdp_rx_meta = 0; void *hdr; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; #define XDP_METADATA_KFUNC(_, flag, __, xmo) \ if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \ xdp_rx_meta |= flag; XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC if (netdev->xsk_tx_metadata_ops) { if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp) xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP; if (netdev->xsk_tx_metadata_ops->tmo_request_checksum) xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM; } if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, netdev->xdp_features, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, xdp_rx_meta, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES, xsk_features, NETDEV_A_DEV_PAD)) goto err_cancel_msg; if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) { if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, netdev->xdp_zc_max_segs)) goto err_cancel_msg; } genlmsg_end(rsp, hdr); return 0; err_cancel_msg: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } static void netdev_genl_dev_notify(struct net_device *netdev, int cmd) { struct genl_info info; struct sk_buff *ntf; if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev), NETDEV_NLGRP_MGMT)) return; genl_info_init_ntf(&info, &netdev_nl_family, cmd); ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!ntf) return; if (netdev_nl_dev_fill(netdev, ntf, &info)) { nlmsg_free(ntf); return; } genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf, 0, NETDEV_NLGRP_MGMT, GFP_KERNEL); } int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info) { struct net_device *netdev; struct sk_buff *rsp; u32 ifindex; int err; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX)) return -EINVAL; ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) return -ENOMEM; rtnl_lock(); netdev = __dev_get_by_index(genl_info_net(info), ifindex); if (netdev) err = netdev_nl_dev_fill(netdev, rsp, info); else err = -ENODEV; rtnl_unlock(); if (err) goto err_free_msg; return genlmsg_reply(rsp, info); err_free_msg: nlmsg_free(rsp); return err; } int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct 
netlink_callback *cb) { struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); struct net *net = sock_net(skb->sk); struct net_device *netdev; int err = 0; rtnl_lock(); for_each_netdev_dump(net, netdev, ctx->ifindex) { err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb)); if (err < 0) break; } rtnl_unlock(); return err; } static int netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, const struct genl_info *info) { unsigned long irq_suspend_timeout; unsigned long gro_flush_timeout; u32 napi_defer_hard_irqs; void *hdr; pid_t pid; if (!napi->dev->up) return 0; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; if (nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id)) goto nla_put_failure; if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex)) goto nla_put_failure; if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq)) goto nla_put_failure; if (napi->thread) { pid = task_pid_nr(napi->thread); if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid)) goto nla_put_failure; } napi_defer_hard_irqs = napi_get_defer_hard_irqs(napi); if (nla_put_s32(rsp, NETDEV_A_NAPI_DEFER_HARD_IRQS, napi_defer_hard_irqs)) goto nla_put_failure; irq_suspend_timeout = napi_get_irq_suspend_timeout(napi); if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, irq_suspend_timeout)) goto nla_put_failure; gro_flush_timeout = napi_get_gro_flush_timeout(napi); if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, gro_flush_timeout)) goto nla_put_failure; genlmsg_end(rsp, hdr); return 0; nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) { struct napi_struct *napi; struct sk_buff *rsp; u32 napi_id; int err; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID)) return -EINVAL; napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) return -ENOMEM; napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id); if (napi) { err = netdev_nl_napi_fill_one(rsp, napi, info); netdev_unlock(napi->dev); } else { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]); err = -ENOENT; } if (err) { goto err_free_msg; } else if (!rsp->len) { err = -ENOENT; goto err_free_msg; } return genlmsg_reply(rsp, info); err_free_msg: nlmsg_free(rsp); return err; } static int netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info, struct netdev_nl_dump_ctx *ctx) { struct napi_struct *napi; unsigned int prev_id; int err = 0; if (!netdev->up) return err; prev_id = UINT_MAX; list_for_each_entry(napi, &netdev->napi_list, dev_list) { if (napi->napi_id < MIN_NAPI_ID) continue; /* Dump continuation below depends on the list being sorted */ WARN_ON_ONCE(napi->napi_id >= prev_id); prev_id = napi->napi_id; if (ctx->napi_id && napi->napi_id >= ctx->napi_id) continue; err = netdev_nl_napi_fill_one(rsp, napi, info); if (err) return err; ctx->napi_id = napi->napi_id; } return err; } int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); const struct genl_info *info = genl_info_dump(cb); struct net *net = sock_net(skb->sk); struct net_device *netdev; u32 ifindex = 0; int err = 0; if (info->attrs[NETDEV_A_NAPI_IFINDEX]) ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]); if (ifindex) { netdev = netdev_get_by_index_lock(net, ifindex); if (netdev) { err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); netdev_unlock(netdev); } else { err = -ENODEV; } } 
else { for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) { err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); if (err < 0) break; ctx->napi_id = 0; } } return err; } static int netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) { u64 irq_suspend_timeout = 0; u64 gro_flush_timeout = 0; u32 defer = 0; if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) { defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]); napi_set_defer_hard_irqs(napi, defer); } if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) { irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]); napi_set_irq_suspend_timeout(napi, irq_suspend_timeout); } if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) { gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]); napi_set_gro_flush_timeout(napi, gro_flush_timeout); } return 0; } int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info) { struct napi_struct *napi; unsigned int napi_id; int err; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID)) return -EINVAL; napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]); napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id); if (napi) { err = netdev_nl_napi_set_config(napi, info); netdev_unlock(napi->dev); } else { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]); err = -ENOENT; } return err; } static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { struct net_devmem_dmabuf_binding *binding; struct netdev_rx_queue *rxq; struct netdev_queue *txq; void *hdr; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) || nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) || nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex)) goto nla_put_failure; switch (q_type) { case NETDEV_QUEUE_TYPE_RX: rxq = __netif_get_rx_queue(netdev, q_idx); if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, rxq->napi->napi_id)) goto nla_put_failure; binding = rxq->mp_params.mp_priv; if (binding && nla_put_u32(rsp, NETDEV_A_QUEUE_DMABUF, binding->id)) goto nla_put_failure; break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); if (txq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, txq->napi->napi_id)) goto nla_put_failure; } genlmsg_end(rsp, hdr); return 0; nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id, u32 q_type) { switch (q_type) { case NETDEV_QUEUE_TYPE_RX: if (q_id >= netdev->real_num_rx_queues) return -EINVAL; return 0; case NETDEV_QUEUE_TYPE_TX: if (q_id >= netdev->real_num_tx_queues) return -EINVAL; } return 0; } static int netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { int err; if (!netdev->up) return -ENOENT; err = netdev_nl_queue_validate(netdev, q_idx, q_type); if (err) return err; return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info); } int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info) { u32 q_id, q_type, ifindex; struct net_device *netdev; struct sk_buff *rsp; int err; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) || GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) || GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX)) return -EINVAL; q_id = nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]); q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]); ifindex = 
nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) return -ENOMEM; rtnl_lock(); netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); if (netdev) { err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info); netdev_unlock(netdev); } else { err = -ENODEV; } rtnl_unlock(); if (err) goto err_free_msg; return genlmsg_reply(rsp, info); err_free_msg: nlmsg_free(rsp); return err; } static int netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info, struct netdev_nl_dump_ctx *ctx) { int err = 0; if (!netdev->up) return err; for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) { err = netdev_nl_queue_fill_one(rsp, netdev, ctx->rxq_idx, NETDEV_QUEUE_TYPE_RX, info); if (err) return err; } for (; ctx->txq_idx < netdev->real_num_tx_queues; ctx->txq_idx++) { err = netdev_nl_queue_fill_one(rsp, netdev, ctx->txq_idx, NETDEV_QUEUE_TYPE_TX, info); if (err) return err; } return err; } int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); const struct genl_info *info = genl_info_dump(cb); struct net *net = sock_net(skb->sk); struct net_device *netdev; u32 ifindex = 0; int err = 0; if (info->attrs[NETDEV_A_QUEUE_IFINDEX]) ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); rtnl_lock(); if (ifindex) { netdev = netdev_get_by_index_lock(net, ifindex); if (netdev) { err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); netdev_unlock(netdev); } else { err = -ENODEV; } } else { for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) { err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); if (err < 0) break; ctx->rxq_idx = 0; ctx->txq_idx = 0; } } rtnl_unlock(); return err; } #define NETDEV_STAT_NOT_SET (~0ULL) static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size) { const u64 *add = _add; u64 *sum = _sum; while (size) { if (*add != NETDEV_STAT_NOT_SET && *sum != NETDEV_STAT_NOT_SET) *sum += *add; sum++; add++; size -= 8; } } static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value) { if (value == NETDEV_STAT_NOT_SET) return 0; return nla_put_uint(rsp, attr_id, value); } static int netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) { if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits)) return -EMSGSIZE; return 0; } static int netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx) { if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) || netdev_stat_put(rsp, 
NETDEV_A_QSTATS_TX_BYTES, tx->bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_STOP, tx->stop) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake)) return -EMSGSIZE; return 0; } static int netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp, u32 q_type, int i, const struct genl_info *info) { const struct netdev_stat_ops *ops = netdev->stat_ops; struct netdev_queue_stats_rx rx; struct netdev_queue_stats_tx tx; void *hdr; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex) || nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type) || nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i)) goto nla_put_failure; switch (q_type) { case NETDEV_QUEUE_TYPE_RX: memset(&rx, 0xff, sizeof(rx)); ops->get_queue_stats_rx(netdev, i, &rx); if (!memchr_inv(&rx, 0xff, sizeof(rx))) goto nla_cancel; if (netdev_nl_stats_write_rx(rsp, &rx)) goto nla_put_failure; break; case NETDEV_QUEUE_TYPE_TX: memset(&tx, 0xff, sizeof(tx)); ops->get_queue_stats_tx(netdev, i, &tx); if (!memchr_inv(&tx, 0xff, sizeof(tx))) goto nla_cancel; if (netdev_nl_stats_write_tx(rsp, &tx)) goto nla_put_failure; break; } genlmsg_end(rsp, hdr); return 0; nla_cancel: genlmsg_cancel(rsp, hdr); return 0; nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } static int netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info, struct netdev_nl_dump_ctx *ctx) { const struct netdev_stat_ops *ops = netdev->stat_ops; int i, err; if (!(netdev->flags & IFF_UP)) return 0; i = ctx->rxq_idx; while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) { err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX, i, info); if (err) return err; ctx->rxq_idx = ++i; } i = ctx->txq_idx; while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) { err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX, i, info); if (err) return err; ctx->txq_idx = ++i; } ctx->rxq_idx = 0; ctx->txq_idx = 0; return 0; } static int netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { struct netdev_queue_stats_rx rx_sum, rx; struct netdev_queue_stats_tx tx_sum, tx; const struct netdev_stat_ops *ops; void *hdr; int i; ops = netdev->stat_ops; /* Netdev can't guarantee any complete counters */ if (!ops->get_base_stats) return 0; memset(&rx_sum, 0xff, sizeof(rx_sum)); memset(&tx_sum, 0xff, sizeof(tx_sum)); ops->get_base_stats(netdev, &rx_sum, &tx_sum); /* The op was there, but nothing reported, don't bother */ if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) && !memchr_inv(&tx_sum, 0xff, sizeof(tx_sum))) return 0; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex)) goto nla_put_failure; for (i = 0; i < 
netdev->real_num_rx_queues; i++) { memset(&rx, 0xff, sizeof(rx)); if (ops->get_queue_stats_rx) ops->get_queue_stats_rx(netdev, i, &rx); netdev_nl_stats_add(&rx_sum, &rx, sizeof(rx)); } for (i = 0; i < netdev->real_num_tx_queues; i++) { memset(&tx, 0xff, sizeof(tx)); if (ops->get_queue_stats_tx) ops->get_queue_stats_tx(netdev, i, &tx); netdev_nl_stats_add(&tx_sum, &tx, sizeof(tx)); } if (netdev_nl_stats_write_rx(rsp, &rx_sum) || netdev_nl_stats_write_tx(rsp, &tx_sum)) goto nla_put_failure; genlmsg_end(rsp, hdr); return 0; nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } static int netdev_nl_qstats_get_dump_one(struct net_device *netdev, unsigned int scope, struct sk_buff *skb, const struct genl_info *info, struct netdev_nl_dump_ctx *ctx) { if (!netdev->stat_ops) return 0; switch (scope) { case 0: return netdev_nl_stats_by_netdev(netdev, skb, info); case NETDEV_QSTATS_SCOPE_QUEUE: return netdev_nl_stats_by_queue(netdev, skb, info, ctx); } return -EINVAL; /* Should not happen, per netlink policy */ } int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); const struct genl_info *info = genl_info_dump(cb); struct net *net = sock_net(skb->sk); struct net_device *netdev; unsigned int ifindex; unsigned int scope; int err = 0; scope = 0; if (info->attrs[NETDEV_A_QSTATS_SCOPE]) scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]); ifindex = 0; if (info->attrs[NETDEV_A_QSTATS_IFINDEX]) ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]); rtnl_lock(); if (ifindex) { netdev = __dev_get_by_index(net, ifindex); if (netdev && netdev->stat_ops) { err = netdev_nl_qstats_get_dump_one(netdev, scope, skb, info, ctx); } else { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QSTATS_IFINDEX]); err = netdev ? 
-EOPNOTSUPP : -ENODEV; } } else { for_each_netdev_dump(net, netdev, ctx->ifindex) { err = netdev_nl_qstats_get_dump_one(netdev, scope, skb, info, ctx); if (err < 0) break; } } rtnl_unlock(); return err; } int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; struct net_devmem_dmabuf_binding *binding; struct list_head *sock_binding_list; u32 ifindex, dmabuf_fd, rxq_idx; struct net_device *netdev; struct sk_buff *rsp; struct nlattr *attr; int rem, err = 0; void *hdr; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) || GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD) || GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_QUEUES)) return -EINVAL; ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]); sock_binding_list = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk); if (IS_ERR(sock_binding_list)) return PTR_ERR(sock_binding_list); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) return -ENOMEM; hdr = genlmsg_iput(rsp, info); if (!hdr) { err = -EMSGSIZE; goto err_genlmsg_free; } rtnl_lock(); netdev = __dev_get_by_index(genl_info_net(info), ifindex); if (!netdev || !netif_device_present(netdev)) { err = -ENODEV; goto err_unlock; } if (dev_xdp_prog_count(netdev)) { NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached"); err = -EEXIST; goto err_unlock; } binding = net_devmem_bind_dmabuf(netdev, dmabuf_fd, info->extack); if (IS_ERR(binding)) { err = PTR_ERR(binding); goto err_unlock; } nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES, genlmsg_data(info->genlhdr), genlmsg_len(info->genlhdr), rem) { err = nla_parse_nested( tb, ARRAY_SIZE(netdev_queue_id_nl_policy) - 1, attr, netdev_queue_id_nl_policy, info->extack); if (err < 0) goto err_unbind; if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) || NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE)) { err = -EINVAL; goto err_unbind; } if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) { NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]); err = -EINVAL; goto err_unbind; } rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]); err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding, info->extack); if (err) goto err_unbind; } list_add(&binding->list, sock_binding_list); nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id); genlmsg_end(rsp, hdr); err = genlmsg_reply(rsp, info); if (err) goto err_unbind; rtnl_unlock(); return 0; err_unbind: net_devmem_unbind_dmabuf(binding); err_unlock: rtnl_unlock(); err_genlmsg_free: nlmsg_free(rsp); return err; } void netdev_nl_sock_priv_init(struct list_head *priv) { INIT_LIST_HEAD(priv); } void netdev_nl_sock_priv_destroy(struct list_head *priv) { struct net_devmem_dmabuf_binding *binding; struct net_devmem_dmabuf_binding *temp; list_for_each_entry_safe(binding, temp, priv, list) { rtnl_lock(); net_devmem_unbind_dmabuf(binding); rtnl_unlock(); } } static int netdev_genl_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_REGISTER: netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF); break; case NETDEV_UNREGISTER: netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF); break; case NETDEV_XDP_FEAT_CHANGE: netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF); break; } return NOTIFY_OK; } static struct notifier_block netdev_genl_nb = { .notifier_call = 
netdev_genl_netdevice_event, }; static int __init netdev_genl_init(void) { int err; err = register_netdevice_notifier(&netdev_genl_nb); if (err) return err; err = genl_register_family(&netdev_nl_family); if (err) goto err_unreg_ntf; return 0; err_unreg_ntf: unregister_netdevice_notifier(&netdev_genl_nb); return err; } subsys_initcall(netdev_genl_init);
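The per-queue stats path above relies on a sentinel convention: every u64 counter starts out as NETDEV_STAT_NOT_SET (~0ULL), drivers overwrite only the counters they support, and netdev_nl_stats_add()/netdev_stat_put() skip anything still carrying the sentinel. The following standalone sketch (not kernel code) demonstrates that aggregation rule; struct demo_queue_stats and demo_stats_add() are illustrative stand-ins for netdev_queue_stats_rx/tx and netdev_nl_stats_add().

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define NETDEV_STAT_NOT_SET (~0ULL)

struct demo_queue_stats {
        uint64_t packets;
        uint64_t bytes;
        uint64_t csum_bad;
};

static void demo_stats_add(void *_sum, const void *_add, size_t size)
{
        const uint64_t *add = _add;
        uint64_t *sum = _sum;

        while (size) {
                /* only fold in counters that both sides actually report */
                if (*add != NETDEV_STAT_NOT_SET && *sum != NETDEV_STAT_NOT_SET)
                        *sum += *add;
                sum++;
                add++;
                size -= sizeof(uint64_t);
        }
}

int main(void)
{
        struct demo_queue_stats sum, q0, q1;

        /* everything starts as "not set", like the memset(0xff) above */
        memset(&sum, 0xff, sizeof(sum));
        memset(&q0, 0xff, sizeof(q0));
        memset(&q1, 0xff, sizeof(q1));

        q0.packets = 10; q0.bytes = 1000;       /* csum_bad unsupported */
        q1.packets = 5;  q1.bytes = 700;

        sum.packets = 0; sum.bytes = 0;         /* base stats report these */

        demo_stats_add(&sum, &q0, sizeof(q0));
        demo_stats_add(&sum, &q1, sizeof(q1));

        printf("packets=%llu bytes=%llu csum_bad %s\n",
               (unsigned long long)sum.packets,
               (unsigned long long)sum.bytes,
               sum.csum_bad == NETDEV_STAT_NOT_SET ? "not reported" : "reported");
        return 0;
}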
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_H #define _LINUX_SCHED_H /* * Define 'struct task_struct' and provide the main scheduler * APIs (schedule(), wakeup variants, etc.)
*/ #include <uapi/linux/sched.h> #include <asm/current.h> #include <asm/processor.h> #include <linux/thread_info.h> #include <linux/preempt.h> #include <linux/cpumask_types.h> #include <linux/cache.h> #include <linux/irqflags_types.h> #include <linux/smp_types.h> #include <linux/pid_types.h> #include <linux/sem_types.h> #include <linux/shm.h> #include <linux/kmsan_types.h> #include <linux/mutex_types.h> #include <linux/plist_types.h> #include <linux/hrtimer_types.h> #include <linux/timer_types.h> #include <linux/seccomp_types.h> #include <linux/nodemask_types.h> #include <linux/refcount_types.h> #include <linux/resource.h> #include <linux/latencytop.h> #include <linux/sched/prio.h> #include <linux/sched/types.h> #include <linux/signal_types.h> #include <linux/syscall_user_dispatch_types.h> #include <linux/mm_types_task.h> #include <linux/netdevice_xmit.h> #include <linux/task_io_accounting.h> #include <linux/posix-timers_types.h> #include <linux/restart_block.h> #include <uapi/linux/rseq.h> #include <linux/seqlock_types.h> #include <linux/kcsan.h> #include <linux/rv.h> #include <linux/livepatch_sched.h> #include <linux/uidgid_types.h> #include <asm/kmap_size.h> /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; struct bio_list; struct blk_plug; struct bpf_local_storage; struct bpf_run_ctx; struct bpf_net_context; struct capture_control; struct cfs_rq; struct fs_struct; struct futex_pi_state; struct io_context; struct io_uring_task; struct mempolicy; struct nameidata; struct nsproxy; struct perf_event_context; struct pid_namespace; struct pipe_inode_info; struct rcu_node; struct reclaim_state; struct robust_list_head; struct root_domain; struct rq; struct sched_attr; struct sched_dl_entity; struct seq_file; struct sighand_struct; struct signal_struct; struct task_delay_info; struct task_group; struct task_struct; struct user_event_mm; #include <linux/sched/ext.h> /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). * * We have two separate sets of flags: task->__state * is about runnability, while task->exit_state are * about the task exiting. Confusing, but this way * modifying one set can't modify the other one by * mistake. */ /* Used in tsk->__state: */ #define TASK_RUNNING 0x00000000 #define TASK_INTERRUPTIBLE 0x00000001 #define TASK_UNINTERRUPTIBLE 0x00000002 #define __TASK_STOPPED 0x00000004 #define __TASK_TRACED 0x00000008 /* Used in tsk->exit_state: */ #define EXIT_DEAD 0x00000010 #define EXIT_ZOMBIE 0x00000020 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) /* Used in tsk->__state again: */ #define TASK_PARKED 0x00000040 #define TASK_DEAD 0x00000080 #define TASK_WAKEKILL 0x00000100 #define TASK_WAKING 0x00000200 #define TASK_NOLOAD 0x00000400 #define TASK_NEW 0x00000800 #define TASK_RTLOCK_WAIT 0x00001000 #define TASK_FREEZABLE 0x00002000 #define __TASK_FREEZABLE_UNSAFE (0x00004000 * IS_ENABLED(CONFIG_LOCKDEP)) #define TASK_FROZEN 0x00008000 #define TASK_STATE_MAX 0x00010000 #define TASK_ANY (TASK_STATE_MAX-1) /* * DO NOT ADD ANY NEW USERS ! 
*/ #define TASK_FREEZABLE_UNSAFE (TASK_FREEZABLE | __TASK_FREEZABLE_UNSAFE) /* Convenience macros for the sake of set_current_state: */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) #define TASK_TRACED __TASK_TRACED #define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) /* Convenience macros for the sake of wake_up(): */ #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) /* get_task_state(): */ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ TASK_PARKED) #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) #define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0) #define task_is_stopped(task) ((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0) #define task_is_stopped_or_traced(task) ((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0) /* * Special states are those that do not use the normal wait-loop pattern. See * the comment with set_special_state(). */ #define is_special_task_state(state) \ ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | \ TASK_DEAD | TASK_FROZEN)) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP # define debug_normal_state_change(state_value) \ do { \ WARN_ON_ONCE(is_special_task_state(state_value)); \ current->task_state_change = _THIS_IP_; \ } while (0) # define debug_special_state_change(state_value) \ do { \ WARN_ON_ONCE(!is_special_task_state(state_value)); \ current->task_state_change = _THIS_IP_; \ } while (0) # define debug_rtlock_wait_set_state() \ do { \ current->saved_state_change = current->task_state_change;\ current->task_state_change = _THIS_IP_; \ } while (0) # define debug_rtlock_wait_restore_state() \ do { \ current->task_state_change = current->saved_state_change;\ } while (0) #else # define debug_normal_state_change(cond) do { } while (0) # define debug_special_state_change(cond) do { } while (0) # define debug_rtlock_wait_set_state() do { } while (0) # define debug_rtlock_wait_restore_state() do { } while (0) #endif /* * set_current_state() includes a barrier so that the write of current->__state * is correctly serialised wrt the caller's subsequent test of whether to * actually sleep: * * for (;;) { * set_current_state(TASK_UNINTERRUPTIBLE); * if (CONDITION) * break; * * schedule(); * } * __set_current_state(TASK_RUNNING); * * If the caller does not need such serialisation (because, for instance, the * CONDITION test and condition change and wakeup are under the same lock) then * use __set_current_state(). * * The above is typically ordered against the wakeup, which does: * * CONDITION = 1; * wake_up_state(p, TASK_UNINTERRUPTIBLE); * * where wake_up_state()/try_to_wake_up() executes a full memory barrier before * accessing p->__state. * * Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is, * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). * * However, with slightly different timing the wakeup TASK_RUNNING store can * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not * a problem either because that will result in one extra go around the loop * and our @cond test will save the day. * * Also see the comments of try_to_wake_up(). 
*/ #define __set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ WRITE_ONCE(current->__state, (state_value)); \ } while (0) #define set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ smp_store_mb(current->__state, (state_value)); \ } while (0) /* * set_special_state() should be used for those states when the blocking task * can not use the regular condition based wait-loop. In that case we must * serialize against wakeups such that any possible in-flight TASK_RUNNING * stores will not collide with our state change. */ #define set_special_state(state_value) \ do { \ unsigned long flags; /* may shadow */ \ \ raw_spin_lock_irqsave(&current->pi_lock, flags); \ debug_special_state_change((state_value)); \ WRITE_ONCE(current->__state, (state_value)); \ raw_spin_unlock_irqrestore(&current->pi_lock, flags); \ } while (0) /* * PREEMPT_RT specific variants for "sleeping" spin/rwlocks * * RT's spin/rwlock substitutions are state preserving. The state of the * task when blocking on the lock is saved in task_struct::saved_state and * restored after the lock has been acquired. These operations are * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT * lock related wakeups while the task is blocked on the lock are * redirected to operate on task_struct::saved_state to ensure that these * are not dropped. On restore task_struct::saved_state is set to * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail. * * The lock operation looks like this: * * current_save_and_set_rtlock_wait_state(); * for (;;) { * if (try_lock()) * break; * raw_spin_unlock_irq(&lock->wait_lock); * schedule_rtlock(); * raw_spin_lock_irq(&lock->wait_lock); * set_current_state(TASK_RTLOCK_WAIT); * } * current_restore_rtlock_saved_state(); */ #define current_save_and_set_rtlock_wait_state() \ do { \ lockdep_assert_irqs_disabled(); \ raw_spin_lock(&current->pi_lock); \ current->saved_state = current->__state; \ debug_rtlock_wait_set_state(); \ WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \ raw_spin_unlock(&current->pi_lock); \ } while (0); #define current_restore_rtlock_saved_state() \ do { \ lockdep_assert_irqs_disabled(); \ raw_spin_lock(&current->pi_lock); \ debug_rtlock_wait_restore_state(); \ WRITE_ONCE(current->__state, current->saved_state); \ current->saved_state = TASK_RUNNING; \ raw_spin_unlock(&current->pi_lock); \ } while (0); #define get_current_state() READ_ONCE(current->__state) /* * Define the task command name length as enum, then it can be visible to * BPF programs. 
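 * For example, a BPF program can size its buffer with the same constant
 * (a sketch assuming the usual bpf_get_current_comm() helper, which is not
 * declared here):
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	bpf_get_current_comm(&comm, sizeof(comm));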
*/ enum { TASK_COMM_LEN = 16, }; extern void sched_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern long schedule_timeout(long timeout); extern long schedule_timeout_interruptible(long timeout); extern long schedule_timeout_killable(long timeout); extern long schedule_timeout_uninterruptible(long timeout); extern long schedule_timeout_idle(long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); asmlinkage void preempt_schedule_irq(void); #ifdef CONFIG_PREEMPT_RT extern void schedule_rtlock(void); #endif extern int __must_check io_schedule_prepare(void); extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); /** * struct prev_cputime - snapshot of system and user cputime * @utime: time spent in user mode * @stime: time spent in system mode * @lock: protects the above two fields * * Stores previous user/system time values such that we can guarantee * monotonicity. */ struct prev_cputime { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE u64 utime; u64 stime; raw_spinlock_t lock; #endif }; enum vtime_state { /* Task is sleeping or running in a CPU with VTIME inactive: */ VTIME_INACTIVE = 0, /* Task is idle */ VTIME_IDLE, /* Task runs in kernelspace in a CPU with VTIME active: */ VTIME_SYS, /* Task runs in userspace in a CPU with VTIME active: */ VTIME_USER, /* Task runs as guests in a CPU with VTIME active: */ VTIME_GUEST, }; struct vtime { seqcount_t seqcount; unsigned long long starttime; enum vtime_state state; unsigned int cpu; u64 utime; u64 stime; u64 gtime; }; /* * Utilization clamp constraints. * @UCLAMP_MIN: Minimum utilization * @UCLAMP_MAX: Maximum utilization * @UCLAMP_CNT: Utilization clamp constraints count */ enum uclamp_id { UCLAMP_MIN = 0, UCLAMP_MAX, UCLAMP_CNT }; #ifdef CONFIG_SMP extern struct root_domain def_root_domain; extern struct mutex sched_domains_mutex; #endif struct sched_param { int sched_priority; }; struct sched_info { #ifdef CONFIG_SCHED_INFO /* Cumulative counters: */ /* # of times we have run on this CPU: */ unsigned long pcount; /* Time spent waiting on a runqueue: */ unsigned long long run_delay; /* Max time spent waiting on a runqueue: */ unsigned long long max_run_delay; /* Min time spent waiting on a runqueue: */ unsigned long long min_run_delay; /* Timestamps: */ /* When did we last run on a CPU? */ unsigned long long last_arrival; /* When were we last queued to run? */ unsigned long long last_queued; #endif /* CONFIG_SCHED_INFO */ }; /* * Integer metrics need fixed point arithmetic, e.g., sched/fair * has a few: load, load_avg, util_avg, freq, and capacity. * * We define a basic fixed point arithmetic range, and then formalize * all these metrics based on that basic range. */ # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) /* Increase resolution of cpu_capacity calculations */ # define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT # define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) struct load_weight { unsigned long weight; u32 inv_weight; }; /* * The load/runnable/util_avg accumulates an infinite geometric series * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c). 
* * [load_avg definition] * * load_avg = runnable% * scale_load_down(load) * * [runnable_avg definition] * * runnable_avg = runnable% * SCHED_CAPACITY_SCALE * * [util_avg definition] * * util_avg = running% * SCHED_CAPACITY_SCALE * * where runnable% is the time ratio that a sched_entity is runnable and * running% the time ratio that a sched_entity is running. * * For cfs_rq, they are the aggregated values of all runnable and blocked * sched_entities. * * The load/runnable/util_avg doesn't directly factor frequency scaling and CPU * capacity scaling. The scaling is done through the rq_clock_pelt that is used * for computing those signals (see update_rq_clock_pelt()) * * N.B., the above ratios (runnable% and running%) themselves are in the * range of [0, 1]. To do fixed point arithmetics, we therefore scale them * to as large a range as necessary. This is for example reflected by * util_avg's SCHED_CAPACITY_SCALE. * * [Overflow issue] * * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities * with the highest load (=88761), always runnable on a single cfs_rq, * and should not overflow as the number already hits PID_MAX_LIMIT. * * For all other cases (including 32-bit kernels), struct load_weight's * weight will overflow first before we do, because: * * Max(load_avg) <= Max(load.weight) * * Then it is the load_weight's responsibility to consider overflow * issues. */ struct sched_avg { u64 last_update_time; u64 load_sum; u64 runnable_sum; u32 util_sum; u32 period_contrib; unsigned long load_avg; unsigned long runnable_avg; unsigned long util_avg; unsigned int util_est; } ____cacheline_aligned; /* * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg * updates. When a task is dequeued, its util_est should not be updated if its * util_avg has not been updated in the meantime. * This information is mapped into the MSB bit of util_est at dequeue time. * Since max value of util_est for a task is 1024 (PELT util_avg for a task) * it is safe to use MSB. 
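 * As a worked example, an estimate of 300 (0x12c) stored with the flag set
 * reads back as 0x8000012c, and masking with ~UTIL_AVG_UNCHANGED recovers
 * the plain value:
 *
 *	(0x8000012c & ~UTIL_AVG_UNCHANGED) == 0x12c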
*/ #define UTIL_EST_WEIGHT_SHIFT 2 #define UTIL_AVG_UNCHANGED 0x80000000 struct sched_statistics { #ifdef CONFIG_SCHEDSTATS u64 wait_start; u64 wait_max; u64 wait_count; u64 wait_sum; u64 iowait_count; u64 iowait_sum; u64 sleep_start; u64 sleep_max; s64 sum_sleep_runtime; u64 block_start; u64 block_max; s64 sum_block_runtime; s64 exec_max; u64 slice_max; u64 nr_migrations_cold; u64 nr_failed_migrations_affine; u64 nr_failed_migrations_running; u64 nr_failed_migrations_hot; u64 nr_forced_migrations; u64 nr_wakeups; u64 nr_wakeups_sync; u64 nr_wakeups_migrate; u64 nr_wakeups_local; u64 nr_wakeups_remote; u64 nr_wakeups_affine; u64 nr_wakeups_affine_attempts; u64 nr_wakeups_passive; u64 nr_wakeups_idle; #ifdef CONFIG_SCHED_CORE u64 core_forceidle_sum; #endif #endif /* CONFIG_SCHEDSTATS */ } ____cacheline_aligned; struct sched_entity { /* For load-balancing: */ struct load_weight load; struct rb_node run_node; u64 deadline; u64 min_vruntime; u64 min_slice; struct list_head group_node; unsigned char on_rq; unsigned char sched_delayed; unsigned char rel_deadline; unsigned char custom_slice; /* hole */ u64 exec_start; u64 sum_exec_runtime; u64 prev_sum_exec_runtime; u64 vruntime; s64 vlag; u64 slice; u64 nr_migrations; #ifdef CONFIG_FAIR_GROUP_SCHED int depth; struct sched_entity *parent; /* rq on which this entity is (to be) queued: */ struct cfs_rq *cfs_rq; /* rq "owned" by this entity/group: */ struct cfs_rq *my_q; /* cached value of my_q->h_nr_running */ unsigned long runnable_weight; #endif #ifdef CONFIG_SMP /* * Per entity load average tracking. * * Put into separate cache line so it does not * collide with read-mostly values above. */ struct sched_avg avg; #endif }; struct sched_rt_entity { struct list_head run_list; unsigned long timeout; unsigned long watchdog_stamp; unsigned int time_slice; unsigned short on_rq; unsigned short on_list; struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED struct sched_rt_entity *parent; /* rq on which this entity is (to be) queued: */ struct rt_rq *rt_rq; /* rq "owned" by this entity/group: */ struct rt_rq *my_q; #endif } __randomize_layout; typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *); typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *); struct sched_dl_entity { struct rb_node rb_node; /* * Original scheduling parameters. Copied here from sched_attr * during sched_setattr(), they will remain the same until * the next sched_setattr(). */ u64 dl_runtime; /* Maximum runtime for each instance */ u64 dl_deadline; /* Relative deadline of each instance */ u64 dl_period; /* Separation of two instances (period) */ u64 dl_bw; /* dl_runtime / dl_period */ u64 dl_density; /* dl_runtime / dl_deadline */ /* * Actual scheduling parameters. Initialized with the values above, * they are continuously updated during task execution. Note that * the remaining runtime could be < 0 in case we are in overrun. */ s64 runtime; /* Remaining runtime for this instance */ u64 deadline; /* Absolute deadline for this instance */ unsigned int flags; /* Specifying the scheduler behaviour */ /* * Some bool flags: * * @dl_throttled tells if we exhausted the runtime. If so, the * task has to wait for a replenishment to be performed at the * next firing of dl_timer. * * @dl_yielded tells if task gave up the CPU before consuming * all its available runtime during the last job. * * @dl_non_contending tells if the task is inactive while still * contributing to the active utilization. 
In other words, it * indicates if the inactive timer has been armed and its handler * has not been executed yet. This flag is useful to avoid race * conditions between the inactive timer handler and the wakeup * code. * * @dl_overrun tells if the task asked to be informed about runtime * overruns. * * @dl_server tells if this is a server entity. * * @dl_defer tells if this is a deferred or regular server. For * now only defer server exists. * * @dl_defer_armed tells if the deferrable server is waiting * for the replenishment timer to activate it. * * @dl_server_active tells if the dlserver is active(started). * dlserver is started on first cfs enqueue on an idle runqueue * and is stopped when a dequeue results in 0 cfs tasks on the * runqueue. In other words, dlserver is active only when cpu's * runqueue has atleast one cfs task. * * @dl_defer_running tells if the deferrable server is actually * running, skipping the defer phase. */ unsigned int dl_throttled : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; unsigned int dl_server : 1; unsigned int dl_server_active : 1; unsigned int dl_defer : 1; unsigned int dl_defer_armed : 1; unsigned int dl_defer_running : 1; /* * Bandwidth enforcement timer. Each -deadline task has its * own bandwidth to be enforced, thus we need one timer per task. */ struct hrtimer dl_timer; /* * Inactive timer, responsible for decreasing the active utilization * at the "0-lag time". When a -deadline task blocks, it contributes * to GRUB's active utilization until the "0-lag time", hence a * timer is needed to decrease the active utilization at the correct * time. */ struct hrtimer inactive_timer; /* * Bits for DL-server functionality. Also see the comment near * dl_server_update(). * * @rq the runqueue this server is for * * @server_has_tasks() returns true if @server_pick return a * runnable task. */ struct rq *rq; dl_server_has_tasks_f server_has_tasks; dl_server_pick_f server_pick_task; #ifdef CONFIG_RT_MUTEXES /* * Priority Inheritance. When a DEADLINE scheduling entity is boosted * pi_se points to the donor, otherwise points to the dl_se it belongs * to (the original one/itself). */ struct sched_dl_entity *pi_se; #endif }; #ifdef CONFIG_UCLAMP_TASK /* Number of utilization clamp buckets (shorter alias) */ #define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT /* * Utilization clamp for a scheduling entity * @value: clamp value "assigned" to a se * @bucket_id: bucket index corresponding to the "assigned" value * @active: the se is currently refcounted in a rq's bucket * @user_defined: the requested clamp value comes from user-space * * The bucket_id is the index of the clamp bucket matching the clamp value * which is pre-computed and stored to avoid expensive integer divisions from * the fast path. * * The active bit is set whenever a task has got an "effective" value assigned, * which can be different from the clamp value "requested" from user-space. * This allows to know a task is refcounted in the rq's bucket corresponding * to the "effective" bucket_id. * * The user_defined bit is set whenever a task has got a task-specific clamp * value requested from userspace, i.e. the system defaults apply to this task * just as a restriction. This allows to relax default clamps when a less * restrictive task-specific value has been requested, thus allowing to * implement a "nice" semantic. For example, a task running with a 20% * default boost can still drop its own boosting to 0%. 
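 * As a rough worked example, assuming the default of five clamp buckets:
 * a 20% boost corresponds to a clamp value of about 0.20 * 1024 = 205,
 * each bucket then spans roughly 1024 / 5 = 205 capacity units, so that
 * value lands in bucket_id 1 while requests of 0..204 share bucket 0.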
*/ struct uclamp_se { unsigned int value : bits_per(SCHED_CAPACITY_SCALE); unsigned int bucket_id : bits_per(UCLAMP_BUCKETS); unsigned int active : 1; unsigned int user_defined : 1; }; #endif /* CONFIG_UCLAMP_TASK */ union rcu_special { struct { u8 blocked; u8 need_qs; u8 exp_hint; /* Hint for performance. */ u8 need_mb; /* Readers need smp_mb(). */ } b; /* Bits. */ u32 s; /* Set of bits. */ }; enum perf_event_task_context { perf_invalid_context = -1, perf_hw_context = 0, perf_sw_context, perf_nr_task_contexts, }; /* * Number of contexts where an event can trigger: * task, softirq, hardirq, nmi. */ #define PERF_NR_CONTEXTS 4 struct wake_q_node { struct wake_q_node *next; }; struct kmap_ctrl { #ifdef CONFIG_KMAP_LOCAL int idx; pte_t pteval[KM_MAX_IDX]; #endif }; struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* * For reasons of header soup (see current_thread_info()), this * must be the first element of task_struct. */ struct thread_info thread_info; #endif unsigned int __state; /* saved state for "spinlock sleepers" */ unsigned int saved_state; /* * This begins the randomizable portion of task_struct. Only * scheduling-critical items should be added above here. */ randomized_struct_fields_start void *stack; refcount_t usage; /* Per task flags (PF_*), defined further below: */ unsigned int flags; unsigned int ptrace; #ifdef CONFIG_MEM_ALLOC_PROFILING struct alloc_tag *alloc_tag; #endif #ifdef CONFIG_SMP int on_cpu; struct __call_single_node wake_entry; unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; struct task_struct *last_wakee; /* * recent_used_cpu is initially set as the last CPU used by a task * that wakes affine another task. Waker/wakee relationships can * push tasks around a CPU where each wakeup moves to the next one. * Tracking a recently used CPU allows a quick search for a recently * used CPU that may be idle. */ int recent_used_cpu; int wake_cpu; #endif int on_rq; int prio; int static_prio; int normal_prio; unsigned int rt_priority; struct sched_entity se; struct sched_rt_entity rt; struct sched_dl_entity dl; struct sched_dl_entity *dl_server; #ifdef CONFIG_SCHED_CLASS_EXT struct sched_ext_entity scx; #endif const struct sched_class *sched_class; #ifdef CONFIG_SCHED_CORE struct rb_node core_node; unsigned long core_cookie; unsigned int core_occupation; #endif #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #endif #ifdef CONFIG_UCLAMP_TASK /* * Clamp values requested for a scheduling entity. * Must be updated with task_rq_lock() held. */ struct uclamp_se uclamp_req[UCLAMP_CNT]; /* * Effective clamp values used for a scheduling entity. * Must be updated with task_rq_lock() held. 
*/ struct uclamp_se uclamp[UCLAMP_CNT]; #endif struct sched_statistics stats; #ifdef CONFIG_PREEMPT_NOTIFIERS /* List of struct preempt_notifier: */ struct hlist_head preempt_notifiers; #endif #ifdef CONFIG_BLK_DEV_IO_TRACE unsigned int btrace_seq; #endif unsigned int policy; unsigned long max_allowed_capacity; int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t *user_cpus_ptr; cpumask_t cpus_mask; void *migration_pending; #ifdef CONFIG_SMP unsigned short migration_disabled; #endif unsigned short migration_flags; #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; union rcu_special rcu_read_unlock_special; struct list_head rcu_node_entry; struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TASKS_RCU unsigned long rcu_tasks_nvcsw; u8 rcu_tasks_holdout; u8 rcu_tasks_idx; int rcu_tasks_idle_cpu; struct list_head rcu_tasks_holdout_list; int rcu_tasks_exit_cpu; struct list_head rcu_tasks_exit_list; #endif /* #ifdef CONFIG_TASKS_RCU */ #ifdef CONFIG_TASKS_TRACE_RCU int trc_reader_nesting; int trc_ipi_to_cpu; union rcu_special trc_reader_special; struct list_head trc_holdout_list; struct list_head trc_blkd_node; int trc_blkd_cpu; #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ struct sched_info sched_info; struct list_head tasks; #ifdef CONFIG_SMP struct plist_node pushable_tasks; struct rb_node pushable_dl_tasks; #endif struct mm_struct *mm; struct mm_struct *active_mm; struct address_space *faults_disabled_mapping; int exit_state; int exit_code; int exit_signal; /* The signal sent when the parent dies: */ int pdeath_signal; /* JOBCTL_*, siglock protected: */ unsigned long jobctl; /* Used for emulating ABI behavior of previous Linux versions: */ unsigned int personality; /* Scheduler bits, serialized by scheduler locks: */ unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; unsigned sched_task_hot:1; /* Force alignment to the next boundary: */ unsigned :0; /* Unserialized, strictly 'current' */ /* * This field must not be in the scheduler word above due to wakelist * queueing no longer being serialized by p->on_cpu. However: * * p->XXX = X; ttwu() * schedule() if (p->on_rq && ..) // false * smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true * deactivate_task() ttwu_queue_wakelist()) * p->on_rq = 0; p->sched_remote_wakeup = Y; * * guarantees all stores of 'current' are visible before * ->sched_remote_wakeup gets used, so it can be in this word. */ unsigned sched_remote_wakeup:1; #ifdef CONFIG_RT_MUTEXES unsigned sched_rt_mutex:1; #endif /* Bit to tell TOMOYO we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; #ifndef TIF_RESTORE_SIGMASK unsigned restore_sigmask:1; #endif #ifdef CONFIG_MEMCG_V1 unsigned in_user_fault:1; #endif #ifdef CONFIG_LRU_GEN /* whether the LRU algorithm may apply to this access */ unsigned in_lru_fault:1; #endif #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif #ifdef CONFIG_CGROUPS /* disallow userland-initiated cgroup migration */ unsigned no_cgroup_migration:1; /* task is frozen/stopped (used by the cgroup freezer) */ unsigned frozen:1; #endif #ifdef CONFIG_BLK_CGROUP unsigned use_memdelay:1; #endif #ifdef CONFIG_PSI /* Stalled due to lack of memory */ unsigned in_memstall:1; #endif #ifdef CONFIG_PAGE_OWNER /* Used by page_owner=on to detect recursion in page tracking. 
*/ unsigned in_page_owner:1; #endif #ifdef CONFIG_EVENTFD /* Recursion prevention for eventfd_signal() */ unsigned in_eventfd:1; #endif #ifdef CONFIG_ARCH_HAS_CPU_PASID unsigned pasid_activated:1; #endif #ifdef CONFIG_X86_BUS_LOCK_DETECT unsigned reported_split_lock:1; #endif #ifdef CONFIG_TASK_DELAY_ACCT /* delay due to memory thrashing */ unsigned in_thrashing:1; #endif #ifdef CONFIG_PREEMPT_RT struct netdev_xmit net_xmit; #endif unsigned long atomic_flags; /* Flags requiring atomic access. */ struct restart_block restart_block; pid_t pid; pid_t tgid; #ifdef CONFIG_STACKPROTECTOR /* Canary value for the -fstack-protector GCC feature: */ unsigned long stack_canary; #endif /* * Pointers to the (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with * p->real_parent->pid) */ /* Real parent process: */ struct task_struct __rcu *real_parent; /* Recipient of SIGCHLD, wait4() reports: */ struct task_struct __rcu *parent; /* * Children/sibling form the list of natural children: */ struct list_head children; struct list_head sibling; struct task_struct *group_leader; /* * 'ptraced' is the list of tasks this task is using ptrace() on. * * This includes both natural children and PTRACE_ATTACH targets. * 'ptrace_entry' is this task's link on the p->parent->ptraced list. */ struct list_head ptraced; struct list_head ptrace_entry; /* PID/PID hash table linkage. */ struct pid *thread_pid; struct hlist_node pid_links[PIDTYPE_MAX]; struct list_head thread_node; struct completion *vfork_done; /* CLONE_CHILD_SETTID: */ int __user *set_child_tid; /* CLONE_CHILD_CLEARTID: */ int __user *clear_child_tid; /* PF_KTHREAD | PF_IO_WORKER */ void *worker_private; u64 utime; u64 stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME u64 utimescaled; u64 stimescaled; #endif u64 gtime; struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN struct vtime vtime; #endif #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; #endif /* Context switch counts: */ unsigned long nvcsw; unsigned long nivcsw; /* Monotonic time in nsecs: */ u64 start_time; /* Boot based time in nsecs: */ u64 start_boottime; /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ unsigned long min_flt; unsigned long maj_flt; /* Empty if CONFIG_POSIX_CPUTIMERS=n */ struct posix_cputimers posix_cputimers; #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK struct posix_cputimers_work posix_cputimers_work; #endif /* Process credentials: */ /* Tracer's credentials at attach: */ const struct cred __rcu *ptracer_cred; /* Objective and real subjective task credentials (COW): */ const struct cred __rcu *real_cred; /* Effective (overridable) subjective task credentials (COW): */ const struct cred __rcu *cred; #ifdef CONFIG_KEYS /* Cached requested key. */ struct key *cached_requested_key; #endif /* * executable name, excluding path. * * - normally initialized begin_new_exec() * - set it with set_task_comm() * - strscpy_pad() to ensure it is always NUL-terminated and * zero-padded * - task_lock() to ensure the operation is atomic and the name is * fully updated. 
*/ char comm[TASK_COMM_LEN]; struct nameidata *nameidata; #ifdef CONFIG_SYSVIPC struct sysv_sem sysvsem; struct sysv_shm sysvshm; #endif #ifdef CONFIG_DETECT_HUNG_TASK unsigned long last_switch_count; unsigned long last_switch_time; #endif /* Filesystem information: */ struct fs_struct *fs; /* Open file information: */ struct files_struct *files; #ifdef CONFIG_IO_URING struct io_uring_task *io_uring; #endif /* Namespaces: */ struct nsproxy *nsproxy; /* Signal handlers: */ struct signal_struct *signal; struct sighand_struct __rcu *sighand; sigset_t blocked; sigset_t real_blocked; /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; unsigned long sas_ss_sp; size_t sas_ss_size; unsigned int sas_ss_flags; struct callback_head *task_works; #ifdef CONFIG_AUDIT #ifdef CONFIG_AUDITSYSCALL struct audit_context *audit_context; #endif kuid_t loginuid; unsigned int sessionid; #endif struct seccomp seccomp; struct syscall_user_dispatch syscall_dispatch; /* Thread group tracking: */ u64 parent_exec_id; u64 self_exec_id; /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ spinlock_t alloc_lock; /* Protection of the PI data structures: */ raw_spinlock_t pi_lock; struct wake_q_node wake_q; #ifdef CONFIG_RT_MUTEXES /* PI waiters blocked on a rt_mutex held by this task: */ struct rb_root_cached pi_waiters; /* Updated under owner's pi_lock and rq lock */ struct task_struct *pi_top_task; /* Deadlock detection and priority inheritance handling: */ struct rt_mutex_waiter *pi_blocked_on; #endif #ifdef CONFIG_DEBUG_MUTEXES /* Mutex deadlock detection: */ struct mutex_waiter *blocked_on; #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP int non_block_count; #endif #ifdef CONFIG_TRACE_IRQFLAGS struct irqtrace_events irqtrace; unsigned int hardirq_threaded; u64 hardirq_chain_key; int softirqs_enabled; int softirq_context; int irq_config; #endif #ifdef CONFIG_PREEMPT_RT int softirq_disable_cnt; #endif #ifdef CONFIG_LOCKDEP # define MAX_LOCK_DEPTH 48UL u64 curr_chain_key; int lockdep_depth; unsigned int lockdep_recursion; struct held_lock held_locks[MAX_LOCK_DEPTH]; #endif #if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP) unsigned int in_ubsan; #endif /* Journalling filesystem info: */ void *journal_info; /* Stacked block device info: */ struct bio_list *bio_list; /* Stack plugging: */ struct blk_plug *plug; /* VM state: */ struct reclaim_state *reclaim_state; struct io_context *io_context; #ifdef CONFIG_COMPACTION struct capture_control *capture_control; #endif /* Ptrace state: */ unsigned long ptrace_message; kernel_siginfo_t *last_siginfo; struct task_io_accounting ioac; #ifdef CONFIG_PSI /* Pressure stall state */ unsigned int psi_flags; #endif #ifdef CONFIG_TASK_XACCT /* Accumulated RSS usage: */ u64 acct_rss_mem1; /* Accumulated virtual memory usage: */ u64 acct_vm_mem1; /* stime + utime since last update: */ u64 acct_timexpd; #endif #ifdef CONFIG_CPUSETS /* Protected by ->alloc_lock: */ nodemask_t mems_allowed; /* Sequence number to catch updates: */ seqcount_spinlock_t mems_allowed_seq; int cpuset_mem_spread_rotor; #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock: */ struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock: */ struct list_head cg_list; #endif #ifdef CONFIG_X86_CPU_RESCTRL u32 closid; u32 rmid; #endif #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT struct compat_robust_list_head __user *compat_robust_list; 
#endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; struct mutex futex_exit_mutex; unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS u8 perf_recursion[PERF_NR_CONTEXTS]; struct perf_event_context *perf_event_ctxp; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif #ifdef CONFIG_DEBUG_PREEMPT unsigned long preempt_disable_ip; #endif #ifdef CONFIG_NUMA /* Protected by alloc_lock: */ struct mempolicy *mempolicy; short il_prev; u8 il_weight; short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING int numa_scan_seq; unsigned int numa_scan_period; unsigned int numa_scan_period_max; int numa_preferred_nid; unsigned long numa_migrate_retry; /* Migration stamp: */ u64 node_stamp; u64 last_task_numa_placement; u64 last_sum_exec_runtime; struct callback_head numa_work; /* * This pointer is only modified for current in syscall and * pagefault context (and for tasks being destroyed), so it can be read * from any of the following contexts: * - RCU read-side critical section * - current->numa_group from everywhere * - task's runqueue locked, task not running */ struct numa_group __rcu *numa_group; /* * numa_faults is an array split into four regions: * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer * in this precise order. * * faults_memory: Exponential decaying average of faults on a per-node * basis. Scheduling placement decisions are made based on these * counts. The values remain static for the duration of a PTE scan. * faults_cpu: Track the nodes the process was running on when a NUMA * hinting fault was incurred. * faults_memory_buffer and faults_cpu_buffer: Record faults per node * during the current scan window. When the scan completes, the counts * in faults_memory and faults_cpu decay and these values are copied. */ unsigned long *numa_faults; unsigned long total_numa_faults; /* * numa_faults_locality tracks if faults recorded during the last * scan window were remote/local or failed to migrate. The task scan * period is adapted based on the locality of the faults with different * weights depending on whether they were shared or private faults */ unsigned long numa_faults_locality[3]; unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_RSEQ struct rseq __user *rseq; u32 rseq_len; u32 rseq_sig; /* * RmW on rseq_event_mask must be performed atomically * with respect to preemption. */ unsigned long rseq_event_mask; # ifdef CONFIG_DEBUG_RSEQ /* * This is a place holder to save a copy of the rseq fields for * validation of read-only fields. The struct rseq has a * variable-length array at the end, so it cannot be used * directly. Reserve a size large enough for the known fields. 
*/ char rseq_fields[sizeof(struct rseq)]; # endif #endif #ifdef CONFIG_SCHED_MM_CID int mm_cid; /* Current cid in mm */ int last_mm_cid; /* Most recent cid in mm */ int migrate_from_cpu; int mm_cid_active; /* Whether cid bitmap is active */ struct callback_head cid_work; #endif struct tlbflush_unmap_batch tlb_ubc; /* Cache last used pipe for splice(): */ struct pipe_inode_info *splice_pipe; struct page_frag task_frag; #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info *delays; #endif #ifdef CONFIG_FAULT_INJECTION int make_it_fail; unsigned int fail_nth; #endif /* * When (nr_dirtied >= nr_dirtied_pause), it's time to call * balance_dirty_pages() for a dirty throttling pause: */ int nr_dirtied; int nr_dirtied_pause; /* Start of a write-and-pause period: */ unsigned long dirty_paused_when; #ifdef CONFIG_LATENCYTOP int latency_record_count; struct latency_record latency_record[LT_SAVECOUNT]; #endif /* * Time slack values; these are used to round up poll() and * select() etc timeout values. These are in nanoseconds. */ u64 timer_slack_ns; u64 default_timer_slack_ns; #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) unsigned int kasan_depth; #endif #ifdef CONFIG_KCSAN struct kcsan_ctx kcsan_ctx; #ifdef CONFIG_TRACE_IRQFLAGS struct irqtrace_events kcsan_save_irqtrace; #endif #ifdef CONFIG_KCSAN_WEAK_MEMORY int kcsan_stack_depth; #endif #endif #ifdef CONFIG_KMSAN struct kmsan_ctx kmsan_ctx; #endif #if IS_ENABLED(CONFIG_KUNIT) struct kunit *kunit_test; #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack: */ int curr_ret_stack; int curr_ret_depth; /* Stack of return addresses for return function tracing: */ unsigned long *ret_stack; /* Timestamp for last schedule: */ unsigned long long ftrace_timestamp; unsigned long long ftrace_sleeptime; /* * Number of functions that haven't been traced * because of depth overrun: */ atomic_t trace_overrun; /* Pause tracing: */ atomic_t tracing_graph_pause; #endif #ifdef CONFIG_TRACING /* Bitmask and counter of trace recursion: */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_KCOV /* See kernel/kcov.c for more details. 
*/ /* Coverage collection mode enabled for this task (0 if disabled): */ unsigned int kcov_mode; /* Size of the kcov_area: */ unsigned int kcov_size; /* Buffer for coverage collection: */ void *kcov_area; /* KCOV descriptor wired with this task or NULL: */ struct kcov *kcov; /* KCOV common handle for remote coverage collection: */ u64 kcov_handle; /* KCOV sequence number: */ int kcov_sequence; /* Collect coverage from softirq context: */ unsigned int kcov_softirq; #endif #ifdef CONFIG_MEMCG_V1 struct mem_cgroup *memcg_in_oom; #endif #ifdef CONFIG_MEMCG /* Number of pages to reclaim on returning to userland: */ unsigned int memcg_nr_pages_over_high; /* Used by memcontrol for targeted memcg charge: */ struct mem_cgroup *active_memcg; /* Cache for current->cgroups->memcg->objcg lookups: */ struct obj_cgroup *objcg; #endif #ifdef CONFIG_BLK_CGROUP struct gendisk *throttle_disk; #endif #ifdef CONFIG_UPROBES struct uprobe_task *utask; #endif #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) unsigned int sequential_io; unsigned int sequential_io_avg; #endif struct kmap_ctrl kmap_ctrl; #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; # ifdef CONFIG_PREEMPT_RT unsigned long saved_state_change; # endif #endif struct rcu_head rcu; refcount_t rcu_users; int pagefault_disabled; #ifdef CONFIG_MMU struct task_struct *oom_reaper_list; struct timer_list oom_reaper_timer; #endif #ifdef CONFIG_VMAP_STACK struct vm_struct *stack_vm_area; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK /* A live task holds one reference: */ refcount_t stack_refcount; #endif #ifdef CONFIG_LIVEPATCH int patch_state; #endif #ifdef CONFIG_SECURITY /* Used by LSM modules for access restriction: */ void *security; #endif #ifdef CONFIG_BPF_SYSCALL /* Used by BPF task local storage */ struct bpf_local_storage __rcu *bpf_storage; /* Used for BPF run context */ struct bpf_run_ctx *bpf_ctx; #endif /* Used by BPF for per-TASK xdp storage */ struct bpf_net_context *bpf_net_context; #ifdef CONFIG_GCC_PLUGIN_STACKLEAK unsigned long lowest_stack; unsigned long prev_lowest_stack; #endif #ifdef CONFIG_X86_MCE void __user *mce_vaddr; __u64 mce_kflags; u64 mce_addr; __u64 mce_ripv : 1, mce_whole_page : 1, __mce_reserved : 62; struct callback_head mce_kill_me; int mce_count; #endif #ifdef CONFIG_KRETPROBES struct llist_head kretprobe_instances; #endif #ifdef CONFIG_RETHOOK struct llist_head rethooks; #endif #ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH /* * If L1D flush is supported on mm context switch * then we use this callback head to queue kill work * to kill tasks that are not running on SMT disabled * cores */ struct callback_head l1d_flush_kill; #endif #ifdef CONFIG_RV /* * Per-task RV monitor. Nowadays fixed in RV_PER_TASK_MONITORS. * If we find justification for more monitors, we can think * about adding more or developing a dynamic method. So far, * none of these are justified. */ union rv_task_monitor rv[RV_PER_TASK_MONITORS]; #endif #ifdef CONFIG_USER_EVENTS struct user_event_mm *user_event_mm; #endif /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. */ randomized_struct_fields_end /* CPU-specific state of this task: */ struct thread_struct thread; /* * WARNING: on x86, 'thread_struct' contains a variable-sized * structure. It *MUST* be at the end of 'task_struct'. * * Do not put anything below here! 
*/ }; #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) static inline unsigned int __task_state_index(unsigned int tsk_state, unsigned int tsk_exit_state) { unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT; BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); if ((tsk_state & TASK_IDLE) == TASK_IDLE) state = TASK_REPORT_IDLE; /* * We're lying here, but rather than expose a completely new task state * to userspace, we can make this appear as if the task has gone through * a regular rt_mutex_lock() call. * Report frozen tasks as uninterruptible. */ if ((tsk_state & TASK_RTLOCK_WAIT) || (tsk_state & TASK_FROZEN)) state = TASK_UNINTERRUPTIBLE; return fls(state); } static inline unsigned int task_state_index(struct task_struct *tsk) { return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state); } static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; BUILD_BUG_ON(TASK_REPORT_MAX * 2 != 1 << (sizeof(state_char) - 1)); return state_char[state]; } static inline char task_state_to_char(struct task_struct *tsk) { return task_index_to_char(task_state_index(tsk)); } extern struct pid *cad_pid; /* * Per process flags */ #define PF_VCPU 0x00000001 /* I'm a virtual CPU */ #define PF_IDLE 0x00000002 /* I am an IDLE thread */ #define PF_EXITING 0x00000004 /* Getting shut down */ #define PF_POSTCOREDUMP 0x00000008 /* Coredumps should ignore this task */ #define PF_IO_WORKER 0x00000010 /* Task is an IO worker */ #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ #define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */ #define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* Dumped core */ #define PF_SIGNALED 0x00000400 /* Killed by a signal */ #define PF_MEMALLOC 0x00000800 /* Allocating memory to free memory. See memalloc_noreclaim_save() */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF__HOLE__00010000 0x00010000 #define PF_KSWAPD 0x00020000 /* I am kswapd */ #define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ #define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */ #define PF_LOCAL_THROTTLE 0x00100000 /* Throttle writes only against the bdi I write to, * I am cleaning dirty pages from some other bdi. */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ #define PF__HOLE__00800000 0x00800000 #define PF__HOLE__01000000 0x01000000 #define PF__HOLE__02000000 0x02000000 #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_PIN 0x10000000 /* Allocations constrained to zones which allow long term pinning. 
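 * These allocation-scope flags are normally driven through the
 * memalloc_*_save()/restore() helpers from <linux/sched/mm.h> rather than
 * being poked directly; a hedged sketch of the pattern:
 *
 *	unsigned int noio = memalloc_noio_save();
 *	// allocations in this section implicitly behave as GFP_NOIO
 *	memalloc_noio_restore(noio);
 *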
* See memalloc_pin_save() */ #define PF_BLOCK_TS 0x20000000 /* plug has ts that needs updating */ #define PF__HOLE__40000000 0x40000000 #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ /* * Only the _current_ task can read/write to tsk->flags, but other * tasks can access tsk->flags in readonly mode for example * with tsk_used_math (like during threaded core dumping). * There is however an exception to this rule during ptrace * or during fork: the ptracer task is allowed to write to the * child->flags of its traced child (same goes for fork, the parent * can write to the child->flags), because we're guaranteed the * child is not running and in turn not changing child->flags * at the same time the parent does it. */ #define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) #define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) #define clear_used_math() clear_stopped_child_used_math(current) #define set_used_math() set_stopped_child_used_math(current) #define conditional_stopped_child_used_math(condition, child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0) #define conditional_used_math(condition) conditional_stopped_child_used_math(condition, current) #define copy_to_stopped_child_used_math(child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) static __always_inline bool is_percpu_thread(void) { #ifdef CONFIG_SMP return (current->flags & PF_NO_SETAFFINITY) && (current->nr_cpus_allowed == 1); #else return true; #endif } /* Per-process atomic flags. */ #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. 
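 * Set once with task_set_no_new_privs() and only ever tested afterwards,
 * e.g. if (task_no_new_privs(current)) { ... }; these accessors are
 * generated by the TASK_PFA_* macros further below, and no clear helper
 * is generated for this particular bit.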
*/ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ #define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ #define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ #define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ #define PFA_SPEC_SSB_NOEXEC 7 /* Speculative Store Bypass clear on execve() */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ { return test_bit(PFA_##name, &p->atomic_flags); } #define TASK_PFA_SET(name, func) \ static inline void task_set_##func(struct task_struct *p) \ { set_bit(PFA_##name, &p->atomic_flags); } #define TASK_PFA_CLEAR(name, func) \ static inline void task_clear_##func(struct task_struct *p) \ { clear_bit(PFA_##name, &p->atomic_flags); } TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs) TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs) TASK_PFA_TEST(SPREAD_PAGE, spread_page) TASK_PFA_SET(SPREAD_PAGE, spread_page) TASK_PFA_CLEAR(SPREAD_PAGE, spread_page) TASK_PFA_TEST(SPREAD_SLAB, spread_slab) TASK_PFA_SET(SPREAD_SLAB, spread_slab) TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec) TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec) TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec) TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) static inline void current_restore_flags(unsigned long orig_flags, unsigned long flags) { current->flags &= ~flags; current->flags |= orig_flags & flags; } extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); extern int task_can_attach(struct task_struct *p); extern int dl_bw_alloc(int cpu, u64 dl_bw); extern void dl_bw_free(int cpu, u64 dl_bw); #ifdef CONFIG_SMP /* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */ extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); /** * set_cpus_allowed_ptr - set CPU affinity mask of a task * @p: the task * @new_mask: CPU affinity mask * * Return: zero if successful, or a negative error code */ extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node); extern void release_user_cpus_ptr(struct task_struct *p); extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask); extern void force_compatible_cpus_allowed_ptr(struct task_struct *p); extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p); #else static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { } static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { /* Opencoded cpumask_test_cpu(0, new_mask) to avoid dependency on cpumask.h */ if ((*cpumask_bits(new_mask) & 1) == 0) return -EINVAL; return 0; } static inline int dup_user_cpus_ptr(struct 
task_struct *dst, struct task_struct *src, int node) { if (src->user_cpus_ptr) return -EINVAL; return 0; } static inline void release_user_cpus_ptr(struct task_struct *p) { WARN_ON(p->user_cpus_ptr); } static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask) { return 0; } #endif extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); /** * task_nice - return the nice value of a given task. * @p: the task in question. * * Return: The nice value [ -20 ... 0 ... 19 ]. */ static inline int task_nice(const struct task_struct *p) { return PRIO_TO_NICE((p)->static_prio); } extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); extern int available_idle_cpu(int cpu); extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *); extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); extern void sched_set_fifo(struct task_struct *p); extern void sched_set_fifo_low(struct task_struct *p); extern void sched_set_normal(struct task_struct *p, int nice); extern int sched_setattr(struct task_struct *, const struct sched_attr *); extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *); extern struct task_struct *idle_task(int cpu); /** * is_idle_task - is the specified task an idle task? * @p: the task in question. * * Return: 1 if @p is an idle task. 0 otherwise. */ static __always_inline bool is_idle_task(const struct task_struct *p) { return !!(p->flags & PF_IDLE); } extern struct task_struct *curr_task(int cpu); extern void ia64_set_curr_task(int cpu, struct task_struct *p); void yield(void); union thread_union { struct task_struct task; #ifndef CONFIG_THREAD_INFO_IN_TASK struct thread_info thread_info; #endif unsigned long stack[THREAD_SIZE/sizeof(long)]; }; #ifndef CONFIG_THREAD_INFO_IN_TASK extern struct thread_info init_thread_info; #endif extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)]; #ifdef CONFIG_THREAD_INFO_IN_TASK # define task_thread_info(task) (&(task)->thread_info) #else # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif /* * find a task by one of its numerical ids * * find_task_by_pid_ns(): * finds a task by its pid in the specified namespace * find_task_by_vpid(): * finds a task by its virtual pid * * see also find_vpid() etc in include/linux/pid.h */ extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); /* * find a task by its virtual pid and get the task struct */ extern struct task_struct *find_get_task_by_vpid(pid_t nr); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); #else static inline void kick_process(struct task_struct *tsk) { } #endif extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); #define set_task_comm(tsk, from) ({ \ BUILD_BUG_ON(sizeof(from) != TASK_COMM_LEN); \ __set_task_comm(tsk, from, false); \ }) /* * - Why not use task_lock()? * User space can randomly change their names anyway, so locking for readers * doesn't make sense. 
For writers, locking is probably necessary, as a race * condition could lead to long-term mixed results. * The strscpy_pad() in __set_task_comm() can ensure that the task comm is * always NUL-terminated and zero-padded. Therefore the race condition between * reader and writer is not an issue. * * - BUILD_BUG_ON() can help prevent the buf from being truncated. * Since the callers don't perform any return value checks, this safeguard is * necessary. */ #define get_task_comm(buf, tsk) ({ \ BUILD_BUG_ON(sizeof(buf) < TASK_COMM_LEN); \ strscpy_pad(buf, (tsk)->comm); \ buf; \ }) #ifdef CONFIG_SMP static __always_inline void scheduler_ipi(void) { /* * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting * TIF_NEED_RESCHED remotely (for the first time) will also send * this IPI. */ preempt_fold_need_resched(); } #else static inline void scheduler_ipi(void) { } #endif extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state); /* * Set thread flags in other task's structures. * See asm/thread_info.h for TIF_xxxx flags available: */ static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) { set_ti_thread_flag(task_thread_info(tsk), flag); } static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag) { clear_ti_thread_flag(task_thread_info(tsk), flag); } static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag, bool value) { update_ti_thread_flag(task_thread_info(tsk), flag, value); } static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) { return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); } static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) { return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag); } static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) { return test_ti_thread_flag(task_thread_info(tsk), flag); } static inline void set_tsk_need_resched(struct task_struct *tsk) { set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); } static inline void clear_tsk_need_resched(struct task_struct *tsk) { atomic_long_andnot(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY, (atomic_long_t *)&task_thread_info(tsk)->flags); } static inline int test_tsk_need_resched(struct task_struct *tsk) { return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return * value indicates whether a reschedule was done in fact. 
* cond_resched_lock() will drop the spinlock before scheduling, */ #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) extern int __cond_resched(void); #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) void sched_dynamic_klp_enable(void); void sched_dynamic_klp_disable(void); DECLARE_STATIC_CALL(cond_resched, __cond_resched); static __always_inline int _cond_resched(void) { return static_call_mod(cond_resched)(); } #elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) extern int dynamic_cond_resched(void); static __always_inline int _cond_resched(void) { return dynamic_cond_resched(); } #else /* !CONFIG_PREEMPTION */ static inline int _cond_resched(void) { klp_sched_try_switch(); return __cond_resched(); } #endif /* PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */ #else /* CONFIG_PREEMPTION && !CONFIG_PREEMPT_DYNAMIC */ static inline int _cond_resched(void) { klp_sched_try_switch(); return 0; } #endif /* !CONFIG_PREEMPTION || CONFIG_PREEMPT_DYNAMIC */ #define cond_resched() ({ \ __might_resched(__FILE__, __LINE__, 0); \ _cond_resched(); \ }) extern int __cond_resched_lock(spinlock_t *lock); extern int __cond_resched_rwlock_read(rwlock_t *lock); extern int __cond_resched_rwlock_write(rwlock_t *lock); #define MIGHT_RESCHED_RCU_SHIFT 8 #define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1) #ifndef CONFIG_PREEMPT_RT /* * Non RT kernels have an elevated preempt count due to the held lock, * but are not allowed to be inside a RCU read side critical section */ # define PREEMPT_LOCK_RESCHED_OFFSETS PREEMPT_LOCK_OFFSET #else /* * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in * cond_resched*lock() has to take that into account because it checks for * preempt_count() and rcu_preempt_depth(). */ # define PREEMPT_LOCK_RESCHED_OFFSETS \ (PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT)) #endif #define cond_resched_lock(lock) ({ \ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ __cond_resched_lock(lock); \ }) #define cond_resched_rwlock_read(lock) ({ \ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ __cond_resched_rwlock_read(lock); \ }) #define cond_resched_rwlock_write(lock) ({ \ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ __cond_resched_rwlock_write(lock); \ }) static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); } /* * Wrappers for p->thread_info->cpu access. No-op on UP. */ #ifdef CONFIG_SMP static inline unsigned int task_cpu(const struct task_struct *p) { return READ_ONCE(task_thread_info(p)->cpu); } extern void set_task_cpu(struct task_struct *p, unsigned int cpu); #else static inline unsigned int task_cpu(const struct task_struct *p) { return 0; } static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) { } #endif /* CONFIG_SMP */ static inline bool task_is_runnable(struct task_struct *p) { return p->on_rq && !p->se.sched_delayed; } extern bool sched_task_on_rq(struct task_struct *p); extern unsigned long get_wchan(struct task_struct *p); extern struct task_struct *cpu_curr_snapshot(int cpu); #include <linux/spinlock.h> /* * In order to reduce various lock holder preemption latencies provide an * interface to see if a vCPU is currently running or not. * * This allows us to terminate optimistic spin loops and block, analogous to * the native optimistic spin heuristic of testing if the lock owner task is * running or not. 
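 * A typical optimistic-spin loop therefore looks roughly like the sketch
 * below, where try_acquire() and lock are stand-ins rather than a real API;
 * the spinning condition is exactly what owner_on_cpu() below packages up:
 *
 *	while (READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(task_cpu(owner))) {
 *		if (try_acquire(lock))
 *			break;
 *		cpu_relax();
 *	}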
*/ #ifndef vcpu_is_preempted static inline bool vcpu_is_preempted(int cpu) { return false; } #endif extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif #ifdef CONFIG_SMP static inline bool owner_on_cpu(struct task_struct *owner) { /* * As lock holder preemption issue, we both skip spinning if * task is not on cpu or its cpu is preempted */ return READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(task_cpu(owner)); } /* Returns effective CPU energy utilization, as seen by the scheduler */ unsigned long sched_cpu_util(int cpu); #endif /* CONFIG_SMP */ #ifdef CONFIG_SCHED_CORE extern void sched_core_free(struct task_struct *tsk); extern void sched_core_fork(struct task_struct *p); extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, unsigned long uaddr); extern int sched_core_idle_cpu(int cpu); #else static inline void sched_core_free(struct task_struct *tsk) { } static inline void sched_core_fork(struct task_struct *p) { } static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } #endif extern void sched_set_stop_task(int cpu, struct task_struct *stop); #ifdef CONFIG_MEM_ALLOC_PROFILING static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) { swap(current->alloc_tag, tag); return tag; } static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) { #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n"); #endif current->alloc_tag = old; } #else #define alloc_tag_save(_tag) NULL #define alloc_tag_restore(_tag, _old) do {} while (0) #endif #endif
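A hedged sketch of how the helpers declared above compose (it assumes a kernel-module context; for_each_process(), task_pid_nr(), rcu_read_lock() and pr_info() come from their usual headers, and example_dump_tasks() is a made-up name, not part of this header):

#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/printk.h>
#include <linux/rcupdate.h>

/* Print one line per task: pid, state character, nice value, comm. */
static void example_dump_tasks(void)
{
	struct task_struct *p;
	char comm[TASK_COMM_LEN];

	rcu_read_lock();
	for_each_process(p) {
		get_task_comm(comm, p);
		pr_info("%5d %c nice=%-3d %s\n",
			task_pid_nr(p), task_state_to_char(p),
			task_nice(p), comm);
	}
	rcu_read_unlock();
}

Per the comment above get_task_comm(), readers need no locking here; the RCU read-side section only protects the task list walk itself.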
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "super-io.h"
#include "sb-counters.h"

/* BCH_SB_FIELD_counters */

static const char * const bch2_counter_names[] = {
#define x(t, n, ...) (#t),
	BCH_PERSISTENT_COUNTERS()
#undef x
	NULL
};

static size_t bch2_sb_counter_nr_entries(struct bch_sb_field_counters *ctrs)
{
	if (!ctrs)
		return 0;

	return (__le64 *) vstruct_end(&ctrs->field) - &ctrs->d[0];
};

static int bch2_sb_counters_validate(struct bch_sb *sb, struct bch_sb_field *f,
				     enum bch_validate_flags flags,
				     struct printbuf *err)
{
	return 0;
};

static void bch2_sb_counters_to_text(struct printbuf *out, struct bch_sb *sb,
				     struct bch_sb_field *f)
{
	struct bch_sb_field_counters *ctrs = field_to_type(f, counters);
	unsigned int nr = bch2_sb_counter_nr_entries(ctrs);

	for (unsigned i = 0; i < nr; i++)
		prt_printf(out, "%s \t%llu\n",
			   i < BCH_COUNTER_NR ? bch2_counter_names[i] : "(unknown)",
			   le64_to_cpu(ctrs->d[i]));
};

int bch2_sb_counters_to_cpu(struct bch_fs *c)
{
	struct bch_sb_field_counters *ctrs = bch2_sb_field_get(c->disk_sb.sb, counters);
	unsigned int i;
	unsigned int nr = bch2_sb_counter_nr_entries(ctrs);
	u64 val = 0;

	for (i = 0; i < BCH_COUNTER_NR; i++)
		c->counters_on_mount[i] = 0;

	for (i = 0; i < min_t(unsigned int, nr, BCH_COUNTER_NR); i++) {
		val = le64_to_cpu(ctrs->d[i]);
		percpu_u64_set(&c->counters[i], val);
		c->counters_on_mount[i] = val;
	}
	return 0;
};

int bch2_sb_counters_from_cpu(struct bch_fs *c)
{
	struct bch_sb_field_counters *ctrs = bch2_sb_field_get(c->disk_sb.sb, counters);
	struct bch_sb_field_counters *ret;
	unsigned int i;
	unsigned int nr = bch2_sb_counter_nr_entries(ctrs);

	if (nr < BCH_COUNTER_NR) {
		ret = bch2_sb_field_resize(&c->disk_sb, counters,
					   sizeof(*ctrs) / sizeof(u64) + BCH_COUNTER_NR);
		if (ret) {
			ctrs = ret;
			nr = bch2_sb_counter_nr_entries(ctrs);
		}
	}

	for (i = 0; i < min_t(unsigned int, nr, BCH_COUNTER_NR); i++)
		ctrs->d[i] = cpu_to_le64(percpu_u64_get(&c->counters[i]));

	return 0;
}

void bch2_fs_counters_exit(struct bch_fs *c)
{
	free_percpu(c->counters);
}

int bch2_fs_counters_init(struct bch_fs *c)
{
	c->counters = __alloc_percpu(sizeof(u64) * BCH_COUNTER_NR, sizeof(u64));
	if (!c->counters)
		return -BCH_ERR_ENOMEM_fs_counters_init;

	return bch2_sb_counters_to_cpu(c);
}

const struct bch_sb_field_ops bch_sb_field_ops_counters = {
	.validate	= bch2_sb_counters_validate,
	.to_text	= bch2_sb_counters_to_text,
};
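bch2_sb_counters_from_cpu() above folds each per-CPU counter with percpu_u64_get(), a bcachefs helper. Conceptually that is just a sum across possible CPUs; a hedged, generic sketch of the idea follows (example_percpu_u64_sum() is an illustrative name, not the actual bcachefs implementation):

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/types.h>

/* Sum one per-CPU u64 slot across all possible CPUs. */
static u64 example_percpu_u64_sum(u64 __percpu *v)
{
	u64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(v, cpu);

	return sum;
}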
1 1 1 5 5 5 5 3 3 5 3 3 1 5 1 5 3 3 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 // SPDX-License-Identifier: GPL-2.0-or-later /* XTS: as defined in IEEE1619/D16 * http://grouper.ieee.org/groups/1619/email/pdf00086.pdf * * Copyright (c) 2007 Rik Snel <rsnel@cube.dyndns.org> * * Based on ecb.c * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/internal/cipher.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <crypto/xts.h> #include <crypto/b128ops.h> #include <crypto/gf128mul.h> struct xts_tfm_ctx { struct crypto_skcipher *child; struct crypto_cipher *tweak; }; struct xts_instance_ctx { struct crypto_skcipher_spawn spawn; struct crypto_cipher_spawn tweak_spawn; }; struct xts_request_ctx { le128 t; struct scatterlist *tail; struct scatterlist sg[2]; struct skcipher_request subreq; }; static int xts_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(parent); struct crypto_skcipher *child; struct crypto_cipher *tweak; int err; err = xts_verify_key(parent, key, keylen); if (err) return err; keylen /= 2; /* we need two cipher instances: one to compute the initial 'tweak' * by encrypting the IV (usually the 'plain' iv) and the other * one to encrypt and decrypt the data */ /* tweak cipher, uses Key2 i.e. 
the second half of *key */ tweak = ctx->tweak; crypto_cipher_clear_flags(tweak, CRYPTO_TFM_REQ_MASK); crypto_cipher_set_flags(tweak, crypto_skcipher_get_flags(parent) & CRYPTO_TFM_REQ_MASK); err = crypto_cipher_setkey(tweak, key + keylen, keylen); if (err) return err; /* data cipher, uses Key1 i.e. the first half of *key */ child = ctx->child; crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & CRYPTO_TFM_REQ_MASK); return crypto_skcipher_setkey(child, key, keylen); } /* * We compute the tweak masks twice (both before and after the ECB encryption or * decryption) to avoid having to allocate a temporary buffer and/or make * mutliple calls to the 'ecb(..)' instance, which usually would be slower than * just doing the gf128mul_x_ble() calls again. */ static int xts_xor_tweak(struct skcipher_request *req, bool second_pass, bool enc) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); const bool cts = (req->cryptlen % XTS_BLOCK_SIZE); const int bs = XTS_BLOCK_SIZE; struct skcipher_walk w; le128 t = rctx->t; int err; if (second_pass) { req = &rctx->subreq; /* set to our TFM to enforce correct alignment: */ skcipher_request_set_tfm(req, tfm); } err = skcipher_walk_virt(&w, req, false); while (w.nbytes) { unsigned int avail = w.nbytes; le128 *wsrc; le128 *wdst; wsrc = w.src.virt.addr; wdst = w.dst.virt.addr; do { if (unlikely(cts) && w.total - w.nbytes + avail < 2 * XTS_BLOCK_SIZE) { if (!enc) { if (second_pass) rctx->t = t; gf128mul_x_ble(&t, &t); } le128_xor(wdst, &t, wsrc); if (enc && second_pass) gf128mul_x_ble(&rctx->t, &t); skcipher_walk_done(&w, avail - bs); return 0; } le128_xor(wdst++, &t, wsrc++); gf128mul_x_ble(&t, &t); } while ((avail -= bs) >= bs); err = skcipher_walk_done(&w, avail); } return err; } static int xts_xor_tweak_pre(struct skcipher_request *req, bool enc) { return xts_xor_tweak(req, false, enc); } static int xts_xor_tweak_post(struct skcipher_request *req, bool enc) { return xts_xor_tweak(req, true, enc); } static void xts_cts_done(void *data, int err) { struct skcipher_request *req = data; le128 b; if (!err) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); scatterwalk_map_and_copy(&b, rctx->tail, 0, XTS_BLOCK_SIZE, 0); le128_xor(&b, &rctx->t, &b); scatterwalk_map_and_copy(&b, rctx->tail, 0, XTS_BLOCK_SIZE, 1); } skcipher_request_complete(req, err); } static int xts_cts_final(struct skcipher_request *req, int (*crypt)(struct skcipher_request *req)) { const struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); int offset = req->cryptlen & ~(XTS_BLOCK_SIZE - 1); struct xts_request_ctx *rctx = skcipher_request_ctx(req); struct skcipher_request *subreq = &rctx->subreq; int tail = req->cryptlen % XTS_BLOCK_SIZE; le128 b[2]; int err; rctx->tail = scatterwalk_ffwd(rctx->sg, req->dst, offset - XTS_BLOCK_SIZE); scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE, 0); b[1] = b[0]; scatterwalk_map_and_copy(b, req->src, offset, tail, 0); le128_xor(b, &rctx->t, b); scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE + tail, 1); skcipher_request_set_tfm(subreq, ctx->child); skcipher_request_set_callback(subreq, req->base.flags, xts_cts_done, req); skcipher_request_set_crypt(subreq, rctx->tail, rctx->tail, XTS_BLOCK_SIZE, NULL); err = crypt(subreq); if (err) return err; scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE, 0); le128_xor(b, &rctx->t, b); scatterwalk_map_and_copy(b, rctx->tail, 0, 
XTS_BLOCK_SIZE, 1); return 0; } static void xts_encrypt_done(void *data, int err) { struct skcipher_request *req = data; if (!err) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; err = xts_xor_tweak_post(req, true); if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) { err = xts_cts_final(req, crypto_skcipher_encrypt); if (err == -EINPROGRESS || err == -EBUSY) return; } } skcipher_request_complete(req, err); } static void xts_decrypt_done(void *data, int err) { struct skcipher_request *req = data; if (!err) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; err = xts_xor_tweak_post(req, false); if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) { err = xts_cts_final(req, crypto_skcipher_decrypt); if (err == -EINPROGRESS || err == -EBUSY) return; } } skcipher_request_complete(req, err); } static int xts_init_crypt(struct skcipher_request *req, crypto_completion_t compl) { const struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); struct xts_request_ctx *rctx = skcipher_request_ctx(req); struct skcipher_request *subreq = &rctx->subreq; if (req->cryptlen < XTS_BLOCK_SIZE) return -EINVAL; skcipher_request_set_tfm(subreq, ctx->child); skcipher_request_set_callback(subreq, req->base.flags, compl, req); skcipher_request_set_crypt(subreq, req->dst, req->dst, req->cryptlen & ~(XTS_BLOCK_SIZE - 1), NULL); /* calculate first value of T */ crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv); return 0; } static int xts_encrypt(struct skcipher_request *req) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); struct skcipher_request *subreq = &rctx->subreq; int err; err = xts_init_crypt(req, xts_encrypt_done) ?: xts_xor_tweak_pre(req, true) ?: crypto_skcipher_encrypt(subreq) ?: xts_xor_tweak_post(req, true); if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0)) return err; return xts_cts_final(req, crypto_skcipher_encrypt); } static int xts_decrypt(struct skcipher_request *req) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); struct skcipher_request *subreq = &rctx->subreq; int err; err = xts_init_crypt(req, xts_decrypt_done) ?: xts_xor_tweak_pre(req, false) ?: crypto_skcipher_decrypt(subreq) ?: xts_xor_tweak_post(req, false); if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0)) return err; return xts_cts_final(req, crypto_skcipher_decrypt); } static int xts_init_tfm(struct crypto_skcipher *tfm) { struct skcipher_instance *inst = skcipher_alg_instance(tfm); struct xts_instance_ctx *ictx = skcipher_instance_ctx(inst); struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_skcipher *child; struct crypto_cipher *tweak; child = crypto_spawn_skcipher(&ictx->spawn); if (IS_ERR(child)) return PTR_ERR(child); ctx->child = child; tweak = crypto_spawn_cipher(&ictx->tweak_spawn); if (IS_ERR(tweak)) { crypto_free_skcipher(ctx->child); return PTR_ERR(tweak); } ctx->tweak = tweak; crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(child) + sizeof(struct xts_request_ctx)); return 0; } static void xts_exit_tfm(struct crypto_skcipher *tfm) { struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); crypto_free_skcipher(ctx->child); crypto_free_cipher(ctx->tweak); } static void xts_free_instance(struct skcipher_instance *inst) { struct xts_instance_ctx *ictx = skcipher_instance_ctx(inst); crypto_drop_skcipher(&ictx->spawn); crypto_drop_cipher(&ictx->tweak_spawn); kfree(inst); } static int xts_create(struct 
crypto_template *tmpl, struct rtattr **tb) { struct skcipher_alg_common *alg; char name[CRYPTO_MAX_ALG_NAME]; struct skcipher_instance *inst; struct xts_instance_ctx *ctx; const char *cipher_name; u32 mask; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask); if (err) return err; cipher_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(cipher_name)) return PTR_ERR(cipher_name); inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; ctx = skcipher_instance_ctx(inst); err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst), cipher_name, 0, mask); if (err == -ENOENT) { err = -ENAMETOOLONG; if (snprintf(name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", cipher_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst), name, 0, mask); } if (err) goto err_free_inst; alg = crypto_spawn_skcipher_alg_common(&ctx->spawn); err = -EINVAL; if (alg->base.cra_blocksize != XTS_BLOCK_SIZE) goto err_free_inst; if (alg->ivsize) goto err_free_inst; err = crypto_inst_setname(skcipher_crypto_instance(inst), "xts", &alg->base); if (err) goto err_free_inst; err = -EINVAL; cipher_name = alg->base.cra_name; /* Alas we screwed up the naming so we have to mangle the * cipher name. */ if (!strncmp(cipher_name, "ecb(", 4)) { int len; len = strscpy(name, cipher_name + 4, sizeof(name)); if (len < 2) goto err_free_inst; if (name[len - 1] != ')') goto err_free_inst; name[len - 1] = 0; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "xts(%s)", name) >= CRYPTO_MAX_ALG_NAME) { err = -ENAMETOOLONG; goto err_free_inst; } } else goto err_free_inst; err = crypto_grab_cipher(&ctx->tweak_spawn, skcipher_crypto_instance(inst), name, 0, mask); if (err) goto err_free_inst; inst->alg.base.cra_priority = alg->base.cra_priority; inst->alg.base.cra_blocksize = XTS_BLOCK_SIZE; inst->alg.base.cra_alignmask = alg->base.cra_alignmask | (__alignof__(u64) - 1); inst->alg.ivsize = XTS_BLOCK_SIZE; inst->alg.min_keysize = alg->min_keysize * 2; inst->alg.max_keysize = alg->max_keysize * 2; inst->alg.base.cra_ctxsize = sizeof(struct xts_tfm_ctx); inst->alg.init = xts_init_tfm; inst->alg.exit = xts_exit_tfm; inst->alg.setkey = xts_setkey; inst->alg.encrypt = xts_encrypt; inst->alg.decrypt = xts_decrypt; inst->free = xts_free_instance; err = skcipher_register_instance(tmpl, inst); if (err) { err_free_inst: xts_free_instance(inst); } return err; } static struct crypto_template xts_tmpl = { .name = "xts", .create = xts_create, .module = THIS_MODULE, }; static int __init xts_module_init(void) { return crypto_register_template(&xts_tmpl); } static void __exit xts_module_exit(void) { crypto_unregister_template(&xts_tmpl); } subsys_initcall(xts_module_init); module_exit(xts_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("XTS block cipher mode"); MODULE_ALIAS_CRYPTO("xts"); MODULE_IMPORT_NS("CRYPTO_INTERNAL"); MODULE_SOFTDEP("pre: ecb");
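/*
 * Hedged usage sketch, not part of xts.c: encrypting one linearly-mapped
 * buffer with the "xts(aes)" template instantiated by the code above, via
 * the standard skcipher API.  The helper name and parameters are
 * illustrative; for xts(aes) the key is Key1 || Key2 (32 or 64 bytes) and
 * the 16-byte IV carries the tweak (typically the sector number).
 */
#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/err.h>

static int xts_encrypt_sector(const u8 *key, unsigned int keylen,
			      u8 *sector, unsigned int len, u8 iv[16])
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_skcipher("xts(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, sector, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);

	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}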
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Cryptographic API
 *
 * ARC4 Cipher Algorithm
 *
 * Jon Oberheide <jon@oberheide.org>
 */

#include <crypto/arc4.h>
#include <linux/module.h>

int arc4_setkey(struct arc4_ctx *ctx, const u8 *in_key, unsigned int key_len)
{
	int i, j = 0, k = 0;

	ctx->x = 1;
	ctx->y = 0;

	for (i = 0; i < 256; i++)
		ctx->S[i] = i;

	for (i = 0; i < 256; i++) {
		u32 a = ctx->S[i];

		j = (j + in_key[k] + a) & 0xff;
		ctx->S[i] = ctx->S[j];
		ctx->S[j] = a;
		if (++k >= key_len)
			k = 0;
	}

	return 0;
}
EXPORT_SYMBOL(arc4_setkey);

void arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len)
{
	u32 *const S = ctx->S;
	u32 x, y, a, b;
	u32 ty, ta, tb;

	if (len == 0)
		return;

	x = ctx->x;
	y = ctx->y;

	a = S[x];
	y = (y + a) & 0xff;
	b = S[y];

	do {
		S[y] = a;
		a = (a + b) & 0xff;
		S[x] = b;
		x = (x + 1) & 0xff;
		ta = S[x];
		ty = (y + ta) & 0xff;
		tb = S[ty];
		*out++ = *in++ ^ S[a];
		if (--len == 0)
			break;
		y = ty;
		a = ta;
		b = tb;
	} while (true);

	ctx->x = x;
	ctx->y = y;
}
EXPORT_SYMBOL(arc4_crypt);

MODULE_DESCRIPTION("ARC4 Cipher Algorithm");
MODULE_LICENSE("GPL");
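/*
 * Hedged usage sketch, not part of arc4.c: encrypting (or, identically,
 * decrypting) a buffer in place with the library interface above.  ARC4 is a
 * pure XOR keystream, so out may alias in.  The helper name is illustrative;
 * real callers derive the key per packet or session.
 */
#include <crypto/arc4.h>
#include <linux/string.h>

static void arc4_crypt_in_place(u8 *buf, unsigned int len,
				const u8 *key, unsigned int key_len)
{
	struct arc4_ctx ctx;

	arc4_setkey(&ctx, key, key_len);	/* key_len: 1..256 bytes */
	arc4_crypt(&ctx, buf, buf, len);

	memzero_explicit(&ctx, sizeof(ctx));	/* scrub the key schedule */
}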
1 4 4 5 4 1 5 5 6 1 1 3 1 8 4 3 1 1 14 1 24 1 1 2 18 1 1 1 4 7 6 9 4 1 4 1 5 4 1 13 14 14 19 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> */ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/filter.h> #include <linux/bpf.h> #include <net/netlink.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <linux/tc_act/tc_bpf.h> #include <net/tc_act/tc_bpf.h> #include <net/tc_wrapper.h> #define ACT_BPF_NAME_LEN 256 struct tcf_bpf_cfg { struct bpf_prog *filter; struct sock_filter *bpf_ops; const char *bpf_name; u16 bpf_num_ops; bool is_ebpf; }; static struct tc_action_ops act_bpf_ops; TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { bool at_ingress = skb_at_tc_ingress(skb); struct tcf_bpf *prog = to_bpf(act); struct bpf_prog *filter; int action, filter_res; tcf_lastuse_update(&prog->tcf_tm); bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb); filter = rcu_dereference(prog->filter); if (at_ingress) { __skb_push(skb, skb->mac_len); bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(filter, skb); __skb_pull(skb, skb->mac_len); } else { bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(filter, skb); } if (unlikely(!skb->tstamp && skb->tstamp_type)) skb->tstamp_type = SKB_CLOCK_REALTIME; if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK) skb_orphan(skb); /* A BPF program may overwrite the default action opcode. * Similarly as in cls_bpf, if filter_res == -1 we use the * default action specified from tc. * * In case a different well-known TC_ACT opcode has been * returned, it will overwrite the default one. * * For everything else that is unknown, TC_ACT_UNSPEC is * returned. 
*/ switch (filter_res) { case TC_ACT_PIPE: case TC_ACT_RECLASSIFY: case TC_ACT_OK: case TC_ACT_REDIRECT: action = filter_res; break; case TC_ACT_SHOT: action = filter_res; qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats)); break; case TC_ACT_UNSPEC: action = prog->tcf_action; break; default: action = TC_ACT_UNSPEC; break; } return action; } static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog) { return !prog->bpf_ops; } static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog, struct sk_buff *skb) { struct nlattr *nla; if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops)) return -EMSGSIZE; nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops * sizeof(struct sock_filter)); if (nla == NULL) return -EMSGSIZE; memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); return 0; } static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, struct sk_buff *skb) { struct nlattr *nla; if (prog->bpf_name && nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) return -EMSGSIZE; if (nla_put_u32(skb, TCA_ACT_BPF_ID, prog->filter->aux->id)) return -EMSGSIZE; nla = nla_reserve(skb, TCA_ACT_BPF_TAG, sizeof(prog->filter->tag)); if (nla == NULL) return -EMSGSIZE; memcpy(nla_data(nla), prog->filter->tag, nla_len(nla)); return 0; } static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) { unsigned char *tp = skb_tail_pointer(skb); struct tcf_bpf *prog = to_bpf(act); struct tc_act_bpf opt = { .index = prog->tcf_index, .refcnt = refcount_read(&prog->tcf_refcnt) - ref, .bindcnt = atomic_read(&prog->tcf_bindcnt) - bind, }; struct tcf_t tm; int ret; spin_lock_bh(&prog->tcf_lock); opt.action = prog->tcf_action; if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if (tcf_bpf_is_ebpf(prog)) ret = tcf_bpf_dump_ebpf_info(prog, skb); else ret = tcf_bpf_dump_bpf_info(prog, skb); if (ret) goto nla_put_failure; tcf_tm_dump(&tm, &prog->tcf_tm); if (nla_put_64bit(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm, TCA_ACT_BPF_PAD)) goto nla_put_failure; spin_unlock_bh(&prog->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&prog->tcf_lock); nlmsg_trim(skb, tp); return -1; } static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, [TCA_ACT_BPF_FD] = { .type = NLA_U32 }, [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN }, [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, }; static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) { struct sock_filter *bpf_ops; struct sock_fprog_kern fprog_tmp; struct bpf_prog *fp; u16 bpf_size, bpf_num_ops; int ret; bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) return -EINVAL; bpf_size = bpf_num_ops * sizeof(*bpf_ops); if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS])) return -EINVAL; bpf_ops = kmemdup(nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size, GFP_KERNEL); if (bpf_ops == NULL) return -ENOMEM; fprog_tmp.len = bpf_num_ops; fprog_tmp.filter = bpf_ops; ret = bpf_prog_create(&fp, &fprog_tmp); if (ret < 0) { kfree(bpf_ops); return ret; } cfg->bpf_ops = bpf_ops; cfg->bpf_num_ops = bpf_num_ops; cfg->filter = fp; cfg->is_ebpf = false; return 0; } static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) { struct bpf_prog *fp; char *name = NULL; u32 bpf_fd; bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]); fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_ACT); if 
(IS_ERR(fp)) return PTR_ERR(fp); if (tb[TCA_ACT_BPF_NAME]) { name = nla_memdup(tb[TCA_ACT_BPF_NAME], GFP_KERNEL); if (!name) { bpf_prog_put(fp); return -ENOMEM; } } cfg->bpf_name = name; cfg->filter = fp; cfg->is_ebpf = true; return 0; } static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg) { struct bpf_prog *filter = cfg->filter; if (filter) { if (cfg->is_ebpf) bpf_prog_put(filter); else bpf_prog_destroy(filter); } kfree(cfg->bpf_ops); kfree(cfg->bpf_name); } static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, struct tcf_bpf_cfg *cfg) { cfg->is_ebpf = tcf_bpf_is_ebpf(prog); /* updates to prog->filter are prevented, since it's called either * with tcf lock or during final cleanup in rcu callback */ cfg->filter = rcu_dereference_protected(prog->filter, 1); cfg->bpf_ops = prog->bpf_ops; cfg->bpf_name = prog->bpf_name; } static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tcf_bpf_cfg cfg, old; struct tc_act_bpf *parm; struct tcf_bpf *prog; bool is_bpf, is_ebpf; int ret, res = 0; u32 index; if (!nla) return -EINVAL; ret = nla_parse_nested_deprecated(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy, NULL); if (ret < 0) return ret; if (!tb[TCA_ACT_BPF_PARMS]) return -EINVAL; parm = nla_data(tb[TCA_ACT_BPF_PARMS]); index = parm->index; ret = tcf_idr_check_alloc(tn, &index, act, bind); if (!ret) { ret = tcf_idr_create(tn, index, est, act, &act_bpf_ops, bind, true, flags); if (ret < 0) { tcf_idr_cleanup(tn, index); return ret; } res = ACT_P_CREATED; } else if (ret > 0) { /* Don't override defaults. */ if (bind) return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*act, bind); return -EEXIST; } } else { return ret; } ret = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (ret < 0) goto release_idr; is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; is_ebpf = tb[TCA_ACT_BPF_FD]; if (is_bpf == is_ebpf) { ret = -EINVAL; goto put_chain; } memset(&cfg, 0, sizeof(cfg)); ret = is_bpf ? 
tcf_bpf_init_from_ops(tb, &cfg) : tcf_bpf_init_from_efd(tb, &cfg); if (ret < 0) goto put_chain; prog = to_bpf(*act); spin_lock_bh(&prog->tcf_lock); if (res != ACT_P_CREATED) tcf_bpf_prog_fill_cfg(prog, &old); prog->bpf_ops = cfg.bpf_ops; prog->bpf_name = cfg.bpf_name; if (cfg.bpf_num_ops) prog->bpf_num_ops = cfg.bpf_num_ops; goto_ch = tcf_action_set_ctrlact(*act, parm->action, goto_ch); rcu_assign_pointer(prog->filter, cfg.filter); spin_unlock_bh(&prog->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); if (res != ACT_P_CREATED) { /* make sure the program being replaced is no longer executing */ synchronize_rcu(); tcf_bpf_cfg_cleanup(&old); } return res; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*act, bind); return ret; } static void tcf_bpf_cleanup(struct tc_action *act) { struct tcf_bpf_cfg tmp; tcf_bpf_prog_fill_cfg(to_bpf(act), &tmp); tcf_bpf_cfg_cleanup(&tmp); } static struct tc_action_ops act_bpf_ops __read_mostly = { .kind = "bpf", .id = TCA_ID_BPF, .owner = THIS_MODULE, .act = tcf_bpf_act, .dump = tcf_bpf_dump, .cleanup = tcf_bpf_cleanup, .init = tcf_bpf_init, .size = sizeof(struct tcf_bpf), }; MODULE_ALIAS_NET_ACT("bpf"); static __net_init int bpf_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id); return tc_action_net_init(net, tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_bpf_ops.net_id); } static struct pernet_operations bpf_net_ops = { .init = bpf_init_net, .exit_batch = bpf_exit_net, .id = &act_bpf_ops.net_id, .size = sizeof(struct tc_action_net), }; static int __init bpf_init_module(void) { return tcf_register_action(&act_bpf_ops, &bpf_net_ops); } static void __exit bpf_cleanup_module(void) { tcf_unregister_action(&act_bpf_ops, &bpf_net_ops); } module_init(bpf_init_module); module_exit(bpf_cleanup_module); MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); MODULE_DESCRIPTION("TC BPF based action"); MODULE_LICENSE("GPL v2");
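/*
 * Hedged sketch, not part of act_bpf.c: the smallest classic-BPF program a
 * user could hand to this action via TCA_ACT_BPF_OPS / TCA_ACT_BPF_OPS_LEN.
 * Its return value feeds the switch in tcf_bpf_act(): returning TC_ACT_SHOT
 * drops every packet and bumps the drop qstats.  Building the enclosing
 * netlink request is omitted here.
 */
#include <linux/filter.h>
#include <linux/pkt_cls.h>

static const struct sock_filter drop_all_ops[] = {
	BPF_STMT(BPF_RET | BPF_K, TC_ACT_SHOT),	/* unconditionally "shot" */
};

/*
 * TCA_ACT_BPF_OPS_LEN would carry ARRAY_SIZE(drop_all_ops) and
 * TCA_ACT_BPF_OPS the raw drop_all_ops[] bytes; tcf_bpf_init_from_ops()
 * then builds the kernel program with bpf_prog_create().
 */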
372 373 2 1 1 314 314 5 4 312 312 14 13 14 14 1 1 1 1 1 1 4 4 4 4 4 11 11 11 11 312 310 310 1776 1773 973 312 312 19 19 1 1 1 20 20 9 9 9 9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 // SPDX-License-Identifier: GPL-2.0-only /* * File: pn_dev.c * * Phonet network device * * Copyright (C) 2008 Nokia Corporation. * * Authors: Sakari Ailus <sakari.ailus@nokia.com> * Rémi Denis-Courmont */ #include <linux/kernel.h> #include <linux/net.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/phonet.h> #include <linux/proc_fs.h> #include <linux/if_arp.h> #include <net/sock.h> #include <net/netns/generic.h> #include <net/phonet/pn_dev.h> struct phonet_routes { spinlock_t lock; struct net_device __rcu *table[64]; }; struct phonet_net { struct phonet_device_list pndevs; struct phonet_routes routes; }; static unsigned int phonet_net_id __read_mostly; static struct phonet_net *phonet_pernet(struct net *net) { return net_generic(net, phonet_net_id); } struct phonet_device_list *phonet_device_list(struct net *net) { struct phonet_net *pnn = phonet_pernet(net); return &pnn->pndevs; } /* Allocate new Phonet device. 
*/ static struct phonet_device *__phonet_device_alloc(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd = kmalloc(sizeof(*pnd), GFP_ATOMIC); if (pnd == NULL) return NULL; pnd->netdev = dev; bitmap_zero(pnd->addrs, 64); lockdep_assert_held(&pndevs->lock); list_add_rcu(&pnd->list, &pndevs->list); return pnd; } static struct phonet_device *__phonet_get(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; lockdep_assert_held(&pndevs->lock); list_for_each_entry(pnd, &pndevs->list, list) { if (pnd->netdev == dev) return pnd; } return NULL; } static struct phonet_device *__phonet_get_rcu(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; list_for_each_entry_rcu(pnd, &pndevs->list, list) { if (pnd->netdev == dev) return pnd; } return NULL; } static void phonet_device_destroy(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; ASSERT_RTNL(); spin_lock(&pndevs->lock); pnd = __phonet_get(dev); if (pnd) list_del_rcu(&pnd->list); spin_unlock(&pndevs->lock); if (pnd) { struct net *net = dev_net(dev); u32 ifindex = dev->ifindex; u8 addr; for_each_set_bit(addr, pnd->addrs, 64) phonet_address_notify(net, RTM_DELADDR, ifindex, addr); kfree(pnd); } } struct net_device *phonet_device_get(struct net *net) { struct phonet_device_list *pndevs = phonet_device_list(net); struct phonet_device *pnd; struct net_device *dev = NULL; rcu_read_lock(); list_for_each_entry_rcu(pnd, &pndevs->list, list) { dev = pnd->netdev; BUG_ON(!dev); if ((dev->reg_state == NETREG_REGISTERED) && ((pnd->netdev->flags & IFF_UP)) == IFF_UP) break; dev = NULL; } dev_hold(dev); rcu_read_unlock(); return dev; } int phonet_address_add(struct net_device *dev, u8 addr) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; int err = 0; spin_lock(&pndevs->lock); /* Find or create Phonet-specific device data */ pnd = __phonet_get(dev); if (pnd == NULL) pnd = __phonet_device_alloc(dev); if (unlikely(pnd == NULL)) err = -ENOMEM; else if (test_and_set_bit(addr >> 2, pnd->addrs)) err = -EEXIST; spin_unlock(&pndevs->lock); return err; } int phonet_address_del(struct net_device *dev, u8 addr) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; int err = 0; spin_lock(&pndevs->lock); pnd = __phonet_get(dev); if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) { err = -EADDRNOTAVAIL; pnd = NULL; } else if (bitmap_empty(pnd->addrs, 64)) list_del_rcu(&pnd->list); else pnd = NULL; spin_unlock(&pndevs->lock); if (pnd) kfree_rcu(pnd, rcu); return err; } /* Gets a source address toward a destination, through a interface. 
*/ u8 phonet_address_get(struct net_device *dev, u8 daddr) { struct phonet_device *pnd; u8 saddr; rcu_read_lock(); pnd = __phonet_get_rcu(dev); if (pnd) { BUG_ON(bitmap_empty(pnd->addrs, 64)); /* Use same source address as destination, if possible */ if (test_bit(daddr >> 2, pnd->addrs)) saddr = daddr; else saddr = find_first_bit(pnd->addrs, 64) << 2; } else saddr = PN_NO_ADDR; rcu_read_unlock(); if (saddr == PN_NO_ADDR) { /* Fallback to another device */ struct net_device *def_dev; def_dev = phonet_device_get(dev_net(dev)); if (def_dev) { if (def_dev != dev) saddr = phonet_address_get(def_dev, daddr); dev_put(def_dev); } } return saddr; } int phonet_address_lookup(struct net *net, u8 addr) { struct phonet_device_list *pndevs = phonet_device_list(net); struct phonet_device *pnd; int err = -EADDRNOTAVAIL; rcu_read_lock(); list_for_each_entry_rcu(pnd, &pndevs->list, list) { /* Don't allow unregistering devices! */ if ((pnd->netdev->reg_state != NETREG_REGISTERED) || ((pnd->netdev->flags & IFF_UP)) != IFF_UP) continue; if (test_bit(addr >> 2, pnd->addrs)) { err = 0; goto found; } } found: rcu_read_unlock(); return err; } /* automatically configure a Phonet device, if supported */ static int phonet_device_autoconf(struct net_device *dev) { struct if_phonet_req req; int ret; if (!dev->netdev_ops->ndo_siocdevprivate) return -EOPNOTSUPP; ret = dev->netdev_ops->ndo_siocdevprivate(dev, (struct ifreq *)&req, NULL, SIOCPNGAUTOCONF); if (ret < 0) return ret; ASSERT_RTNL(); ret = phonet_address_add(dev, req.ifr_phonet_autoconf.device); if (ret) return ret; phonet_address_notify(dev_net(dev), RTM_NEWADDR, dev->ifindex, req.ifr_phonet_autoconf.device); return 0; } static void phonet_route_autodel(struct net_device *dev) { struct net *net = dev_net(dev); DECLARE_BITMAP(deleted, 64); u32 ifindex = dev->ifindex; struct phonet_net *pnn; unsigned int i; pnn = phonet_pernet(net); /* Remove left-over Phonet routes */ bitmap_zero(deleted, 64); spin_lock(&pnn->routes.lock); for (i = 0; i < 64; i++) { if (rcu_access_pointer(pnn->routes.table[i]) == dev) { RCU_INIT_POINTER(pnn->routes.table[i], NULL); set_bit(i, deleted); } } spin_unlock(&pnn->routes.lock); if (bitmap_empty(deleted, 64)) return; /* short-circuit RCU */ synchronize_rcu(); for_each_set_bit(i, deleted, 64) { rtm_phonet_notify(net, RTM_DELROUTE, ifindex, i); dev_put(dev); } } /* notify Phonet of device events */ static int phonet_device_notify(struct notifier_block *me, unsigned long what, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (what) { case NETDEV_REGISTER: if (dev->type == ARPHRD_PHONET) phonet_device_autoconf(dev); break; case NETDEV_UNREGISTER: phonet_device_destroy(dev); phonet_route_autodel(dev); break; } return 0; } static struct notifier_block phonet_device_notifier = { .notifier_call = phonet_device_notify, .priority = 0, }; /* Per-namespace Phonet devices handling */ static int __net_init phonet_init_net(struct net *net) { struct phonet_net *pnn = phonet_pernet(net); if (!proc_create_net("phonet", 0, net->proc_net, &pn_sock_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; INIT_LIST_HEAD(&pnn->pndevs.list); spin_lock_init(&pnn->pndevs.lock); spin_lock_init(&pnn->routes.lock); return 0; } static void __net_exit phonet_exit_net(struct net *net) { struct phonet_net *pnn = phonet_pernet(net); remove_proc_entry("phonet", net->proc_net); WARN_ON_ONCE(!list_empty(&pnn->pndevs.list)); } static struct pernet_operations phonet_net_ops = { .init = phonet_init_net, .exit = phonet_exit_net, .id = 
&phonet_net_id, .size = sizeof(struct phonet_net), }; /* Initialize Phonet devices list */ int __init phonet_device_init(void) { int err = register_pernet_subsys(&phonet_net_ops); if (err) return err; proc_create_net("pnresource", 0, init_net.proc_net, &pn_res_seq_ops, sizeof(struct seq_net_private)); register_netdevice_notifier(&phonet_device_notifier); err = phonet_netlink_register(); if (err) phonet_device_exit(); return err; } void phonet_device_exit(void) { rtnl_unregister_all(PF_PHONET); unregister_netdevice_notifier(&phonet_device_notifier); unregister_pernet_subsys(&phonet_net_ops); remove_proc_entry("pnresource", init_net.proc_net); } int phonet_route_add(struct net_device *dev, u8 daddr) { struct phonet_net *pnn = phonet_pernet(dev_net(dev)); struct phonet_routes *routes = &pnn->routes; int err = -EEXIST; daddr = daddr >> 2; spin_lock(&routes->lock); if (routes->table[daddr] == NULL) { rcu_assign_pointer(routes->table[daddr], dev); dev_hold(dev); err = 0; } spin_unlock(&routes->lock); return err; } int phonet_route_del(struct net_device *dev, u8 daddr) { struct phonet_net *pnn = phonet_pernet(dev_net(dev)); struct phonet_routes *routes = &pnn->routes; daddr = daddr >> 2; spin_lock(&routes->lock); if (rcu_access_pointer(routes->table[daddr]) == dev) RCU_INIT_POINTER(routes->table[daddr], NULL); else dev = NULL; spin_unlock(&routes->lock); if (!dev) return -ENOENT; /* Note : our caller must call synchronize_rcu() and dev_put(dev) */ return 0; } struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr) { struct phonet_net *pnn = phonet_pernet(net); struct phonet_routes *routes = &pnn->routes; struct net_device *dev; daddr >>= 2; dev = rcu_dereference(routes->table[daddr]); return dev; } struct net_device *phonet_route_output(struct net *net, u8 daddr) { struct phonet_net *pnn = phonet_pernet(net); struct phonet_routes *routes = &pnn->routes; struct net_device *dev; daddr >>= 2; rcu_read_lock(); dev = rcu_dereference(routes->table[daddr]); dev_hold(dev); rcu_read_unlock(); if (!dev) dev = phonet_device_get(net); /* Default route */ return dev; }
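/*
 * Hedged illustration, not part of pn_dev.c: why the code above shifts
 * addresses right by two and sizes its bitmaps and routing table at 64
 * entries.  A Phonet address is one byte whose upper six bits identify the
 * device; the low two bits are dropped when indexing, which is what the
 * "addr >> 2" / "<< 2" pairs express.  These helpers are hypothetical.
 */
static inline unsigned int pn_addr_to_idx(u8 addr)
{
	return addr >> 2;		/* 0..63: fits the 64-bit address bitmaps */
}

static inline u8 pn_idx_to_addr(unsigned int idx)
{
	return idx << 2;		/* canonical address with the low bits cleared */
}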
3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 // SPDX-License-Identifier: GPL-2.0-only /* * Simple NUMA memory policy for the Linux kernel. * * Copyright 2003,2004 Andi Kleen, SuSE Labs. * (C) Copyright 2005 Christoph Lameter, Silicon Graphics, Inc. * * NUMA policy allows the user to give hints in which node(s) memory should * be allocated. * * Support six policies per VMA and per process: * * The VMA policy has priority over the process policy for a page fault. * * interleave Allocate memory interleaved over a set of nodes, * with normal fallback if it fails. * For VMA based allocations this interleaves based on the * offset into the backing object or offset into the mapping * for anonymous memory. For process policy an process counter * is used. * * weighted interleave * Allocate memory interleaved over a set of nodes based on * a set of weights (per-node), with normal fallback if it * fails. Otherwise operates the same as interleave. * Example: nodeset(0,1) & weights (2,1) - 2 pages allocated * on node 0 for every 1 page allocated on node 1. * * bind Only allocate memory on a specific set of nodes, * no fallback. * FIXME: memory is allocated starting with the first node * to the last. It would be better if bind would truly restrict * the allocation to memory nodes instead * * preferred Try a specific node first before normal fallback. * As a special case NUMA_NO_NODE here means do the allocation * on the local CPU. This is normally identical to default, * but useful to set in a VMA when you have a non default * process policy. * * preferred many Try a set of nodes first before normal fallback. This is * similar to preferred without the special case. * * default Allocate on the local node first, or when on a VMA * use the process policy. This is what Linux always did * in a NUMA aware kernel and still does by, ahem, default. * * The process policy is applied for most non interrupt memory allocations * in that process' context. Interrupts ignore the policies and always * try to allocate on the local CPU. The VMA policy is only applied for memory * allocations for a VMA in the VM. * * Currently there are a few corner cases in swapping where the policy * is not applied, but the majority should be handled. When process policy * is used it is not remembered over swap outs/swap ins. * * Only the highest zone in the zone hierarchy gets policied. Allocations * requesting a lower zone just use default policy. This implies that * on systems with highmem kernel lowmem allocation don't get policied. * Same with GFP_DMA allocations. * * For shmem/tmpfs shared memory the policy is shared between * all users and remembered even when nobody has memory mapped. */ /* Notebook: fix mmap readahead to honour policy and enable policy for any page cache object statistics for bigpages global policy for page cache? currently it uses process policy. Requires first item above. handle mremap for shared memory (currently ignored for the policy) grows down? make bind policy root only? It can trigger oom much faster and the kernel is not always grateful with that. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/mempolicy.h> #include <linux/pagewalk.h> #include <linux/highmem.h> #include <linux/hugetlb.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/numa_balancing.h> #include <linux/sched/task.h> #include <linux/nodemask.h> #include <linux/cpuset.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/export.h> #include <linux/nsproxy.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/compat.h> #include <linux/ptrace.h> #include <linux/swap.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> #include <linux/migrate.h> #include <linux/ksm.h> #include <linux/rmap.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/ctype.h> #include <linux/mm_inline.h> #include <linux/mmu_notifier.h> #include <linux/printk.h> #include <linux/swapops.h> #include <asm/tlbflush.h> #include <asm/tlb.h> #include <linux/uaccess.h> #include "internal.h" /* Internal flags */ #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */ #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */ #define MPOL_MF_WRLOCK (MPOL_MF_INTERNAL << 2) /* Write-lock walked vmas */ static struct kmem_cache *policy_cache; static struct kmem_cache *sn_cache; /* Highest zone. A specific allocation for a zone below that is not policied. */ enum zone_type policy_zone = 0; /* * run-time system-wide default policy => local allocation */ static struct mempolicy default_policy = { .refcnt = ATOMIC_INIT(1), /* never free it */ .mode = MPOL_LOCAL, }; static struct mempolicy preferred_node_policy[MAX_NUMNODES]; /* * iw_table is the sysfs-set interleave weight table, a value of 0 denotes that * the system-default value should be used. A NULL iw_table also denotes that * system-default values should be used. Until the system-default table * is implemented, the system-default is always 1. * * iw_table is RCU protected */ static u8 __rcu *iw_table; static DEFINE_MUTEX(iw_table_lock); static u8 get_il_weight(int node) { u8 *table; u8 weight; rcu_read_lock(); table = rcu_dereference(iw_table); /* if no iw_table, use system default */ weight = table ? table[node] : 1; /* if value in iw_table is 0, use system default */ weight = weight ? weight : 1; rcu_read_unlock(); return weight; } /** * numa_nearest_node - Find nearest node by state * @node: Node id to start the search * @state: State to filter the search * * Lookup the closest node by distance if @node is not in state.
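 *
 * Example (hypothetical topology, for illustration only): if node 1 is not in
 * N_MEMORY, numa_nearest_node(1, N_MEMORY) scans every node in that state and
 * returns the one with the smallest node_distance(1, n); if node 1 does have
 * memory, it is returned unchanged.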
* * Return: this @node if it is in state, otherwise the closest node by distance */ int numa_nearest_node(int node, unsigned int state) { int min_dist = INT_MAX, dist, n, min_node; if (state >= NR_NODE_STATES) return -EINVAL; if (node == NUMA_NO_NODE || node_state(node, state)) return node; min_node = node; for_each_node_state(n, state) { dist = node_distance(node, n); if (dist < min_dist) { min_dist = dist; min_node = n; } } return min_node; } EXPORT_SYMBOL_GPL(numa_nearest_node); struct mempolicy *get_task_policy(struct task_struct *p) { struct mempolicy *pol = p->mempolicy; int node; if (pol) return pol; node = numa_node_id(); if (node != NUMA_NO_NODE) { pol = &preferred_node_policy[node]; /* preferred_node_policy is not initialised early in boot */ if (pol->mode) return pol; } return &default_policy; } static const struct mempolicy_operations { int (*create)(struct mempolicy *pol, const nodemask_t *nodes); void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes); } mpol_ops[MPOL_MAX]; static inline int mpol_store_user_nodemask(const struct mempolicy *pol) { return pol->flags & MPOL_MODE_FLAGS; } static void mpol_relative_nodemask(nodemask_t *ret, const nodemask_t *orig, const nodemask_t *rel) { nodemask_t tmp; nodes_fold(tmp, *orig, nodes_weight(*rel)); nodes_onto(*ret, tmp, *rel); } static int mpol_new_nodemask(struct mempolicy *pol, const nodemask_t *nodes) { if (nodes_empty(*nodes)) return -EINVAL; pol->nodes = *nodes; return 0; } static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes) { if (nodes_empty(*nodes)) return -EINVAL; nodes_clear(pol->nodes); node_set(first_node(*nodes), pol->nodes); return 0; } /* * mpol_set_nodemask is called after mpol_new() to set up the nodemask, if * any, for the new policy. mpol_new() has already validated the nodes * parameter with respect to the policy mode and flags. * * Must be called holding task's alloc_lock to protect task's mems_allowed * and mempolicy. May also be called holding the mmap_lock for write. */ static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes, struct nodemask_scratch *nsc) { int ret; /* * Default (pol==NULL) resp. local memory policies are not a * subject of any remapping. They also do not need any special * constructor. */ if (!pol || pol->mode == MPOL_LOCAL) return 0; /* Check N_MEMORY */ nodes_and(nsc->mask1, cpuset_current_mems_allowed, node_states[N_MEMORY]); VM_BUG_ON(!nodes); if (pol->flags & MPOL_F_RELATIVE_NODES) mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1); else nodes_and(nsc->mask2, *nodes, nsc->mask1); if (mpol_store_user_nodemask(pol)) pol->w.user_nodemask = *nodes; else pol->w.cpuset_mems_allowed = cpuset_current_mems_allowed; ret = mpol_ops[pol->mode].create(pol, &nsc->mask2); return ret; } /* * This function just creates a new policy, does some check and simple * initialization. You must invoke mpol_set_nodemask() to set nodes. */ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, nodemask_t *nodes) { struct mempolicy *policy; if (mode == MPOL_DEFAULT) { if (nodes && !nodes_empty(*nodes)) return ERR_PTR(-EINVAL); return NULL; } VM_BUG_ON(!nodes); /* * MPOL_PREFERRED cannot be used with MPOL_F_STATIC_NODES or * MPOL_F_RELATIVE_NODES if the nodemask is empty (local allocation). * All other modes require a valid pointer to a non-empty nodemask. 
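 *
 * Rough summary of the checks below (illustrative, not exhaustive;
 * empty_mask/full_mask are placeholder names): mpol_new(MPOL_PREFERRED, 0,
 * &empty_mask) degrades to MPOL_LOCAL, while mpol_new(MPOL_LOCAL, 0,
 * &full_mask) and mpol_new(MPOL_BIND, 0, &empty_mask) both return
 * ERR_PTR(-EINVAL).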
*/ if (mode == MPOL_PREFERRED) { if (nodes_empty(*nodes)) { if (((flags & MPOL_F_STATIC_NODES) || (flags & MPOL_F_RELATIVE_NODES))) return ERR_PTR(-EINVAL); mode = MPOL_LOCAL; } } else if (mode == MPOL_LOCAL) { if (!nodes_empty(*nodes) || (flags & MPOL_F_STATIC_NODES) || (flags & MPOL_F_RELATIVE_NODES)) return ERR_PTR(-EINVAL); } else if (nodes_empty(*nodes)) return ERR_PTR(-EINVAL); policy = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!policy) return ERR_PTR(-ENOMEM); atomic_set(&policy->refcnt, 1); policy->mode = mode; policy->flags = flags; policy->home_node = NUMA_NO_NODE; return policy; } /* Slow path of a mpol destructor. */ void __mpol_put(struct mempolicy *pol) { if (!atomic_dec_and_test(&pol->refcnt)) return; kmem_cache_free(policy_cache, pol); } static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes) { } static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes) { nodemask_t tmp; if (pol->flags & MPOL_F_STATIC_NODES) nodes_and(tmp, pol->w.user_nodemask, *nodes); else if (pol->flags & MPOL_F_RELATIVE_NODES) mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); else { nodes_remap(tmp, pol->nodes, pol->w.cpuset_mems_allowed, *nodes); pol->w.cpuset_mems_allowed = *nodes; } if (nodes_empty(tmp)) tmp = *nodes; pol->nodes = tmp; } static void mpol_rebind_preferred(struct mempolicy *pol, const nodemask_t *nodes) { pol->w.cpuset_mems_allowed = *nodes; } /* * mpol_rebind_policy - Migrate a policy to a different set of nodes * * Per-vma policies are protected by mmap_lock. Allocations using per-task * policies are protected by task->mems_allowed_seq to prevent a premature * OOM/allocation failure due to parallel nodemask modification. */ static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) { if (!pol || pol->mode == MPOL_LOCAL) return; if (!mpol_store_user_nodemask(pol) && nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) return; mpol_ops[pol->mode].rebind(pol, newmask); } /* * Wrapper for mpol_rebind_policy() that just requires task * pointer, and updates task mempolicy. * * Called with task's alloc_lock held. */ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) { mpol_rebind_policy(tsk->mempolicy, new); } /* * Rebind each vma in mm to new nodemask. * * Call holding a reference to mm. Takes mm->mmap_lock during call. 
*/ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) { struct vm_area_struct *vma; VMA_ITERATOR(vmi, mm, 0); mmap_write_lock(mm); for_each_vma(vmi, vma) { vma_start_write(vma); mpol_rebind_policy(vma->vm_policy, new); } mmap_write_unlock(mm); } static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { [MPOL_DEFAULT] = { .rebind = mpol_rebind_default, }, [MPOL_INTERLEAVE] = { .create = mpol_new_nodemask, .rebind = mpol_rebind_nodemask, }, [MPOL_PREFERRED] = { .create = mpol_new_preferred, .rebind = mpol_rebind_preferred, }, [MPOL_BIND] = { .create = mpol_new_nodemask, .rebind = mpol_rebind_nodemask, }, [MPOL_LOCAL] = { .rebind = mpol_rebind_default, }, [MPOL_PREFERRED_MANY] = { .create = mpol_new_nodemask, .rebind = mpol_rebind_preferred, }, [MPOL_WEIGHTED_INTERLEAVE] = { .create = mpol_new_nodemask, .rebind = mpol_rebind_nodemask, }, }; static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist, unsigned long flags); static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol, pgoff_t ilx, int *nid); static bool strictly_unmovable(unsigned long flags) { /* * STRICT without MOVE flags lets do_mbind() fail immediately with -EIO * if any misplaced page is found. */ return (flags & (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) == MPOL_MF_STRICT; } struct migration_mpol { /* for alloc_migration_target_by_mpol() */ struct mempolicy *pol; pgoff_t ilx; }; struct queue_pages { struct list_head *pagelist; unsigned long flags; nodemask_t *nmask; unsigned long start; unsigned long end; struct vm_area_struct *first; struct folio *large; /* note last large folio encountered */ long nr_failed; /* could not be isolated at this time */ }; /* * Check if the folio's nid is in qp->nmask. * * If MPOL_MF_INVERT is set in qp->flags, check if the nid is * in the invert of qp->nmask. */ static inline bool queue_folio_required(struct folio *folio, struct queue_pages *qp) { int nid = folio_nid(folio); unsigned long flags = qp->flags; return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT); } static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk) { struct folio *folio; struct queue_pages *qp = walk->private; if (unlikely(is_pmd_migration_entry(*pmd))) { qp->nr_failed++; return; } folio = pmd_folio(*pmd); if (is_huge_zero_folio(folio)) { walk->action = ACTION_CONTINUE; return; } if (!queue_folio_required(folio, qp)) return; if (!(qp->flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) || !vma_migratable(walk->vma) || !migrate_folio_add(folio, qp->pagelist, qp->flags)) qp->nr_failed++; } /* * Scan through folios, checking if they satisfy the required conditions, * moving them from LRU to local pagelist for migration if they do (or not). * * queue_folios_pte_range() has two possible return values: * 0 - continue walking to scan for more, even if an existing folio on the * wrong node could not be isolated and queued for migration. * -EIO - only MPOL_MF_STRICT was specified, without MPOL_MF_MOVE or ..._ALL, * and an existing folio was on a node that does not follow the policy. 
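 *
 * For example, mbind() with MPOL_MF_STRICT alone stops the walk at the first
 * misplaced folio and reports -EIO, while MPOL_MF_STRICT | MPOL_MF_MOVE keeps
 * scanning, queues what it can, and merely counts folios that could not be
 * isolated in qp->nr_failed (a summary of the behaviour implemented below).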
*/ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; struct folio *folio; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; pte_t *pte, *mapped_pte; pte_t ptent; spinlock_t *ptl; ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { queue_folios_pmd(pmd, walk); spin_unlock(ptl); goto out; } mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); if (!pte) { walk->action = ACTION_AGAIN; return 0; } for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = ptep_get(pte); if (pte_none(ptent)) continue; if (!pte_present(ptent)) { if (is_migration_entry(pte_to_swp_entry(ptent))) qp->nr_failed++; continue; } folio = vm_normal_folio(vma, addr, ptent); if (!folio || folio_is_zone_device(folio)) continue; /* * vm_normal_folio() filters out zero pages, but there might * still be reserved folios to skip, perhaps in a VDSO. */ if (folio_test_reserved(folio)) continue; if (!queue_folio_required(folio, qp)) continue; if (folio_test_large(folio)) { /* * A large folio can only be isolated from LRU once, * but may be mapped by many PTEs (and Copy-On-Write may * intersperse PTEs of other, order 0, folios). This is * a common case, so don't mistake it for failure (but * there can be other cases of multi-mapped pages which * this quick check does not help to filter out - and a * search of the pagelist might grow to be prohibitive). * * migrate_pages(&pagelist) returns nr_failed folios, so * check "large" now so that queue_pages_range() returns * a comparable nr_failed folios. This does imply that * if folio could not be isolated for some racy reason * at its first PTE, later PTEs will not give it another * chance of isolation; but keeps the accounting simple. */ if (folio == qp->large) continue; qp->large = folio; } if (!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) || !vma_migratable(vma) || !migrate_folio_add(folio, qp->pagelist, flags)) { qp->nr_failed++; if (strictly_unmovable(flags)) break; } } pte_unmap_unlock(mapped_pte, ptl); cond_resched(); out: if (qp->nr_failed && strictly_unmovable(flags)) return -EIO; return 0; } static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { #ifdef CONFIG_HUGETLB_PAGE struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; struct folio *folio; spinlock_t *ptl; pte_t entry; ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); entry = huge_ptep_get(walk->mm, addr, pte); if (!pte_present(entry)) { if (unlikely(is_hugetlb_entry_migration(entry))) qp->nr_failed++; goto unlock; } folio = pfn_folio(pte_pfn(entry)); if (!queue_folio_required(folio, qp)) goto unlock; if (!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) || !vma_migratable(walk->vma)) { qp->nr_failed++; goto unlock; } /* * Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio. * Choosing not to migrate a shared folio is not counted as a failure. * * See folio_likely_mapped_shared() on possible imprecision when we * cannot easily detect if a folio is shared. */ if ((flags & MPOL_MF_MOVE_ALL) || (!folio_likely_mapped_shared(folio) && !hugetlb_pmd_shared(pte))) if (!folio_isolate_hugetlb(folio, qp->pagelist)) qp->nr_failed++; unlock: spin_unlock(ptl); if (qp->nr_failed && strictly_unmovable(flags)) return -EIO; #endif return 0; } #ifdef CONFIG_NUMA_BALANCING /* * This is used to mark a range of virtual addresses to be inaccessible. * These are later cleared by a NUMA hinting fault. 
Depending on these * faults, pages may be migrated for better NUMA placement. * * This is assuming that NUMA faults are handled using PROT_NONE. If * an architecture makes a different choice, it will need further * changes to the core. */ unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long addr, unsigned long end) { struct mmu_gather tlb; long nr_updated; tlb_gather_mmu(&tlb, vma->vm_mm); nr_updated = change_protection(&tlb, vma, addr, end, MM_CP_PROT_NUMA); if (nr_updated > 0) { count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated); count_memcg_events_mm(vma->vm_mm, NUMA_PTE_UPDATES, nr_updated); } tlb_finish_mmu(&tlb); return nr_updated; } #endif /* CONFIG_NUMA_BALANCING */ static int queue_pages_test_walk(unsigned long start, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *next, *vma = walk->vma; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; /* range check first */ VM_BUG_ON_VMA(!range_in_vma(vma, start, end), vma); if (!qp->first) { qp->first = vma; if (!(flags & MPOL_MF_DISCONTIG_OK) && (qp->start < vma->vm_start)) /* hole at head side of range */ return -EFAULT; } next = find_vma(vma->vm_mm, vma->vm_end); if (!(flags & MPOL_MF_DISCONTIG_OK) && ((vma->vm_end < qp->end) && (!next || vma->vm_end < next->vm_start))) /* hole at middle or tail of range */ return -EFAULT; /* * Need check MPOL_MF_STRICT to return -EIO if possible * regardless of vma_migratable */ if (!vma_migratable(vma) && !(flags & MPOL_MF_STRICT)) return 1; /* * Check page nodes, and queue pages to move, in the current vma. * But if no moving, and no strict checking, the scan can be skipped. */ if (flags & (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) return 0; return 1; } static const struct mm_walk_ops queue_pages_walk_ops = { .hugetlb_entry = queue_folios_hugetlb, .pmd_entry = queue_folios_pte_range, .test_walk = queue_pages_test_walk, .walk_lock = PGWALK_RDLOCK, }; static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = { .hugetlb_entry = queue_folios_hugetlb, .pmd_entry = queue_folios_pte_range, .test_walk = queue_pages_test_walk, .walk_lock = PGWALK_WRLOCK, }; /* * Walk through page tables and collect pages to be migrated. * * If pages found in a given range are not on the required set of @nodes, * and migration is allowed, they are isolated and queued to @pagelist. * * queue_pages_range() may return: * 0 - all pages already on the right node, or successfully queued for moving * (or neither strict checking nor moving requested: only range checking). * >0 - this number of misplaced folios could not be queued for moving * (a hugetlbfs page or a transparent huge page being counted as 1). * -EIO - a misplaced page found, when MPOL_MF_STRICT specified without MOVEs. * -EFAULT - a hole in the memory range, when MPOL_MF_DISCONTIG_OK unspecified. */ static long queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, nodemask_t *nodes, unsigned long flags, struct list_head *pagelist) { int err; struct queue_pages qp = { .pagelist = pagelist, .flags = flags, .nmask = nodes, .start = start, .end = end, .first = NULL, }; const struct mm_walk_ops *ops = (flags & MPOL_MF_WRLOCK) ? &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops; err = walk_page_range(mm, start, end, ops, &qp); if (!qp.first) /* whole range in hole */ err = -EFAULT; return err ? : qp.nr_failed; } /* * Apply policy to a single VMA * This must be called with the mmap_lock held for writing. 
*/ static int vma_replace_policy(struct vm_area_struct *vma, struct mempolicy *pol) { int err; struct mempolicy *old; struct mempolicy *new; vma_assert_write_locked(vma); new = mpol_dup(pol); if (IS_ERR(new)) return PTR_ERR(new); if (vma->vm_ops && vma->vm_ops->set_policy) { err = vma->vm_ops->set_policy(vma, new); if (err) goto err_out; } old = vma->vm_policy; vma->vm_policy = new; /* protected by mmap_lock */ mpol_put(old); return 0; err_out: mpol_put(new); return err; } /* Split or merge the VMA (if required) and apply the new policy */ static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, struct mempolicy *new_pol) { unsigned long vmstart, vmend; vmend = min(end, vma->vm_end); if (start > vma->vm_start) { *prev = vma; vmstart = start; } else { vmstart = vma->vm_start; } if (mpol_equal(vma->vm_policy, new_pol)) { *prev = vma; return 0; } vma = vma_modify_policy(vmi, *prev, vma, vmstart, vmend, new_pol); if (IS_ERR(vma)) return PTR_ERR(vma); *prev = vma; return vma_replace_policy(vma, new_pol); } /* Set the process memory policy */ static long do_set_mempolicy(unsigned short mode, unsigned short flags, nodemask_t *nodes) { struct mempolicy *new, *old; NODEMASK_SCRATCH(scratch); int ret; if (!scratch) return -ENOMEM; new = mpol_new(mode, flags, nodes); if (IS_ERR(new)) { ret = PTR_ERR(new); goto out; } task_lock(current); ret = mpol_set_nodemask(new, nodes, scratch); if (ret) { task_unlock(current); mpol_put(new); goto out; } old = current->mempolicy; current->mempolicy = new; if (new && (new->mode == MPOL_INTERLEAVE || new->mode == MPOL_WEIGHTED_INTERLEAVE)) { current->il_prev = MAX_NUMNODES-1; current->il_weight = 0; } task_unlock(current); mpol_put(old); ret = 0; out: NODEMASK_SCRATCH_FREE(scratch); return ret; } /* * Return nodemask for policy for get_mempolicy() query * * Called with task's alloc_lock held */ static void get_policy_nodemask(struct mempolicy *pol, nodemask_t *nodes) { nodes_clear(*nodes); if (pol == &default_policy) return; switch (pol->mode) { case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_PREFERRED: case MPOL_PREFERRED_MANY: case MPOL_WEIGHTED_INTERLEAVE: *nodes = pol->nodes; break; case MPOL_LOCAL: /* return empty node mask for local allocation */ break; default: BUG(); } } static int lookup_node(struct mm_struct *mm, unsigned long addr) { struct page *p = NULL; int ret; ret = get_user_pages_fast(addr & PAGE_MASK, 1, 0, &p); if (ret > 0) { ret = page_to_nid(p); put_page(p); } return ret; } /* Retrieve NUMA policy */ static long do_get_mempolicy(int *policy, nodemask_t *nmask, unsigned long addr, unsigned long flags) { int err; struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL; struct mempolicy *pol = current->mempolicy, *pol_refcount = NULL; if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED)) return -EINVAL; if (flags & MPOL_F_MEMS_ALLOWED) { if (flags & (MPOL_F_NODE|MPOL_F_ADDR)) return -EINVAL; *policy = 0; /* just so it's initialized */ task_lock(current); *nmask = cpuset_current_mems_allowed; task_unlock(current); return 0; } if (flags & MPOL_F_ADDR) { pgoff_t ilx; /* ignored here */ /* * Do NOT fall back to task policy if the * vma/shared policy at addr is NULL. We * want to return MPOL_DEFAULT in this case. 
*/ mmap_read_lock(mm); vma = vma_lookup(mm, addr); if (!vma) { mmap_read_unlock(mm); return -EFAULT; } pol = __get_vma_policy(vma, addr, &ilx); } else if (addr) return -EINVAL; if (!pol) pol = &default_policy; /* indicates default behavior */ if (flags & MPOL_F_NODE) { if (flags & MPOL_F_ADDR) { /* * Take a refcount on the mpol, because we are about to * drop the mmap_lock, after which only "pol" remains * valid, "vma" is stale. */ pol_refcount = pol; vma = NULL; mpol_get(pol); mmap_read_unlock(mm); err = lookup_node(mm, addr); if (err < 0) goto out; *policy = err; } else if (pol == current->mempolicy && pol->mode == MPOL_INTERLEAVE) { *policy = next_node_in(current->il_prev, pol->nodes); } else if (pol == current->mempolicy && pol->mode == MPOL_WEIGHTED_INTERLEAVE) { if (current->il_weight) *policy = current->il_prev; else *policy = next_node_in(current->il_prev, pol->nodes); } else { err = -EINVAL; goto out; } } else { *policy = pol == &default_policy ? MPOL_DEFAULT : pol->mode; /* * Internal mempolicy flags must be masked off before exposing * the policy to userspace. */ *policy |= (pol->flags & MPOL_MODE_FLAGS); } err = 0; if (nmask) { if (mpol_store_user_nodemask(pol)) { *nmask = pol->w.user_nodemask; } else { task_lock(current); get_policy_nodemask(pol, nmask); task_unlock(current); } } out: mpol_cond_put(pol); if (vma) mmap_read_unlock(mm); if (pol_refcount) mpol_put(pol_refcount); return err; } #ifdef CONFIG_MIGRATION static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist, unsigned long flags) { /* * Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio. * Choosing not to migrate a shared folio is not counted as a failure. * * See folio_likely_mapped_shared() on possible imprecision when we * cannot easily detect if a folio is shared. */ if ((flags & MPOL_MF_MOVE_ALL) || !folio_likely_mapped_shared(folio)) { if (folio_isolate_lru(folio)) { list_add_tail(&folio->lru, foliolist); node_stat_mod_folio(folio, NR_ISOLATED_ANON + folio_is_file_lru(folio), folio_nr_pages(folio)); } else { /* * Non-movable folio may reach here. And, there may be * temporary off LRU folios or non-LRU movable folios. * Treat them as unmovable folios since they can't be * isolated, so they can't be moved at the moment. */ return false; } } return true; } /* * Migrate pages from one node to a target node. * Returns error or the number of pages not migrated. */ static long migrate_to_node(struct mm_struct *mm, int source, int dest, int flags) { nodemask_t nmask; struct vm_area_struct *vma; LIST_HEAD(pagelist); long nr_failed; long err = 0; struct migration_target_control mtc = { .nid = dest, .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, .reason = MR_SYSCALL, }; nodes_clear(nmask); node_set(source, nmask); VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))); mmap_read_lock(mm); vma = find_vma(mm, 0); if (unlikely(!vma)) { mmap_read_unlock(mm); return 0; } /* * This does not migrate the range, but isolates all pages that * need migration. Between passing in the full user address * space range and MPOL_MF_DISCONTIG_OK, this call cannot fail, * but passes back the count of pages which could not be isolated. 
*/ nr_failed = queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask, flags | MPOL_MF_DISCONTIG_OK, &pagelist); mmap_read_unlock(mm); if (!list_empty(&pagelist)) { err = migrate_pages(&pagelist, alloc_migration_target, NULL, (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL); if (err) putback_movable_pages(&pagelist); } if (err >= 0) err += nr_failed; return err; } /* * Move pages between the two nodesets so as to preserve the physical * layout as much as possible. * * Returns the number of page that could not be moved. */ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags) { long nr_failed = 0; long err = 0; nodemask_t tmp; lru_cache_disable(); /* * Find a 'source' bit set in 'tmp' whose corresponding 'dest' * bit in 'to' is not also set in 'tmp'. Clear the found 'source' * bit in 'tmp', and return that <source, dest> pair for migration. * The pair of nodemasks 'to' and 'from' define the map. * * If no pair of bits is found that way, fallback to picking some * pair of 'source' and 'dest' bits that are not the same. If the * 'source' and 'dest' bits are the same, this represents a node * that will be migrating to itself, so no pages need move. * * If no bits are left in 'tmp', or if all remaining bits left * in 'tmp' correspond to the same bit in 'to', return false * (nothing left to migrate). * * This lets us pick a pair of nodes to migrate between, such that * if possible the dest node is not already occupied by some other * source node, minimizing the risk of overloading the memory on a * node that would happen if we migrated incoming memory to a node * before migrating outgoing memory source that same node. * * A single scan of tmp is sufficient. As we go, we remember the * most recent <s, d> pair that moved (s != d). If we find a pair * that not only moved, but what's better, moved to an empty slot * (d is not set in tmp), then we break out then, with that pair. * Otherwise when we finish scanning from_tmp, we at least have the * most recent <s, d> pair that moved. If we get all the way through * the scan of tmp without finding any node that moved, much less * moved to an empty node, then there is nothing left worth migrating. */ tmp = *from; while (!nodes_empty(tmp)) { int s, d; int source = NUMA_NO_NODE; int dest = 0; for_each_node_mask(s, tmp) { /* * do_migrate_pages() tries to maintain the relative * node relationship of the pages established between * threads and memory areas. * * However if the number of source nodes is not equal to * the number of destination nodes we can not preserve * this node relative relationship. In that case, skip * copying memory from a node that is in the destination * mask. * * Example: [2,3,4] -> [3,4,5] moves everything. * [0-7] - > [3,4,5] moves only 0,1,2,6,7. */ if ((nodes_weight(*from) != nodes_weight(*to)) && (node_isset(s, *to))) continue; d = node_remap(s, *from, *to); if (s == d) continue; source = s; /* Node moved. Memorize */ dest = d; /* dest not in remaining from nodes? */ if (!node_isset(dest, tmp)) break; } if (source == NUMA_NO_NODE) break; node_clear(source, tmp); err = migrate_to_node(mm, source, dest, flags); if (err > 0) nr_failed += err; if (err < 0) break; } lru_cache_enable(); if (err < 0) return err; return (nr_failed < INT_MAX) ? nr_failed : INT_MAX; } /* * Allocate a new folio for page migration, according to NUMA mempolicy. 
*/ static struct folio *alloc_migration_target_by_mpol(struct folio *src, unsigned long private) { struct migration_mpol *mmpol = (struct migration_mpol *)private; struct mempolicy *pol = mmpol->pol; pgoff_t ilx = mmpol->ilx; unsigned int order; int nid = numa_node_id(); gfp_t gfp; order = folio_order(src); ilx += src->index >> order; if (folio_test_hugetlb(src)) { nodemask_t *nodemask; struct hstate *h; h = folio_hstate(src); gfp = htlb_alloc_mask(h); nodemask = policy_nodemask(gfp, pol, ilx, &nid); return alloc_hugetlb_folio_nodemask(h, nid, nodemask, gfp, htlb_allow_alloc_fallback(MR_MEMPOLICY_MBIND)); } if (folio_test_large(src)) gfp = GFP_TRANSHUGE; else gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL | __GFP_COMP; return folio_alloc_mpol(gfp, order, pol, ilx, nid); } #else static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist, unsigned long flags) { return false; } int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags) { return -ENOSYS; } static struct folio *alloc_migration_target_by_mpol(struct folio *src, unsigned long private) { return NULL; } #endif static long do_mbind(unsigned long start, unsigned long len, unsigned short mode, unsigned short mode_flags, nodemask_t *nmask, unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; struct vma_iterator vmi; struct migration_mpol mmpol; struct mempolicy *new; unsigned long end; long err; long nr_failed; LIST_HEAD(pagelist); if (flags & ~(unsigned long)MPOL_MF_VALID) return -EINVAL; if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) return -EPERM; if (start & ~PAGE_MASK) return -EINVAL; if (mode == MPOL_DEFAULT) flags &= ~MPOL_MF_STRICT; len = PAGE_ALIGN(len); end = start + len; if (end < start) return -EINVAL; if (end == start) return 0; new = mpol_new(mode, mode_flags, nmask); if (IS_ERR(new)) return PTR_ERR(new); /* * If we are using the default policy then operation * on discontinuous address spaces is okay after all */ if (!new) flags |= MPOL_MF_DISCONTIG_OK; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) lru_cache_disable(); { NODEMASK_SCRATCH(scratch); if (scratch) { mmap_write_lock(mm); err = mpol_set_nodemask(new, nmask, scratch); if (err) mmap_write_unlock(mm); } else err = -ENOMEM; NODEMASK_SCRATCH_FREE(scratch); } if (err) goto mpol_out; /* * Lock the VMAs before scanning for pages to migrate, * to ensure we don't miss a concurrently inserted page. */ nr_failed = queue_pages_range(mm, start, end, nmask, flags | MPOL_MF_INVERT | MPOL_MF_WRLOCK, &pagelist); if (nr_failed < 0) { err = nr_failed; nr_failed = 0; } else { vma_iter_init(&vmi, mm, start); prev = vma_prev(&vmi); for_each_vma_range(vmi, vma, end) { err = mbind_range(&vmi, vma, &prev, start, end, new); if (err) break; } } if (!err && !list_empty(&pagelist)) { /* Convert MPOL_DEFAULT's NULL to task or default policy */ if (!new) { new = get_task_policy(current); mpol_get(new); } mmpol.pol = new; mmpol.ilx = 0; /* * In the interleaved case, attempt to allocate on exactly the * targeted nodes, for the first VMA to be migrated; for later * VMAs, the nodes will still be interleaved from the targeted * nodemask, but one by one may be selected differently. 
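 *
 * Illustration with made-up numbers: if the first non-KSM folio on the list
 * has index 7 and get_vma_policy() computes interleave index B + 7 for its
 * address, then mmpol.ilx is set to B, so a migrated folio with index i is
 * later allocated at interleave index B + i by
 * alloc_migration_target_by_mpol().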
*/ if (new->mode == MPOL_INTERLEAVE || new->mode == MPOL_WEIGHTED_INTERLEAVE) { struct folio *folio; unsigned int order; unsigned long addr = -EFAULT; list_for_each_entry(folio, &pagelist, lru) { if (!folio_test_ksm(folio)) break; } if (!list_entry_is_head(folio, &pagelist, lru)) { vma_iter_init(&vmi, mm, start); for_each_vma_range(vmi, vma, end) { addr = page_address_in_vma(folio, folio_page(folio, 0), vma); if (addr != -EFAULT) break; } } if (addr != -EFAULT) { order = folio_order(folio); /* We already know the pol, but not the ilx */ mpol_cond_put(get_vma_policy(vma, addr, order, &mmpol.ilx)); /* Set base from which to increment by index */ mmpol.ilx -= folio->index >> order; } } } mmap_write_unlock(mm); if (!err && !list_empty(&pagelist)) { nr_failed |= migrate_pages(&pagelist, alloc_migration_target_by_mpol, NULL, (unsigned long)&mmpol, MIGRATE_SYNC, MR_MEMPOLICY_MBIND, NULL); } if (nr_failed && (flags & MPOL_MF_STRICT)) err = -EIO; if (!list_empty(&pagelist)) putback_movable_pages(&pagelist); mpol_out: mpol_put(new); if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) lru_cache_enable(); return err; } /* * User space interface with variable sized bitmaps for nodelists. */ static int get_bitmap(unsigned long *mask, const unsigned long __user *nmask, unsigned long maxnode) { unsigned long nlongs = BITS_TO_LONGS(maxnode); int ret; if (in_compat_syscall()) ret = compat_get_bitmap(mask, (const compat_ulong_t __user *)nmask, maxnode); else ret = copy_from_user(mask, nmask, nlongs * sizeof(unsigned long)); if (ret) return -EFAULT; if (maxnode % BITS_PER_LONG) mask[nlongs - 1] &= (1UL << (maxnode % BITS_PER_LONG)) - 1; return 0; } /* Copy a node mask from user space. */ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, unsigned long maxnode) { --maxnode; nodes_clear(*nodes); if (maxnode == 0 || !nmask) return 0; if (maxnode > PAGE_SIZE*BITS_PER_BYTE) return -EINVAL; /* * When the user specified more nodes than supported just check * if the non supported part is all zero, one word at a time, * starting at the end. */ while (maxnode > MAX_NUMNODES) { unsigned long bits = min_t(unsigned long, maxnode, BITS_PER_LONG); unsigned long t; if (get_bitmap(&t, &nmask[(maxnode - 1) / BITS_PER_LONG], bits)) return -EFAULT; if (maxnode - bits >= MAX_NUMNODES) { maxnode -= bits; } else { maxnode = MAX_NUMNODES; t &= ~((1UL << (MAX_NUMNODES % BITS_PER_LONG)) - 1); } if (t) return -EINVAL; } return get_bitmap(nodes_addr(*nodes), nmask, maxnode); } /* Copy a kernel node mask to user space */ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode, nodemask_t *nodes) { unsigned long copy = ALIGN(maxnode-1, 64) / 8; unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long); bool compat = in_compat_syscall(); if (compat) nbytes = BITS_TO_COMPAT_LONGS(nr_node_ids) * sizeof(compat_long_t); if (copy > nbytes) { if (copy > PAGE_SIZE) return -EINVAL; if (clear_user((char __user *)mask + nbytes, copy - nbytes)) return -EFAULT; copy = nbytes; maxnode = nr_node_ids; } if (compat) return compat_put_bitmap((compat_ulong_t __user *)mask, nodes_addr(*nodes), maxnode); return copy_to_user(mask, nodes_addr(*nodes), copy) ? 
-EFAULT : 0; } /* Basic parameter sanity check used by both mbind() and set_mempolicy() */ static inline int sanitize_mpol_flags(int *mode, unsigned short *flags) { *flags = *mode & MPOL_MODE_FLAGS; *mode &= ~MPOL_MODE_FLAGS; if ((unsigned int)(*mode) >= MPOL_MAX) return -EINVAL; if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES)) return -EINVAL; if (*flags & MPOL_F_NUMA_BALANCING) { if (*mode == MPOL_BIND || *mode == MPOL_PREFERRED_MANY) *flags |= (MPOL_F_MOF | MPOL_F_MORON); else return -EINVAL; } return 0; } static long kernel_mbind(unsigned long start, unsigned long len, unsigned long mode, const unsigned long __user *nmask, unsigned long maxnode, unsigned int flags) { unsigned short mode_flags; nodemask_t nodes; int lmode = mode; int err; start = untagged_addr(start); err = sanitize_mpol_flags(&lmode, &mode_flags); if (err) return err; err = get_nodes(&nodes, nmask, maxnode); if (err) return err; return do_mbind(start, len, lmode, mode_flags, &nodes, flags); } SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, len, unsigned long, home_node, unsigned long, flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; struct mempolicy *new, *old; unsigned long end; int err = -ENOENT; VMA_ITERATOR(vmi, mm, start); start = untagged_addr(start); if (start & ~PAGE_MASK) return -EINVAL; /* * flags is used for future extension if any. */ if (flags != 0) return -EINVAL; /* * Check home_node is online to avoid accessing uninitialized * NODE_DATA. */ if (home_node >= MAX_NUMNODES || !node_online(home_node)) return -EINVAL; len = PAGE_ALIGN(len); end = start + len; if (end < start) return -EINVAL; if (end == start) return 0; mmap_write_lock(mm); prev = vma_prev(&vmi); for_each_vma_range(vmi, vma, end) { /* * If any vma in the range got policy other than MPOL_BIND * or MPOL_PREFERRED_MANY we return error. We don't reset * the home node for vmas we already updated before. 
*/ old = vma_policy(vma); if (!old) { prev = vma; continue; } if (old->mode != MPOL_BIND && old->mode != MPOL_PREFERRED_MANY) { err = -EOPNOTSUPP; break; } new = mpol_dup(old); if (IS_ERR(new)) { err = PTR_ERR(new); break; } vma_start_write(vma); new->home_node = home_node; err = mbind_range(&vmi, vma, &prev, start, end, new); mpol_put(new); if (err) break; } mmap_write_unlock(mm); return err; } SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, unsigned long, mode, const unsigned long __user *, nmask, unsigned long, maxnode, unsigned int, flags) { return kernel_mbind(start, len, mode, nmask, maxnode, flags); } /* Set the process memory policy */ static long kernel_set_mempolicy(int mode, const unsigned long __user *nmask, unsigned long maxnode) { unsigned short mode_flags; nodemask_t nodes; int lmode = mode; int err; err = sanitize_mpol_flags(&lmode, &mode_flags); if (err) return err; err = get_nodes(&nodes, nmask, maxnode); if (err) return err; return do_set_mempolicy(lmode, mode_flags, &nodes); } SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, unsigned long, maxnode) { return kernel_set_mempolicy(mode, nmask, maxnode); } static int kernel_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *old_nodes, const unsigned long __user *new_nodes) { struct mm_struct *mm = NULL; struct task_struct *task; nodemask_t task_nodes; int err; nodemask_t *old; nodemask_t *new; NODEMASK_SCRATCH(scratch); if (!scratch) return -ENOMEM; old = &scratch->mask1; new = &scratch->mask2; err = get_nodes(old, old_nodes, maxnode); if (err) goto out; err = get_nodes(new, new_nodes, maxnode); if (err) goto out; /* Find the mm_struct */ rcu_read_lock(); task = pid ? find_task_by_vpid(pid) : current; if (!task) { rcu_read_unlock(); err = -ESRCH; goto out; } get_task_struct(task); err = -EINVAL; /* * Check if this process has the right to modify the specified process. * Use the regular "ptrace_may_access()" checks. */ if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) { rcu_read_unlock(); err = -EPERM; goto out_put; } rcu_read_unlock(); task_nodes = cpuset_mems_allowed(task); /* Is the user allowed to access the target nodes? */ if (!nodes_subset(*new, task_nodes) && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out_put; } task_nodes = cpuset_mems_allowed(current); nodes_and(*new, *new, task_nodes); if (nodes_empty(*new)) goto out_put; err = security_task_movememory(task); if (err) goto out_put; mm = get_task_mm(task); put_task_struct(task); if (!mm) { err = -EINVAL; goto out; } err = do_migrate_pages(mm, old, new, capable(CAP_SYS_NICE) ? 
MPOL_MF_MOVE_ALL : MPOL_MF_MOVE); mmput(mm); out: NODEMASK_SCRATCH_FREE(scratch); return err; out_put: put_task_struct(task); goto out; } SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, const unsigned long __user *, old_nodes, const unsigned long __user *, new_nodes) { return kernel_migrate_pages(pid, maxnode, old_nodes, new_nodes); } /* Retrieve NUMA policy */ static int kernel_get_mempolicy(int __user *policy, unsigned long __user *nmask, unsigned long maxnode, unsigned long addr, unsigned long flags) { int err; int pval; nodemask_t nodes; if (nmask != NULL && maxnode < nr_node_ids) return -EINVAL; addr = untagged_addr(addr); err = do_get_mempolicy(&pval, &nodes, addr, flags); if (err) return err; if (policy && put_user(pval, policy)) return -EFAULT; if (nmask) err = copy_nodes_to_user(nmask, maxnode, &nodes); return err; } SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, unsigned long __user *, nmask, unsigned long, maxnode, unsigned long, addr, unsigned long, flags) { return kernel_get_mempolicy(policy, nmask, maxnode, addr, flags); } bool vma_migratable(struct vm_area_struct *vma) { if (vma->vm_flags & (VM_IO | VM_PFNMAP)) return false; /* * DAX device mappings require predictable access latency, so avoid * incurring periodic faults. */ if (vma_is_dax(vma)) return false; if (is_vm_hugetlb_page(vma) && !hugepage_migration_supported(hstate_vma(vma))) return false; /* * Migration allocates pages in the highest zone. If we cannot * do so then migration (at least from node to node) is not * possible. */ if (vma->vm_file && gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping)) < policy_zone) return false; return true; } struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx) { *ilx = 0; return (vma->vm_ops && vma->vm_ops->get_policy) ? vma->vm_ops->get_policy(vma, addr, ilx) : vma->vm_policy; } /* * get_vma_policy(@vma, @addr, @order, @ilx) * @vma: virtual memory area whose policy is sought * @addr: address in @vma for shared policy lookup * @order: 0, or appropriate huge_page_order for interleaving * @ilx: interleave index (output), for use only when MPOL_INTERLEAVE or * MPOL_WEIGHTED_INTERLEAVE * * Returns effective policy for a VMA at specified address. * Falls back to current->mempolicy or system default policy, as necessary. * Shared policies [those marked as MPOL_F_SHARED] require an extra reference * count--added by the get_policy() vm_op, as appropriate--to protect against * freeing by another task. It is the caller's responsibility to free the * extra reference for shared policies. 
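 *
 * Typical usage, as in vma_alloc_folio_noprof() later in this file:
 *
 *	pol = get_vma_policy(vma, addr, order, &ilx);
 *	folio = folio_alloc_mpol_noprof(gfp, order, pol, ilx, numa_node_id());
 *	mpol_cond_put(pol);
 *
 * where mpol_cond_put() drops the reference only if the policy is shared.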
*/ struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr, int order, pgoff_t *ilx) { struct mempolicy *pol; pol = __get_vma_policy(vma, addr, ilx); if (!pol) pol = get_task_policy(current); if (pol->mode == MPOL_INTERLEAVE || pol->mode == MPOL_WEIGHTED_INTERLEAVE) { *ilx += vma->vm_pgoff >> order; *ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order); } return pol; } bool vma_policy_mof(struct vm_area_struct *vma) { struct mempolicy *pol; if (vma->vm_ops && vma->vm_ops->get_policy) { bool ret = false; pgoff_t ilx; /* ignored here */ pol = vma->vm_ops->get_policy(vma, vma->vm_start, &ilx); if (pol && (pol->flags & MPOL_F_MOF)) ret = true; mpol_cond_put(pol); return ret; } pol = vma->vm_policy; if (!pol) pol = get_task_policy(current); return pol->flags & MPOL_F_MOF; } bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone) { enum zone_type dynamic_policy_zone = policy_zone; BUG_ON(dynamic_policy_zone == ZONE_MOVABLE); /* * if policy->nodes has movable memory only, * we apply policy when gfp_zone(gfp) = ZONE_MOVABLE only. * * policy->nodes is intersect with node_states[N_MEMORY]. * so if the following test fails, it implies * policy->nodes has movable memory only. */ if (!nodes_intersects(policy->nodes, node_states[N_HIGH_MEMORY])) dynamic_policy_zone = ZONE_MOVABLE; return zone >= dynamic_policy_zone; } static unsigned int weighted_interleave_nodes(struct mempolicy *policy) { unsigned int node; unsigned int cpuset_mems_cookie; retry: /* to prevent miscount use tsk->mems_allowed_seq to detect rebind */ cpuset_mems_cookie = read_mems_allowed_begin(); node = current->il_prev; if (!current->il_weight || !node_isset(node, policy->nodes)) { node = next_node_in(node, policy->nodes); if (read_mems_allowed_retry(cpuset_mems_cookie)) goto retry; if (node == MAX_NUMNODES) return node; current->il_prev = node; current->il_weight = get_il_weight(node); } current->il_weight--; return node; } /* Do dynamic interleaving for a process */ static unsigned int interleave_nodes(struct mempolicy *policy) { unsigned int nid; unsigned int cpuset_mems_cookie; /* to prevent miscount, use tsk->mems_allowed_seq to detect rebind */ do { cpuset_mems_cookie = read_mems_allowed_begin(); nid = next_node_in(current->il_prev, policy->nodes); } while (read_mems_allowed_retry(cpuset_mems_cookie)); if (nid < MAX_NUMNODES) current->il_prev = nid; return nid; } /* * Depending on the memory policy provide a node from which to allocate the * next slab entry. */ unsigned int mempolicy_slab_node(void) { struct mempolicy *policy; int node = numa_mem_id(); if (!in_task()) return node; policy = current->mempolicy; if (!policy) return node; switch (policy->mode) { case MPOL_PREFERRED: return first_node(policy->nodes); case MPOL_INTERLEAVE: return interleave_nodes(policy); case MPOL_WEIGHTED_INTERLEAVE: return weighted_interleave_nodes(policy); case MPOL_BIND: case MPOL_PREFERRED_MANY: { struct zoneref *z; /* * Follow bind policy behavior and start allocation at the * first node. */ struct zonelist *zonelist; enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL); zonelist = &NODE_DATA(node)->node_zonelists[ZONELIST_FALLBACK]; z = first_zones_zonelist(zonelist, highest_zoneidx, &policy->nodes); return zonelist_zone(z) ? zonelist_node_idx(z) : node; } case MPOL_LOCAL: return node; default: BUG(); } } static unsigned int read_once_policy_nodemask(struct mempolicy *pol, nodemask_t *mask) { /* * barrier stabilizes the nodemask locally so that it can be iterated * over safely without concern for changes. 
Allocators validate node * selection does not violate mems_allowed, so this is safe. */ barrier(); memcpy(mask, &pol->nodes, sizeof(nodemask_t)); barrier(); return nodes_weight(*mask); } static unsigned int weighted_interleave_nid(struct mempolicy *pol, pgoff_t ilx) { nodemask_t nodemask; unsigned int target, nr_nodes; u8 *table; unsigned int weight_total = 0; u8 weight; int nid; nr_nodes = read_once_policy_nodemask(pol, &nodemask); if (!nr_nodes) return numa_node_id(); rcu_read_lock(); table = rcu_dereference(iw_table); /* calculate the total weight */ for_each_node_mask(nid, nodemask) { /* detect system default usage */ weight = table ? table[nid] : 1; weight = weight ? weight : 1; weight_total += weight; } /* Calculate the node offset based on totals */ target = ilx % weight_total; nid = first_node(nodemask); while (target) { /* detect system default usage */ weight = table ? table[nid] : 1; weight = weight ? weight : 1; if (target < weight) break; target -= weight; nid = next_node_in(nid, nodemask); } rcu_read_unlock(); return nid; } /* * Do static interleaving for interleave index @ilx. Returns the ilx'th * node in pol->nodes (starting from ilx=0), wrapping around if ilx * exceeds the number of present nodes. */ static unsigned int interleave_nid(struct mempolicy *pol, pgoff_t ilx) { nodemask_t nodemask; unsigned int target, nnodes; int i; int nid; nnodes = read_once_policy_nodemask(pol, &nodemask); if (!nnodes) return numa_node_id(); target = ilx % nnodes; nid = first_node(nodemask); for (i = 0; i < target; i++) nid = next_node(nid, nodemask); return nid; } /* * Return a nodemask representing a mempolicy for filtering nodes for * page allocation, together with preferred node id (or the input node id). */ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol, pgoff_t ilx, int *nid) { nodemask_t *nodemask = NULL; switch (pol->mode) { case MPOL_PREFERRED: /* Override input node id */ *nid = first_node(pol->nodes); break; case MPOL_PREFERRED_MANY: nodemask = &pol->nodes; if (pol->home_node != NUMA_NO_NODE) *nid = pol->home_node; break; case MPOL_BIND: /* Restrict to nodemask (but not on lower zones) */ if (apply_policy_zone(pol, gfp_zone(gfp)) && cpuset_nodemask_valid_mems_allowed(&pol->nodes)) nodemask = &pol->nodes; if (pol->home_node != NUMA_NO_NODE) *nid = pol->home_node; /* * __GFP_THISNODE shouldn't even be used with the bind policy * because we might easily break the expectation to stay on the * requested node and not break the policy. */ WARN_ON_ONCE(gfp & __GFP_THISNODE); break; case MPOL_INTERLEAVE: /* Override input node id */ *nid = (ilx == NO_INTERLEAVE_INDEX) ? interleave_nodes(pol) : interleave_nid(pol, ilx); break; case MPOL_WEIGHTED_INTERLEAVE: *nid = (ilx == NO_INTERLEAVE_INDEX) ? weighted_interleave_nodes(pol) : weighted_interleave_nid(pol, ilx); break; } return nodemask; } #ifdef CONFIG_HUGETLBFS /* * huge_node(@vma, @addr, @gfp_flags, @mpol) * @vma: virtual memory area whose policy is sought * @addr: address in @vma for shared policy lookup and interleave policy * @gfp_flags: for requested zone * @mpol: pointer to mempolicy pointer for reference counted mempolicy * @nodemask: pointer to nodemask pointer for 'bind' and 'prefer-many' policy * * Returns a nid suitable for a huge page allocation and a pointer * to the struct mempolicy for conditional unref after allocation. * If the effective policy is 'bind' or 'prefer-many', returns a pointer * to the mempolicy's @nodemask for filtering the zonelist. 
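 *
 * A hugetlb caller would typically do something like this (sketch only, not
 * copied from mm/hugetlb.c):
 *
 *	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
 *	... allocate or dequeue a huge folio from (nid, nodemask) ...
 *	mpol_cond_put(mpol);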
*/ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask) { pgoff_t ilx; int nid; nid = numa_node_id(); *mpol = get_vma_policy(vma, addr, hstate_vma(vma)->order, &ilx); *nodemask = policy_nodemask(gfp_flags, *mpol, ilx, &nid); return nid; } /* * init_nodemask_of_mempolicy * * If the current task's mempolicy is "default" [NULL], return 'false' * to indicate default policy. Otherwise, extract the policy nodemask * for 'bind' or 'interleave' policy into the argument nodemask, or * initialize the argument nodemask to contain the single node for * 'preferred' or 'local' policy and return 'true' to indicate presence * of non-default mempolicy. * * We don't bother with reference counting the mempolicy [mpol_get/put] * because the current task is examining it's own mempolicy and a task's * mempolicy is only ever changed by the task itself. * * N.B., it is the caller's responsibility to free a returned nodemask. */ bool init_nodemask_of_mempolicy(nodemask_t *mask) { struct mempolicy *mempolicy; if (!(mask && current->mempolicy)) return false; task_lock(current); mempolicy = current->mempolicy; switch (mempolicy->mode) { case MPOL_PREFERRED: case MPOL_PREFERRED_MANY: case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_WEIGHTED_INTERLEAVE: *mask = mempolicy->nodes; break; case MPOL_LOCAL: init_nodemask_of_node(mask, numa_node_id()); break; default: BUG(); } task_unlock(current); return true; } #endif /* * mempolicy_in_oom_domain * * If tsk's mempolicy is "bind", check for intersection between mask and * the policy nodemask. Otherwise, return true for all other policies * including "interleave", as a tsk with "interleave" policy may have * memory allocated from all nodes in system. * * Takes task_lock(tsk) to prevent freeing of its mempolicy. */ bool mempolicy_in_oom_domain(struct task_struct *tsk, const nodemask_t *mask) { struct mempolicy *mempolicy; bool ret = true; if (!mask) return ret; task_lock(tsk); mempolicy = tsk->mempolicy; if (mempolicy && mempolicy->mode == MPOL_BIND) ret = nodes_intersects(mempolicy->nodes, *mask); task_unlock(tsk); return ret; } static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order, int nid, nodemask_t *nodemask) { struct page *page; gfp_t preferred_gfp; /* * This is a two pass approach. The first pass will only try the * preferred nodes but skip the direct reclaim and allow the * allocation to fail, while the second pass will try all the * nodes in system. */ preferred_gfp = gfp | __GFP_NOWARN; preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); page = __alloc_frozen_pages_noprof(preferred_gfp, order, nid, nodemask); if (!page) page = __alloc_frozen_pages_noprof(gfp, order, nid, NULL); return page; } /** * alloc_pages_mpol - Allocate pages according to NUMA mempolicy. * @gfp: GFP flags. * @order: Order of the page allocation. * @pol: Pointer to the NUMA mempolicy. * @ilx: Index for interleave mempolicy (also distinguishes alloc_pages()). * @nid: Preferred node (usually numa_node_id() but @mpol may override it). * * Return: The page on success or NULL if allocation fails. 
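 *
 * Note on @ilx: alloc_frozen_pages_noprof() below passes NO_INTERLEAVE_INDEX,
 * so interleave policies advance the per-task round-robin state via
 * interleave_nodes()/weighted_interleave_nodes(), whereas callers passing a
 * VMA/file derived index get a stable node for a given offset via
 * interleave_nid()/weighted_interleave_nid().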
*/ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, struct mempolicy *pol, pgoff_t ilx, int nid) { nodemask_t *nodemask; struct page *page; nodemask = policy_nodemask(gfp, pol, ilx, &nid); if (pol->mode == MPOL_PREFERRED_MANY) return alloc_pages_preferred_many(gfp, order, nid, nodemask); if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && /* filter "hugepage" allocation, unless from alloc_pages() */ order == HPAGE_PMD_ORDER && ilx != NO_INTERLEAVE_INDEX) { /* * For hugepage allocation and non-interleave policy which * allows the current node (or other explicitly preferred * node) we only try to allocate from the current/preferred * node and don't fall back to other nodes, as the cost of * remote accesses would likely offset THP benefits. * * If the policy is interleave or does not allow the current * node in its nodemask, we allocate the standard way. */ if (pol->mode != MPOL_INTERLEAVE && pol->mode != MPOL_WEIGHTED_INTERLEAVE && (!nodemask || node_isset(nid, *nodemask))) { /* * First, try to allocate THP only on local node, but * don't reclaim unnecessarily, just compact. */ page = __alloc_frozen_pages_noprof( gfp | __GFP_THISNODE | __GFP_NORETRY, order, nid, NULL); if (page || !(gfp & __GFP_DIRECT_RECLAIM)) return page; /* * If hugepage allocations are configured to always * synchronous compact or the vma has been madvised * to prefer hugepage backing, retry allowing remote * memory with both reclaim and compact as well. */ } } page = __alloc_frozen_pages_noprof(gfp, order, nid, nodemask); if (unlikely(pol->mode == MPOL_INTERLEAVE || pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) { /* skip NUMA_INTERLEAVE_HIT update if numa stats is disabled */ if (static_branch_likely(&vm_numa_stat_key) && page_to_nid(page) == nid) { preempt_disable(); __count_numa_event(page_zone(page), NUMA_INTERLEAVE_HIT); preempt_enable(); } } return page; } struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *pol, pgoff_t ilx, int nid) { struct page *page = alloc_pages_mpol(gfp | __GFP_COMP, order, pol, ilx, nid); if (!page) return NULL; set_page_refcounted(page); return page_rmappable_folio(page); } /** * vma_alloc_folio - Allocate a folio for a VMA. * @gfp: GFP flags. * @order: Order of the folio. * @vma: Pointer to VMA. * @addr: Virtual address of the allocation. Must be inside @vma. * * Allocate a folio for a specific address in @vma, using the appropriate * NUMA policy. The caller must hold the mmap_lock of the mm_struct of the * VMA to prevent it from going away. Should be used for all allocations * for folios that will be mapped into user space, excepting hugetlbfs, and * excepting where direct use of folio_alloc_mpol() is more appropriate. * * Return: The folio on success or NULL if allocation fails. 
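 *
 * A fault path might use it roughly as follows (sketch only; vmf stands for a
 * hypothetical struct vm_fault pointer):
 *
 *	folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address);
 *	if (!folio)
 *		return VM_FAULT_OOM;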
*/ struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol; pgoff_t ilx; struct folio *folio; if (vma->vm_flags & VM_DROPPABLE) gfp |= __GFP_NOWARN; pol = get_vma_policy(vma, addr, order, &ilx); folio = folio_alloc_mpol_noprof(gfp, order, pol, ilx, numa_node_id()); mpol_cond_put(pol); return folio; } EXPORT_SYMBOL(vma_alloc_folio_noprof); struct page *alloc_frozen_pages_noprof(gfp_t gfp, unsigned order) { struct mempolicy *pol = &default_policy; /* * No reference counting needed for current->mempolicy * nor system default_policy */ if (!in_interrupt() && !(gfp & __GFP_THISNODE)) pol = get_task_policy(current); return alloc_pages_mpol(gfp, order, pol, NO_INTERLEAVE_INDEX, numa_node_id()); } /** * alloc_pages - Allocate pages. * @gfp: GFP flags. * @order: Power of two of number of pages to allocate. * * Allocate 1 << @order contiguous pages. The physical address of the * first page is naturally aligned (eg an order-3 allocation will be aligned * to a multiple of 8 * PAGE_SIZE bytes). The NUMA policy of the current * process is honoured when in process context. * * Context: Can be called from any context, providing the appropriate GFP * flags are used. * Return: The page on success or NULL if allocation fails. */ struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order) { struct page *page = alloc_frozen_pages_noprof(gfp, order); if (page) set_page_refcounted(page); return page; } EXPORT_SYMBOL(alloc_pages_noprof); struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) { return page_rmappable_folio(alloc_pages_noprof(gfp | __GFP_COMP, order)); } EXPORT_SYMBOL(folio_alloc_noprof); static unsigned long alloc_pages_bulk_interleave(gfp_t gfp, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { int nodes; unsigned long nr_pages_per_node; int delta; int i; unsigned long nr_allocated; unsigned long total_allocated = 0; nodes = nodes_weight(pol->nodes); nr_pages_per_node = nr_pages / nodes; delta = nr_pages - nodes * nr_pages_per_node; for (i = 0; i < nodes; i++) { if (delta) { nr_allocated = alloc_pages_bulk_noprof(gfp, interleave_nodes(pol), NULL, nr_pages_per_node + 1, page_array); delta--; } else { nr_allocated = alloc_pages_bulk_noprof(gfp, interleave_nodes(pol), NULL, nr_pages_per_node, page_array); } page_array += nr_allocated; total_allocated += nr_allocated; } return total_allocated; } static unsigned long alloc_pages_bulk_weighted_interleave(gfp_t gfp, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { struct task_struct *me = current; unsigned int cpuset_mems_cookie; unsigned long total_allocated = 0; unsigned long nr_allocated = 0; unsigned long rounds; unsigned long node_pages, delta; u8 *table, *weights, weight; unsigned int weight_total = 0; unsigned long rem_pages = nr_pages; nodemask_t nodes; int nnodes, node; int resume_node = MAX_NUMNODES - 1; u8 resume_weight = 0; int prev_node; int i; if (!nr_pages) return 0; /* read the nodes onto the stack, retry if done during rebind */ do { cpuset_mems_cookie = read_mems_allowed_begin(); nnodes = read_once_policy_nodemask(pol, &nodes); } while (read_mems_allowed_retry(cpuset_mems_cookie)); /* if the nodemask has become invalid, we cannot do anything */ if (!nnodes) return 0; /* Continue allocating from most recent node and adjust the nr_pages */ node = me->il_prev; weight = me->il_weight; if (weight && node_isset(node, nodes)) { node_pages = min(rem_pages, weight); nr_allocated = __alloc_pages_bulk(gfp, node, 
NULL, node_pages, page_array); page_array += nr_allocated; total_allocated += nr_allocated; /* if that's all the pages, no need to interleave */ if (rem_pages <= weight) { me->il_weight -= rem_pages; return total_allocated; } /* Otherwise we adjust remaining pages, continue from there */ rem_pages -= weight; } /* clear active weight in case of an allocation failure */ me->il_weight = 0; prev_node = node; /* create a local copy of node weights to operate on outside rcu */ weights = kzalloc(nr_node_ids, GFP_KERNEL); if (!weights) return total_allocated; rcu_read_lock(); table = rcu_dereference(iw_table); if (table) memcpy(weights, table, nr_node_ids); rcu_read_unlock(); /* calculate total, detect system default usage */ for_each_node_mask(node, nodes) { if (!weights[node]) weights[node] = 1; weight_total += weights[node]; } /* * Calculate rounds/partial rounds to minimize __alloc_pages_bulk calls. * Track which node weighted interleave should resume from. * * if (rounds > 0) and (delta == 0), resume_node will always be * the node following prev_node and its weight. */ rounds = rem_pages / weight_total; delta = rem_pages % weight_total; resume_node = next_node_in(prev_node, nodes); resume_weight = weights[resume_node]; for (i = 0; i < nnodes; i++) { node = next_node_in(prev_node, nodes); weight = weights[node]; node_pages = weight * rounds; /* If a delta exists, add this node's portion of the delta */ if (delta > weight) { node_pages += weight; delta -= weight; } else if (delta) { /* when delta is depleted, resume from that node */ node_pages += delta; resume_node = node; resume_weight = weight - delta; delta = 0; } /* node_pages can be 0 if an allocation fails and rounds == 0 */ if (!node_pages) break; nr_allocated = __alloc_pages_bulk(gfp, node, NULL, node_pages, page_array); page_array += nr_allocated; total_allocated += nr_allocated; if (total_allocated == nr_pages) break; prev_node = node; } me->il_prev = resume_node; me->il_weight = resume_weight; kfree(weights); return total_allocated; } static unsigned long alloc_pages_bulk_preferred_many(gfp_t gfp, int nid, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { gfp_t preferred_gfp; unsigned long nr_allocated = 0; preferred_gfp = gfp | __GFP_NOWARN; preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); nr_allocated = alloc_pages_bulk_noprof(preferred_gfp, nid, &pol->nodes, nr_pages, page_array); if (nr_allocated < nr_pages) nr_allocated += alloc_pages_bulk_noprof(gfp, numa_node_id(), NULL, nr_pages - nr_allocated, page_array + nr_allocated); return nr_allocated; } /* alloc pages bulk and mempolicy should be considered at the * same time in some situation such as vmalloc. * * It can accelerate memory allocation especially interleaving * allocate memory. 
*/ unsigned long alloc_pages_bulk_mempolicy_noprof(gfp_t gfp, unsigned long nr_pages, struct page **page_array) { struct mempolicy *pol = &default_policy; nodemask_t *nodemask; int nid; if (!in_interrupt() && !(gfp & __GFP_THISNODE)) pol = get_task_policy(current); if (pol->mode == MPOL_INTERLEAVE) return alloc_pages_bulk_interleave(gfp, pol, nr_pages, page_array); if (pol->mode == MPOL_WEIGHTED_INTERLEAVE) return alloc_pages_bulk_weighted_interleave( gfp, pol, nr_pages, page_array); if (pol->mode == MPOL_PREFERRED_MANY) return alloc_pages_bulk_preferred_many(gfp, numa_node_id(), pol, nr_pages, page_array); nid = numa_node_id(); nodemask = policy_nodemask(gfp, pol, NO_INTERLEAVE_INDEX, &nid); return alloc_pages_bulk_noprof(gfp, nid, nodemask, nr_pages, page_array); } int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { struct mempolicy *pol = mpol_dup(src->vm_policy); if (IS_ERR(pol)) return PTR_ERR(pol); dst->vm_policy = pol; return 0; } /* * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it * rebinds the mempolicy its copying by calling mpol_rebind_policy() * with the mems_allowed returned by cpuset_mems_allowed(). This * keeps mempolicies cpuset relative after its cpuset moves. See * further kernel/cpuset.c update_nodemask(). * * current's mempolicy may be rebinded by the other task(the task that changes * cpuset's mems), so we needn't do rebind work for current task. */ /* Slow path of a mempolicy duplicate */ struct mempolicy *__mpol_dup(struct mempolicy *old) { struct mempolicy *new = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!new) return ERR_PTR(-ENOMEM); /* task's mempolicy is protected by alloc_lock */ if (old == current->mempolicy) { task_lock(current); *new = *old; task_unlock(current); } else *new = *old; if (current_cpuset_is_being_rebound()) { nodemask_t mems = cpuset_mems_allowed(current); mpol_rebind_policy(new, &mems); } atomic_set(&new->refcnt, 1); return new; } /* Slow path of a mempolicy comparison */ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b) { if (!a || !b) return false; if (a->mode != b->mode) return false; if (a->flags != b->flags) return false; if (a->home_node != b->home_node) return false; if (mpol_store_user_nodemask(a)) if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask)) return false; switch (a->mode) { case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_PREFERRED: case MPOL_PREFERRED_MANY: case MPOL_WEIGHTED_INTERLEAVE: return !!nodes_equal(a->nodes, b->nodes); case MPOL_LOCAL: return true; default: BUG(); return false; } } /* * Shared memory backing store policy support. * * Remember policies even when nobody has shared memory mapped. * The policies are kept in Red-Black tree linked from the inode. * They are protected by the sp->lock rwlock, which should be held * for any accesses to the tree. */ /* * lookup first element intersecting start-end. Caller holds sp->lock for * reading or for writing */ static struct sp_node *sp_lookup(struct shared_policy *sp, pgoff_t start, pgoff_t end) { struct rb_node *n = sp->root.rb_node; while (n) { struct sp_node *p = rb_entry(n, struct sp_node, nd); if (start >= p->end) n = n->rb_right; else if (end <= p->start) n = n->rb_left; else break; } if (!n) return NULL; for (;;) { struct sp_node *w = NULL; struct rb_node *prev = rb_prev(n); if (!prev) break; w = rb_entry(prev, struct sp_node, nd); if (w->end <= start) break; n = prev; } return rb_entry(n, struct sp_node, nd); } /* * Insert a new shared policy into the list. Caller holds sp->lock for * writing. 
*/ static void sp_insert(struct shared_policy *sp, struct sp_node *new) { struct rb_node **p = &sp->root.rb_node; struct rb_node *parent = NULL; struct sp_node *nd; while (*p) { parent = *p; nd = rb_entry(parent, struct sp_node, nd); if (new->start < nd->start) p = &(*p)->rb_left; else if (new->end > nd->end) p = &(*p)->rb_right; else BUG(); } rb_link_node(&new->nd, parent, p); rb_insert_color(&new->nd, &sp->root); } /* Find shared policy intersecting idx */ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx) { struct mempolicy *pol = NULL; struct sp_node *sn; if (!sp->root.rb_node) return NULL; read_lock(&sp->lock); sn = sp_lookup(sp, idx, idx+1); if (sn) { mpol_get(sn->policy); pol = sn->policy; } read_unlock(&sp->lock); return pol; } static void sp_free(struct sp_node *n) { mpol_put(n->policy); kmem_cache_free(sn_cache, n); } /** * mpol_misplaced - check whether current folio node is valid in policy * * @folio: folio to be checked * @vmf: structure describing the fault * @addr: virtual address in @vma for shared policy lookup and interleave policy * * Lookup current policy node id for vma,addr and "compare to" folio's * node id. Policy determination "mimics" alloc_page_vma(). * Called from fault path where we know the vma and faulting address. * * Return: NUMA_NO_NODE if the page is in a node that is valid for this * policy, or a suitable node ID to allocate a replacement folio from. */ int mpol_misplaced(struct folio *folio, struct vm_fault *vmf, unsigned long addr) { struct mempolicy *pol; pgoff_t ilx; struct zoneref *z; int curnid = folio_nid(folio); struct vm_area_struct *vma = vmf->vma; int thiscpu = raw_smp_processor_id(); int thisnid = numa_node_id(); int polnid = NUMA_NO_NODE; int ret = NUMA_NO_NODE; /* * Make sure ptl is held so that we don't preempt and we * have a stable smp processor id */ lockdep_assert_held(vmf->ptl); pol = get_vma_policy(vma, addr, folio_order(folio), &ilx); if (!(pol->flags & MPOL_F_MOF)) goto out; switch (pol->mode) { case MPOL_INTERLEAVE: polnid = interleave_nid(pol, ilx); break; case MPOL_WEIGHTED_INTERLEAVE: polnid = weighted_interleave_nid(pol, ilx); break; case MPOL_PREFERRED: if (node_isset(curnid, pol->nodes)) goto out; polnid = first_node(pol->nodes); break; case MPOL_LOCAL: polnid = numa_node_id(); break; case MPOL_BIND: case MPOL_PREFERRED_MANY: /* * Even though MPOL_PREFERRED_MANY can allocate pages outside * policy nodemask we don't allow numa migration to nodes * outside policy nodemask for now. This is done so that if we * want demotion to slow memory to happen, before allocating * from some DRAM node say 'x', we will end up using a * MPOL_PREFERRED_MANY mask excluding node 'x'. In such scenario * we should not promote to node 'x' from slow memory node. */ if (pol->flags & MPOL_F_MORON) { /* * Optimize placement among multiple nodes * via NUMA balancing */ if (node_isset(thisnid, pol->nodes)) break; goto out; } /* * use current page if in policy nodemask, * else select nearest allowed node, if any. * If no allowed nodes, use current [!misplaced]. 
*/ if (node_isset(curnid, pol->nodes)) goto out; z = first_zones_zonelist( node_zonelist(thisnid, GFP_HIGHUSER), gfp_zone(GFP_HIGHUSER), &pol->nodes); polnid = zonelist_node_idx(z); break; default: BUG(); } /* Migrate the folio towards the node whose CPU is referencing it */ if (pol->flags & MPOL_F_MORON) { polnid = thisnid; if (!should_numa_migrate_memory(current, folio, curnid, thiscpu)) goto out; } if (curnid != polnid) ret = polnid; out: mpol_cond_put(pol); return ret; } /* * Drop the (possibly final) reference to task->mempolicy. It needs to be * dropped after task->mempolicy is set to NULL so that any allocation done as * part of its kmem_cache_free(), such as by KASAN, doesn't reference a freed * policy. */ void mpol_put_task_policy(struct task_struct *task) { struct mempolicy *pol; task_lock(task); pol = task->mempolicy; task->mempolicy = NULL; task_unlock(task); mpol_put(pol); } static void sp_delete(struct shared_policy *sp, struct sp_node *n) { rb_erase(&n->nd, &sp->root); sp_free(n); } static void sp_node_init(struct sp_node *node, unsigned long start, unsigned long end, struct mempolicy *pol) { node->start = start; node->end = end; node->policy = pol; } static struct sp_node *sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol) { struct sp_node *n; struct mempolicy *newpol; n = kmem_cache_alloc(sn_cache, GFP_KERNEL); if (!n) return NULL; newpol = mpol_dup(pol); if (IS_ERR(newpol)) { kmem_cache_free(sn_cache, n); return NULL; } newpol->flags |= MPOL_F_SHARED; sp_node_init(n, start, end, newpol); return n; } /* Replace a policy range. */ static int shared_policy_replace(struct shared_policy *sp, pgoff_t start, pgoff_t end, struct sp_node *new) { struct sp_node *n; struct sp_node *n_new = NULL; struct mempolicy *mpol_new = NULL; int ret = 0; restart: write_lock(&sp->lock); n = sp_lookup(sp, start, end); /* Take care of old policies in the same range. */ while (n && n->start < end) { struct rb_node *next = rb_next(&n->nd); if (n->start >= start) { if (n->end <= end) sp_delete(sp, n); else n->start = end; } else { /* Old policy spanning whole new range. */ if (n->end > end) { if (!n_new) goto alloc_new; *mpol_new = *n->policy; atomic_set(&mpol_new->refcnt, 1); sp_node_init(n_new, end, n->end, mpol_new); n->end = start; sp_insert(sp, n_new); n_new = NULL; mpol_new = NULL; break; } else n->end = start; } if (!next) break; n = rb_entry(next, struct sp_node, nd); } if (new) sp_insert(sp, new); write_unlock(&sp->lock); ret = 0; err_out: if (mpol_new) mpol_put(mpol_new); if (n_new) kmem_cache_free(sn_cache, n_new); return ret; alloc_new: write_unlock(&sp->lock); ret = -ENOMEM; n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL); if (!n_new) goto err_out; mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!mpol_new) goto err_out; atomic_set(&mpol_new->refcnt, 1); goto restart; } /** * mpol_shared_policy_init - initialize shared policy for inode * @sp: pointer to inode shared policy * @mpol: struct mempolicy to install * * Install non-NULL @mpol in inode's shared policy rb-tree. * On entry, the current task has a reference on a non-NULL @mpol. * This must be released on exit. * This is called at get_inode() calls and we can use GFP_KERNEL. 
*/ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) { int ret; sp->root = RB_ROOT; /* empty tree == default mempolicy */ rwlock_init(&sp->lock); if (mpol) { struct sp_node *sn; struct mempolicy *npol; NODEMASK_SCRATCH(scratch); if (!scratch) goto put_mpol; /* contextualize the tmpfs mount point mempolicy to this file */ npol = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); if (IS_ERR(npol)) goto free_scratch; /* no valid nodemask intersection */ task_lock(current); ret = mpol_set_nodemask(npol, &mpol->w.user_nodemask, scratch); task_unlock(current); if (ret) goto put_npol; /* alloc node covering entire file; adds ref to file's npol */ sn = sp_alloc(0, MAX_LFS_FILESIZE >> PAGE_SHIFT, npol); if (sn) sp_insert(sp, sn); put_npol: mpol_put(npol); /* drop initial ref on file's npol */ free_scratch: NODEMASK_SCRATCH_FREE(scratch); put_mpol: mpol_put(mpol); /* drop our incoming ref on sb mpol */ } } int mpol_set_shared_policy(struct shared_policy *sp, struct vm_area_struct *vma, struct mempolicy *pol) { int err; struct sp_node *new = NULL; unsigned long sz = vma_pages(vma); if (pol) { new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, pol); if (!new) return -ENOMEM; } err = shared_policy_replace(sp, vma->vm_pgoff, vma->vm_pgoff + sz, new); if (err && new) sp_free(new); return err; } /* Free a backing policy store on inode delete. */ void mpol_free_shared_policy(struct shared_policy *sp) { struct sp_node *n; struct rb_node *next; if (!sp->root.rb_node) return; write_lock(&sp->lock); next = rb_first(&sp->root); while (next) { n = rb_entry(next, struct sp_node, nd); next = rb_next(&n->nd); sp_delete(sp, n); } write_unlock(&sp->lock); } #ifdef CONFIG_NUMA_BALANCING static int __initdata numabalancing_override; static void __init check_numabalancing_enable(void) { bool numabalancing_default = false; if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED)) numabalancing_default = true; /* Parsed by setup_numabalancing. override == 1 enables, -1 disables */ if (numabalancing_override) set_numabalancing_state(numabalancing_override == 1); if (num_online_nodes() > 1 && !numabalancing_override) { pr_info("%s automatic NUMA balancing. Configure with numa_balancing= or the kernel.numa_balancing sysctl\n", numabalancing_default ? "Enabling" : "Disabling"); set_numabalancing_state(numabalancing_default); } } static int __init setup_numabalancing(char *str) { int ret = 0; if (!str) goto out; if (!strcmp(str, "enable")) { numabalancing_override = 1; ret = 1; } else if (!strcmp(str, "disable")) { numabalancing_override = -1; ret = 1; } out: if (!ret) pr_warn("Unable to parse numa_balancing=\n"); return ret; } __setup("numa_balancing=", setup_numabalancing); #else static inline void __init check_numabalancing_enable(void) { } #endif /* CONFIG_NUMA_BALANCING */ void __init numa_policy_init(void) { nodemask_t interleave_nodes; unsigned long largest = 0; int nid, prefer = 0; policy_cache = kmem_cache_create("numa_policy", sizeof(struct mempolicy), 0, SLAB_PANIC, NULL); sn_cache = kmem_cache_create("shared_policy_node", sizeof(struct sp_node), 0, SLAB_PANIC, NULL); for_each_node(nid) { preferred_node_policy[nid] = (struct mempolicy) { .refcnt = ATOMIC_INIT(1), .mode = MPOL_PREFERRED, .flags = MPOL_F_MOF | MPOL_F_MORON, .nodes = nodemask_of_node(nid), }; } /* * Set interleaving policy for system init. Interleaving is only * enabled across suitably sized nodes (default is >= 16MB), or * fall back to the largest node if they're all smaller. 
*/ nodes_clear(interleave_nodes); for_each_node_state(nid, N_MEMORY) { unsigned long total_pages = node_present_pages(nid); /* Preserve the largest node */ if (largest < total_pages) { largest = total_pages; prefer = nid; } /* Interleave this node? */ if ((total_pages << PAGE_SHIFT) >= (16 << 20)) node_set(nid, interleave_nodes); } /* All too small, use the largest */ if (unlikely(nodes_empty(interleave_nodes))) node_set(prefer, interleave_nodes); if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes)) pr_err("%s: interleaving failed\n", __func__); check_numabalancing_enable(); } /* Reset policy of current process to default */ void numa_default_policy(void) { do_set_mempolicy(MPOL_DEFAULT, 0, NULL); } /* * Parse and format mempolicy from/to strings */ static const char * const policy_modes[] = { [MPOL_DEFAULT] = "default", [MPOL_PREFERRED] = "prefer", [MPOL_BIND] = "bind", [MPOL_INTERLEAVE] = "interleave", [MPOL_WEIGHTED_INTERLEAVE] = "weighted interleave", [MPOL_LOCAL] = "local", [MPOL_PREFERRED_MANY] = "prefer (many)", }; #ifdef CONFIG_TMPFS /** * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option. * @str: string containing mempolicy to parse * @mpol: pointer to struct mempolicy pointer, returned on success. * * Format of input: * <mode>[=<flags>][:<nodelist>] * * Return: %0 on success, else %1 */ int mpol_parse_str(char *str, struct mempolicy **mpol) { struct mempolicy *new = NULL; unsigned short mode_flags; nodemask_t nodes; char *nodelist = strchr(str, ':'); char *flags = strchr(str, '='); int err = 1, mode; if (flags) *flags++ = '\0'; /* terminate mode string */ if (nodelist) { /* NUL-terminate mode or flags string */ *nodelist++ = '\0'; if (nodelist_parse(nodelist, nodes)) goto out; if (!nodes_subset(nodes, node_states[N_MEMORY])) goto out; } else nodes_clear(nodes); mode = match_string(policy_modes, MPOL_MAX, str); if (mode < 0) goto out; switch (mode) { case MPOL_PREFERRED: /* * Insist on a nodelist of one node only, although later * we use first_node(nodes) to grab a single node, so here * nodelist (or nodes) cannot be empty. */ if (nodelist) { char *rest = nodelist; while (isdigit(*rest)) rest++; if (*rest) goto out; if (nodes_empty(nodes)) goto out; } break; case MPOL_INTERLEAVE: case MPOL_WEIGHTED_INTERLEAVE: /* * Default to online nodes with memory if no nodelist */ if (!nodelist) nodes = node_states[N_MEMORY]; break; case MPOL_LOCAL: /* * Don't allow a nodelist; mpol_new() checks flags */ if (nodelist) goto out; break; case MPOL_DEFAULT: /* * Insist on a empty nodelist */ if (!nodelist) err = 0; goto out; case MPOL_PREFERRED_MANY: case MPOL_BIND: /* * Insist on a nodelist */ if (!nodelist) goto out; } mode_flags = 0; if (flags) { /* * Currently, we only support two mutually exclusive * mode flags. */ if (!strcmp(flags, "static")) mode_flags |= MPOL_F_STATIC_NODES; else if (!strcmp(flags, "relative")) mode_flags |= MPOL_F_RELATIVE_NODES; else goto out; } new = mpol_new(mode, mode_flags, &nodes); if (IS_ERR(new)) goto out; /* * Save nodes for mpol_to_str() to show the tmpfs mount options * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo. */ if (mode != MPOL_PREFERRED) { new->nodes = nodes; } else if (nodelist) { nodes_clear(new->nodes); node_set(first_node(nodes), new->nodes); } else { new->mode = MPOL_LOCAL; } /* * Save nodes for contextualization: this will be used to "clone" * the mempolicy in a specific context [cpuset] at a later time. 
*/ new->w.user_nodemask = nodes; err = 0; out: /* Restore string for error message */ if (nodelist) *--nodelist = ':'; if (flags) *--flags = '='; if (!err) *mpol = new; return err; } #endif /* CONFIG_TMPFS */ /** * mpol_to_str - format a mempolicy structure for printing * @buffer: to contain formatted mempolicy string * @maxlen: length of @buffer * @pol: pointer to mempolicy to be formatted * * Convert @pol into a string. If @buffer is too short, truncate the string. * Recommend a @maxlen of at least 51 for the longest mode, "weighted * interleave", plus the longest flag flags, "relative|balancing", and to * display at least a few node ids. */ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) { char *p = buffer; nodemask_t nodes = NODE_MASK_NONE; unsigned short mode = MPOL_DEFAULT; unsigned short flags = 0; if (pol && pol != &default_policy && !(pol >= &preferred_node_policy[0] && pol <= &preferred_node_policy[ARRAY_SIZE(preferred_node_policy) - 1])) { mode = pol->mode; flags = pol->flags; } switch (mode) { case MPOL_DEFAULT: case MPOL_LOCAL: break; case MPOL_PREFERRED: case MPOL_PREFERRED_MANY: case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_WEIGHTED_INTERLEAVE: nodes = pol->nodes; break; default: WARN_ON_ONCE(1); snprintf(p, maxlen, "unknown"); return; } p += snprintf(p, maxlen, "%s", policy_modes[mode]); if (flags & MPOL_MODE_FLAGS) { p += snprintf(p, buffer + maxlen - p, "="); /* * Static and relative are mutually exclusive. */ if (flags & MPOL_F_STATIC_NODES) p += snprintf(p, buffer + maxlen - p, "static"); else if (flags & MPOL_F_RELATIVE_NODES) p += snprintf(p, buffer + maxlen - p, "relative"); if (flags & MPOL_F_NUMA_BALANCING) { if (!is_power_of_2(flags & MPOL_MODE_FLAGS)) p += snprintf(p, buffer + maxlen - p, "|"); p += snprintf(p, buffer + maxlen - p, "balancing"); } } if (!nodes_empty(nodes)) p += scnprintf(p, buffer + maxlen - p, ":%*pbl", nodemask_pr_args(&nodes)); } #ifdef CONFIG_SYSFS struct iw_node_attr { struct kobj_attribute kobj_attr; int nid; }; static ssize_t node_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct iw_node_attr *node_attr; u8 weight; node_attr = container_of(attr, struct iw_node_attr, kobj_attr); weight = get_il_weight(node_attr->nid); return sysfs_emit(buf, "%d\n", weight); } static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct iw_node_attr *node_attr; u8 *new; u8 *old; u8 weight = 0; node_attr = container_of(attr, struct iw_node_attr, kobj_attr); if (count == 0 || sysfs_streq(buf, "")) weight = 0; else if (kstrtou8(buf, 0, &weight)) return -EINVAL; new = kzalloc(nr_node_ids, GFP_KERNEL); if (!new) return -ENOMEM; mutex_lock(&iw_table_lock); old = rcu_dereference_protected(iw_table, lockdep_is_held(&iw_table_lock)); if (old) memcpy(new, old, nr_node_ids); new[node_attr->nid] = weight; rcu_assign_pointer(iw_table, new); mutex_unlock(&iw_table_lock); synchronize_rcu(); kfree(old); return count; } static struct iw_node_attr **node_attrs; static void sysfs_wi_node_release(struct iw_node_attr *node_attr, struct kobject *parent) { if (!node_attr) return; sysfs_remove_file(parent, &node_attr->kobj_attr.attr); kfree(node_attr->kobj_attr.attr.name); kfree(node_attr); } static void sysfs_wi_release(struct kobject *wi_kobj) { int i; for (i = 0; i < nr_node_ids; i++) sysfs_wi_node_release(node_attrs[i], wi_kobj); kobject_put(wi_kobj); } static const struct kobj_type wi_ktype = { .sysfs_ops = &kobj_sysfs_ops, .release = sysfs_wi_release, }; static int 
add_weight_node(int nid, struct kobject *wi_kobj) { struct iw_node_attr *node_attr; char *name; node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL); if (!node_attr) return -ENOMEM; name = kasprintf(GFP_KERNEL, "node%d", nid); if (!name) { kfree(node_attr); return -ENOMEM; } sysfs_attr_init(&node_attr->kobj_attr.attr); node_attr->kobj_attr.attr.name = name; node_attr->kobj_attr.attr.mode = 0644; node_attr->kobj_attr.show = node_show; node_attr->kobj_attr.store = node_store; node_attr->nid = nid; if (sysfs_create_file(wi_kobj, &node_attr->kobj_attr.attr)) { kfree(node_attr->kobj_attr.attr.name); kfree(node_attr); pr_err("failed to add attribute to weighted_interleave\n"); return -ENOMEM; } node_attrs[nid] = node_attr; return 0; } static int add_weighted_interleave_group(struct kobject *root_kobj) { struct kobject *wi_kobj; int nid, err; wi_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); if (!wi_kobj) return -ENOMEM; err = kobject_init_and_add(wi_kobj, &wi_ktype, root_kobj, "weighted_interleave"); if (err) { kfree(wi_kobj); return err; } for_each_node_state(nid, N_POSSIBLE) { err = add_weight_node(nid, wi_kobj); if (err) { pr_err("failed to add sysfs [node%d]\n", nid); break; } } if (err) kobject_put(wi_kobj); return 0; } static void mempolicy_kobj_release(struct kobject *kobj) { u8 *old; mutex_lock(&iw_table_lock); old = rcu_dereference_protected(iw_table, lockdep_is_held(&iw_table_lock)); rcu_assign_pointer(iw_table, NULL); mutex_unlock(&iw_table_lock); synchronize_rcu(); kfree(old); kfree(node_attrs); kfree(kobj); } static const struct kobj_type mempolicy_ktype = { .release = mempolicy_kobj_release }; static int __init mempolicy_sysfs_init(void) { int err; static struct kobject *mempolicy_kobj; mempolicy_kobj = kzalloc(sizeof(*mempolicy_kobj), GFP_KERNEL); if (!mempolicy_kobj) { err = -ENOMEM; goto err_out; } node_attrs = kcalloc(nr_node_ids, sizeof(struct iw_node_attr *), GFP_KERNEL); if (!node_attrs) { err = -ENOMEM; goto mempol_out; } err = kobject_init_and_add(mempolicy_kobj, &mempolicy_ktype, mm_kobj, "mempolicy"); if (err) goto node_out; err = add_weighted_interleave_group(mempolicy_kobj); if (err) { pr_err("mempolicy sysfs structure failed to initialize\n"); kobject_put(mempolicy_kobj); return err; } return err; node_out: kfree(node_attrs); mempol_out: kfree(mempolicy_kobj); err_out: pr_err("failed to add mempolicy kobject to the system\n"); return err; } late_initcall(mempolicy_sysfs_init); #endif /* CONFIG_SYSFS */
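/*
 * Illustrative sketch, not part of mempolicy.c: how the tmpfs mount-option
 * string helpers above are meant to be used together.  The function name is
 * hypothetical, error handling is trimmed, and the example assumes the nodes
 * named in the option string are online with memory (mpol_parse_str rejects
 * nodelists outside node_states[N_MEMORY]).
 */
static int example_mpol_string_roundtrip(void)
{
	char opt[] = "bind=static:0-3";	/* <mode>[=<flags>][:<nodelist>] */
	char buf[64];
	struct mempolicy *pol;

	if (mpol_parse_str(opt, &pol))		/* returns 1 on parse failure */
		return -EINVAL;

	/* format it back, e.g. for /proc/mounts: "bind=static:0-3" */
	mpol_to_str(buf, sizeof(buf), pol);

	mpol_put(pol);		/* drop the reference created by mpol_parse_str */
	return 0;
}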
// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (c) 2001-2002 by David Brownell */ #ifndef __USB_CORE_HCD_H #define __USB_CORE_HCD_H #ifdef __KERNEL__ #include <linux/rwsem.h> #include <linux/interrupt.h> #include <linux/idr.h> #define MAX_TOPO_LEVEL 6 /* This file contains declarations of usbcore internals that are mostly * used or exposed by Host Controller Drivers.
*/ /* * USB Packet IDs (PIDs) */ #define USB_PID_EXT 0xf0 /* USB 2.0 LPM ECN */ #define USB_PID_OUT 0xe1 #define USB_PID_ACK 0xd2 #define USB_PID_DATA0 0xc3 #define USB_PID_PING 0xb4 /* USB 2.0 */ #define USB_PID_SOF 0xa5 #define USB_PID_NYET 0x96 /* USB 2.0 */ #define USB_PID_DATA2 0x87 /* USB 2.0 */ #define USB_PID_SPLIT 0x78 /* USB 2.0 */ #define USB_PID_IN 0x69 #define USB_PID_NAK 0x5a #define USB_PID_DATA1 0x4b #define USB_PID_PREAMBLE 0x3c /* Token mode */ #define USB_PID_ERR 0x3c /* USB 2.0: handshake mode */ #define USB_PID_SETUP 0x2d #define USB_PID_STALL 0x1e #define USB_PID_MDATA 0x0f /* USB 2.0 */ /*-------------------------------------------------------------------------*/ /* * USB Host Controller Driver (usb_hcd) framework * * Since "struct usb_bus" is so thin, you can't share much code in it. * This framework is a layer over that, and should be more shareable. */ /*-------------------------------------------------------------------------*/ struct giveback_urb_bh { bool running; bool high_prio; spinlock_t lock; struct list_head head; struct work_struct bh; struct usb_host_endpoint *completing_ep; }; enum usb_dev_authorize_policy { USB_DEVICE_AUTHORIZE_NONE = 0, USB_DEVICE_AUTHORIZE_ALL = 1, USB_DEVICE_AUTHORIZE_INTERNAL = 2, }; struct usb_hcd { /* * housekeeping */ struct usb_bus self; /* hcd is-a bus */ struct kref kref; /* reference counter */ const char *product_desc; /* product/vendor string */ int speed; /* Speed for this roothub. * May be different from * hcd->driver->flags & HCD_MASK */ char irq_descr[24]; /* driver + bus # */ struct timer_list rh_timer; /* drives root-hub polling */ struct urb *status_urb; /* the current status urb */ #ifdef CONFIG_PM struct work_struct wakeup_work; /* for remote wakeup */ #endif struct work_struct died_work; /* for when the device dies */ /* * hardware info/state */ const struct hc_driver *driver; /* hw-specific hooks */ /* * OTG and some Host controllers need software interaction with phys; * other external phys should be software-transparent */ struct usb_phy *usb_phy; struct usb_phy_roothub *phy_roothub; /* Flags that need to be manipulated atomically because they can * change while the host controller is running. Always use * set_bit() or clear_bit() to change their values. */ unsigned long flags; #define HCD_FLAG_HW_ACCESSIBLE 0 /* at full power */ #define HCD_FLAG_POLL_RH 2 /* poll for rh status? */ #define HCD_FLAG_POLL_PENDING 3 /* status has changed? */ #define HCD_FLAG_WAKEUP_PENDING 4 /* root hub is resuming? */ #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ #define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). 
*/ #define HCD_HW_ACCESSIBLE(hcd) ((hcd)->flags & (1U << HCD_FLAG_HW_ACCESSIBLE)) #define HCD_POLL_RH(hcd) ((hcd)->flags & (1U << HCD_FLAG_POLL_RH)) #define HCD_POLL_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_POLL_PENDING)) #define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING)) #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) #define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER)) /* * Specifies if interfaces are authorized by default * or they require explicit user space authorization; this bit is * settable through /sys/class/usb_host/X/interface_authorized_default */ #define HCD_INTF_AUTHORIZED(hcd) \ ((hcd)->flags & (1U << HCD_FLAG_INTF_AUTHORIZED)) /* * Specifies if devices are authorized by default * or they require explicit user space authorization; this bit is * settable through /sys/class/usb_host/X/authorized_default */ enum usb_dev_authorize_policy dev_policy; /* Flags that get set only during HCD registration or removal. */ unsigned rh_registered:1;/* is root hub registered? */ unsigned rh_pollable:1; /* may we poll the root hub? */ unsigned msix_enabled:1; /* driver has MSI-X enabled? */ unsigned msi_enabled:1; /* driver has MSI enabled? */ /* * do not manage the PHY state in the HCD core, instead let the driver * handle this (for example if the PHY can only be turned on after a * specific event) */ unsigned skip_phy_initialization:1; /* The next flag is a stopgap, to be removed when all the HCDs * support the new root-hub polling mechanism. */ unsigned uses_new_polling:1; unsigned has_tt:1; /* Integrated TT in root hub */ unsigned amd_resume_bug:1; /* AMD remote wakeup quirk */ unsigned can_do_streams:1; /* HC supports streams */ unsigned tpl_support:1; /* OTG & EH TPL support */ unsigned cant_recv_wakeups:1; /* wakeup requests from downstream aren't received */ unsigned int irq; /* irq allocated */ void __iomem *regs; /* device memory/io */ resource_size_t rsrc_start; /* memory/io resource start */ resource_size_t rsrc_len; /* memory/io resource length */ unsigned power_budget; /* in mA, 0 = no limit */ struct giveback_urb_bh high_prio_bh; struct giveback_urb_bh low_prio_bh; /* bandwidth_mutex should be taken before adding or removing * any new bus bandwidth constraints: * 1. Before adding a configuration for a new device. * 2. Before removing the configuration to put the device into * the addressed state. * 3. Before selecting a different configuration. * 4. Before selecting an alternate interface setting. * * bandwidth_mutex should be dropped after a successful control message * to the device, or resetting the bandwidth after a failed attempt. 
*/ struct mutex *address0_mutex; struct mutex *bandwidth_mutex; struct usb_hcd *shared_hcd; struct usb_hcd *primary_hcd; #define HCD_BUFFER_POOLS 4 struct dma_pool *pool[HCD_BUFFER_POOLS]; int state; # define __ACTIVE 0x01 # define __SUSPEND 0x04 # define __TRANSIENT 0x80 # define HC_STATE_HALT 0 # define HC_STATE_RUNNING (__ACTIVE) # define HC_STATE_QUIESCING (__SUSPEND|__TRANSIENT|__ACTIVE) # define HC_STATE_RESUMING (__SUSPEND|__TRANSIENT) # define HC_STATE_SUSPENDED (__SUSPEND) #define HC_IS_RUNNING(state) ((state) & __ACTIVE) #define HC_IS_SUSPENDED(state) ((state) & __SUSPEND) /* memory pool for HCs having local memory, or %NULL */ struct gen_pool *localmem_pool; /* more shared queuing code would be good; it should support * smarter scheduling, handle transaction translators, etc; * input size of periodic table to an interrupt scheduler. * (ohci 32, uhci 1024, ehci 256/512/1024). */ /* The HC driver's private data is stored at the end of * this structure. */ unsigned long hcd_priv[] __attribute__ ((aligned(sizeof(s64)))); }; /* 2.4 does this a bit differently ... */ static inline struct usb_bus *hcd_to_bus(struct usb_hcd *hcd) { return &hcd->self; } static inline struct usb_hcd *bus_to_hcd(struct usb_bus *bus) { return container_of(bus, struct usb_hcd, self); } /*-------------------------------------------------------------------------*/ struct hc_driver { const char *description; /* "ehci-hcd" etc */ const char *product_desc; /* product/vendor string */ size_t hcd_priv_size; /* size of private data */ /* irq handler */ irqreturn_t (*irq) (struct usb_hcd *hcd); int flags; #define HCD_MEMORY 0x0001 /* HC regs use memory (else I/O) */ #define HCD_DMA 0x0002 /* HC uses DMA */ #define HCD_SHARED 0x0004 /* Two (or more) usb_hcds share HW */ #define HCD_USB11 0x0010 /* USB 1.1 */ #define HCD_USB2 0x0020 /* USB 2.0 */ #define HCD_USB3 0x0040 /* USB 3.0 */ #define HCD_USB31 0x0050 /* USB 3.1 */ #define HCD_USB32 0x0060 /* USB 3.2 */ #define HCD_MASK 0x0070 #define HCD_BH 0x0100 /* URB complete in BH context */ /* called to init HCD and root hub */ int (*reset) (struct usb_hcd *hcd); int (*start) (struct usb_hcd *hcd); /* NOTE: these suspend/resume calls relate to the HC as * a whole, not just the root hub; they're for PCI bus glue. */ /* called after suspending the hub, before entering D3 etc */ int (*pci_suspend)(struct usb_hcd *hcd, bool do_wakeup); /* called after entering D0 (etc), before resuming the hub */ int (*pci_resume)(struct usb_hcd *hcd, pm_message_t state); /* called just before hibernate final D3 state, allows host to poweroff parts */ int (*pci_poweroff_late)(struct usb_hcd *hcd, bool do_wakeup); /* cleanly make HCD stop writing memory and doing I/O */ void (*stop) (struct usb_hcd *hcd); /* shutdown HCD */ void (*shutdown) (struct usb_hcd *hcd); /* return current frame number */ int (*get_frame_number) (struct usb_hcd *hcd); /* manage i/o requests, device state */ int (*urb_enqueue)(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags); int (*urb_dequeue)(struct usb_hcd *hcd, struct urb *urb, int status); /* * (optional) these hooks allow an HCD to override the default DMA * mapping and unmapping routines. In general, they shouldn't be * necessary unless the host controller has special DMA requirements, * such as alignment constraints. 
If these are not specified, the * general usb_hcd_(un)?map_urb_for_dma functions will be used instead * (and it may be a good idea to call these functions in your HCD * implementation) */ int (*map_urb_for_dma)(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags); void (*unmap_urb_for_dma)(struct usb_hcd *hcd, struct urb *urb); /* hw synch, freeing endpoint resources that urb_dequeue can't */ void (*endpoint_disable)(struct usb_hcd *hcd, struct usb_host_endpoint *ep); /* (optional) reset any endpoint state such as sequence number and current window */ void (*endpoint_reset)(struct usb_hcd *hcd, struct usb_host_endpoint *ep); /* root hub support */ int (*hub_status_data) (struct usb_hcd *hcd, char *buf); int (*hub_control) (struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength); int (*bus_suspend)(struct usb_hcd *); int (*bus_resume)(struct usb_hcd *); int (*start_port_reset)(struct usb_hcd *, unsigned port_num); unsigned long (*get_resuming_ports)(struct usb_hcd *); /* force handover of high-speed port to full-speed companion */ void (*relinquish_port)(struct usb_hcd *, int); /* has a port been handed over to a companion? */ int (*port_handed_over)(struct usb_hcd *, int); /* CLEAR_TT_BUFFER completion callback */ void (*clear_tt_buffer_complete)(struct usb_hcd *, struct usb_host_endpoint *); /* xHCI specific functions */ /* Called by usb_alloc_dev to alloc HC device structures */ int (*alloc_dev)(struct usb_hcd *, struct usb_device *); /* Called by usb_disconnect to free HC device structures */ void (*free_dev)(struct usb_hcd *, struct usb_device *); /* Change a group of bulk endpoints to support multiple stream IDs */ int (*alloc_streams)(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint **eps, unsigned int num_eps, unsigned int num_streams, gfp_t mem_flags); /* Reverts a group of bulk endpoints back to not using stream IDs. * Can fail if we run out of memory. */ int (*free_streams)(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint **eps, unsigned int num_eps, gfp_t mem_flags); /* Bandwidth computation functions */ /* Note that add_endpoint() can only be called once per endpoint before * check_bandwidth() or reset_bandwidth() must be called. * drop_endpoint() can only be called once per endpoint also. * A call to xhci_drop_endpoint() followed by a call to * xhci_add_endpoint() will add the endpoint to the schedule with * possibly new parameters denoted by a different endpoint descriptor * in usb_host_endpoint. A call to xhci_add_endpoint() followed by a * call to xhci_drop_endpoint() is not allowed. */ /* Allocate endpoint resources and add them to a new schedule */ int (*add_endpoint)(struct usb_hcd *, struct usb_device *, struct usb_host_endpoint *); /* Drop an endpoint from a new schedule */ int (*drop_endpoint)(struct usb_hcd *, struct usb_device *, struct usb_host_endpoint *); /* Check that a new hardware configuration, set using * endpoint_enable and endpoint_disable, does not exceed bus * bandwidth. This must be called before any set configuration * or set interface requests are sent to the device. */ int (*check_bandwidth)(struct usb_hcd *, struct usb_device *); /* Reset the device schedule to the last known good schedule, * which was set from a previous successful call to * check_bandwidth(). This reverts any add_endpoint() and * drop_endpoint() calls since that last successful call. * Used for when a check_bandwidth() call fails due to resource * or bandwidth constraints. 
*/ void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *); /* Set the hardware-chosen device address */ int (*address_device)(struct usb_hcd *, struct usb_device *udev, unsigned int timeout_ms); /* prepares the hardware to send commands to the device */ int (*enable_device)(struct usb_hcd *, struct usb_device *udev); /* Notifies the HCD after a hub descriptor is fetched. * Will block. */ int (*update_hub_device)(struct usb_hcd *, struct usb_device *hdev, struct usb_tt *tt, gfp_t mem_flags); int (*reset_device)(struct usb_hcd *, struct usb_device *); /* Notifies the HCD after a device is connected and its * address is set */ int (*update_device)(struct usb_hcd *, struct usb_device *); int (*set_usb2_hw_lpm)(struct usb_hcd *, struct usb_device *, int); /* USB 3.0 Link Power Management */ /* Returns the USB3 hub-encoded value for the U1/U2 timeout. */ int (*enable_usb3_lpm_timeout)(struct usb_hcd *, struct usb_device *, enum usb3_link_state state); /* The xHCI host controller can still fail the command to * disable the LPM timeouts, so this can return an error code. */ int (*disable_usb3_lpm_timeout)(struct usb_hcd *, struct usb_device *, enum usb3_link_state state); int (*find_raw_port_number)(struct usb_hcd *, int); /* Call for power on/off the port if necessary */ int (*port_power)(struct usb_hcd *hcd, int portnum, bool enable); /* Call for SINGLE_STEP_SET_FEATURE Test for USB2 EH certification */ #define EHSET_TEST_SINGLE_STEP_SET_FEATURE 0x06 int (*submit_single_step_set_feature)(struct usb_hcd *, struct urb *, int); }; static inline int hcd_giveback_urb_in_bh(struct usb_hcd *hcd) { return hcd->driver->flags & HCD_BH; } static inline bool hcd_periodic_completion_in_progress(struct usb_hcd *hcd, struct usb_host_endpoint *ep) { return hcd->high_prio_bh.completing_ep == ep; } static inline bool hcd_uses_dma(struct usb_hcd *hcd) { return IS_ENABLED(CONFIG_HAS_DMA) && (hcd->driver->flags & HCD_DMA); } extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb); extern int usb_hcd_check_unlink_urb(struct usb_hcd *hcd, struct urb *urb, int status); extern void usb_hcd_unlink_urb_from_ep(struct usb_hcd *hcd, struct urb *urb); extern int usb_hcd_submit_urb(struct urb *urb, gfp_t mem_flags); extern int usb_hcd_unlink_urb(struct urb *urb, int status); extern void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status); extern int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags); extern void usb_hcd_unmap_urb_setup_for_dma(struct usb_hcd *, struct urb *); extern void usb_hcd_unmap_urb_for_dma(struct usb_hcd *, struct urb *); extern void usb_hcd_flush_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep); extern void usb_hcd_disable_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep); extern void usb_hcd_reset_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep); extern void usb_hcd_synchronize_unlinks(struct usb_device *udev); extern int usb_hcd_alloc_bandwidth(struct usb_device *udev, struct usb_host_config *new_config, struct usb_host_interface *old_alt, struct usb_host_interface *new_alt); extern int usb_hcd_get_frame_number(struct usb_device *udev); struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver, struct device *sysdev, struct device *dev, const char *bus_name, struct usb_hcd *primary_hcd); extern struct usb_hcd *usb_create_hcd(const struct hc_driver *driver, struct device *dev, const char *bus_name); extern struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, struct 
device *dev, const char *bus_name, struct usb_hcd *shared_hcd); extern struct usb_hcd *usb_get_hcd(struct usb_hcd *hcd); extern void usb_put_hcd(struct usb_hcd *hcd); extern int usb_hcd_is_primary_hcd(struct usb_hcd *hcd); extern int usb_add_hcd(struct usb_hcd *hcd, unsigned int irqnum, unsigned long irqflags); extern void usb_remove_hcd(struct usb_hcd *hcd); extern int usb_hcd_find_raw_port_number(struct usb_hcd *hcd, int port1); int usb_hcd_setup_local_mem(struct usb_hcd *hcd, phys_addr_t phys_addr, dma_addr_t dma, size_t size); struct platform_device; extern void usb_hcd_platform_shutdown(struct platform_device *dev); #ifdef CONFIG_USB_HCD_TEST_MODE extern int ehset_single_step_set_feature(struct usb_hcd *hcd, int port); #else static inline int ehset_single_step_set_feature(struct usb_hcd *hcd, int port) { return 0; } #endif /* CONFIG_USB_HCD_TEST_MODE */ #ifdef CONFIG_USB_PCI struct pci_dev; struct pci_device_id; extern int usb_hcd_pci_probe(struct pci_dev *dev, const struct hc_driver *driver); extern void usb_hcd_pci_remove(struct pci_dev *dev); extern void usb_hcd_pci_shutdown(struct pci_dev *dev); #ifdef CONFIG_USB_PCI_AMD extern int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *dev); static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, const struct hc_driver *driver) { if (!usb_hcd_amd_remote_wakeup_quirk(dev)) return false; if (driver->flags & (HCD_USB11 | HCD_USB3)) return true; return false; } #else /* CONFIG_USB_PCI_AMD */ static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, const struct hc_driver *driver) { return false; } #endif extern const struct dev_pm_ops usb_hcd_pci_pm_ops; #endif /* CONFIG_USB_PCI */ /* pci-ish (pdev null is ok) buffer alloc/mapping support */ void usb_init_pool_max(void); int hcd_buffer_create(struct usb_hcd *hcd); void hcd_buffer_destroy(struct usb_hcd *hcd); void *hcd_buffer_alloc(struct usb_bus *bus, size_t size, gfp_t mem_flags, dma_addr_t *dma); void hcd_buffer_free(struct usb_bus *bus, size_t size, void *addr, dma_addr_t dma); void *hcd_buffer_alloc_pages(struct usb_hcd *hcd, size_t size, gfp_t mem_flags, dma_addr_t *dma); void hcd_buffer_free_pages(struct usb_hcd *hcd, size_t size, void *addr, dma_addr_t dma); /* generic bus glue, needed for host controllers that don't use PCI */ extern irqreturn_t usb_hcd_irq(int irq, void *__hcd); extern void usb_hc_died(struct usb_hcd *hcd); extern void usb_hcd_poll_rh_status(struct usb_hcd *hcd); extern void usb_wakeup_notification(struct usb_device *hdev, unsigned int portnum); extern void usb_hcd_start_port_resume(struct usb_bus *bus, int portnum); extern void usb_hcd_end_port_resume(struct usb_bus *bus, int portnum); /* The D0/D1 toggle bits ... 
USE WITH CAUTION (they're almost hcd-internal) */ #define usb_gettoggle(dev, ep, out) (((dev)->toggle[out] >> (ep)) & 1) #define usb_dotoggle(dev, ep, out) ((dev)->toggle[out] ^= (1 << (ep))) #define usb_settoggle(dev, ep, out, bit) \ ((dev)->toggle[out] = ((dev)->toggle[out] & ~(1 << (ep))) | \ ((bit) << (ep))) /* -------------------------------------------------------------------------- */ /* Enumeration is only for the hub driver, or HCD virtual root hubs */ extern struct usb_device *usb_alloc_dev(struct usb_device *parent, struct usb_bus *, unsigned port); extern int usb_new_device(struct usb_device *dev); extern void usb_disconnect(struct usb_device **); extern int usb_get_configuration(struct usb_device *dev); extern void usb_destroy_configuration(struct usb_device *dev); /*-------------------------------------------------------------------------*/ /* * HCD Root Hub support */ #include <linux/usb/ch11.h> /* * As of USB 2.0, full/low speed devices are segregated into trees. * One type grows from USB 1.1 host controllers (OHCI, UHCI etc). * The other type grows from high speed hubs when they connect to * full/low speed devices using "Transaction Translators" (TTs). * * TTs should only be known to the hub driver, and high speed bus * drivers (only EHCI for now). They affect periodic scheduling and * sometimes control/bulk error recovery. */ struct usb_device; struct usb_tt { struct usb_device *hub; /* upstream highspeed hub */ int multi; /* true means one TT per port */ unsigned think_time; /* think time in ns */ void *hcpriv; /* HCD private data */ /* for control/bulk error recovery (CLEAR_TT_BUFFER) */ spinlock_t lock; struct list_head clear_list; /* of usb_tt_clear */ struct work_struct clear_work; }; struct usb_tt_clear { struct list_head clear_list; unsigned tt; u16 devinfo; struct usb_hcd *hcd; struct usb_host_endpoint *ep; }; extern int usb_hub_clear_tt_buffer(struct urb *urb); extern void usb_ep0_reinit(struct usb_device *); /* (shifted) direction/type/recipient from the USB 2.0 spec, table 9.2 */ #define DeviceRequest \ ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_DEVICE)<<8) #define DeviceOutRequest \ ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_DEVICE)<<8) #define InterfaceRequest \ ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8) #define EndpointRequest \ ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8) #define EndpointOutRequest \ ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8) /* class requests from the USB 2.0 hub spec, table 11-15 */ #define HUB_CLASS_REQ(dir, type, request) ((((dir) | (type)) << 8) | (request)) /* GetBusState and SetHubDescriptor are optional, omitted */ #define ClearHubFeature HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_HUB, USB_REQ_CLEAR_FEATURE) #define ClearPortFeature HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, USB_REQ_CLEAR_FEATURE) #define GetHubDescriptor HUB_CLASS_REQ(USB_DIR_IN, USB_RT_HUB, USB_REQ_GET_DESCRIPTOR) #define GetHubStatus HUB_CLASS_REQ(USB_DIR_IN, USB_RT_HUB, USB_REQ_GET_STATUS) #define GetPortStatus HUB_CLASS_REQ(USB_DIR_IN, USB_RT_PORT, USB_REQ_GET_STATUS) #define SetHubFeature HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_HUB, USB_REQ_SET_FEATURE) #define SetPortFeature HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, USB_REQ_SET_FEATURE) #define ClearTTBuffer HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, HUB_CLEAR_TT_BUFFER) #define ResetTT HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, HUB_RESET_TT) #define GetTTState HUB_CLASS_REQ(USB_DIR_IN, USB_RT_PORT, HUB_GET_TT_STATE) #define StopTT HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, HUB_STOP_TT) 
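/*
 * Illustrative sketch, not part of this header: how an HCD's hub_control()
 * hook (see struct hc_driver above) typically dispatches on the composite
 * request codes defined here.  The function below is hypothetical; real
 * root-hub emulation fills in port status/change bits and handles many more
 * features.
 */
static int example_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
			       u16 wIndex, char *buf, u16 wLength)
{
	switch (typeReq) {
	case GetPortStatus:
		/* ((USB_DIR_IN | USB_RT_PORT) << 8) | USB_REQ_GET_STATUS == 0xa300 */
		/* write wPortStatus/wPortChange for port wIndex into buf */
		return 0;
	case SetPortFeature:
	case ClearPortFeature:
		/* wValue selects the feature, wIndex selects the port */
		return 0;
	default:
		/* unknown/unsupported request: the hub code treats this as a stall */
		return -EPIPE;
	}
}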
/*-------------------------------------------------------------------------*/ /* class requests from USB 3.1 hub spec, table 10-7 */ #define SetHubDepth HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_HUB, HUB_SET_DEPTH) #define GetPortErrorCount HUB_CLASS_REQ(USB_DIR_IN, USB_RT_PORT, HUB_GET_PORT_ERR_COUNT) /* * Generic bandwidth allocation constants/support */ #define FRAME_TIME_USECS 1000L #define BitTime(bytecount) (7 * 8 * bytecount / 6) /* with integer truncation */ /* Trying not to use worst-case bit-stuffing * of (7/6 * 8 * bytecount) = 9.33 * bytecount */ /* bytecount = data payload byte count */ #define NS_TO_US(ns) DIV_ROUND_UP(ns, 1000L) /* convert nanoseconds to microseconds, rounding up */ /* * Full/low speed bandwidth allocation constants/support. */ #define BW_HOST_DELAY 1000L /* nanoseconds */ #define BW_HUB_LS_SETUP 333L /* nanoseconds */ /* 4 full-speed bit times (est.) */ #define FRAME_TIME_BITS 12000L /* frame = 1 millisecond */ #define FRAME_TIME_MAX_BITS_ALLOC (90L * FRAME_TIME_BITS / 100L) #define FRAME_TIME_MAX_USECS_ALLOC (90L * FRAME_TIME_USECS / 100L) /* * Ceiling [nano/micro]seconds (typical) for that many bytes at high speed * ISO is a bit less, no ACK ... from USB 2.0 spec, 5.11.3 (and needed * to preallocate bandwidth) */ #define USB2_HOST_DELAY 5 /* nsec, guess */ #define HS_NSECS(bytes) (((55 * 8 * 2083) \ + (2083UL * (3 + BitTime(bytes))))/1000 \ + USB2_HOST_DELAY) #define HS_NSECS_ISO(bytes) (((38 * 8 * 2083) \ + (2083UL * (3 + BitTime(bytes))))/1000 \ + USB2_HOST_DELAY) #define HS_USECS(bytes) NS_TO_US(HS_NSECS(bytes)) #define HS_USECS_ISO(bytes) NS_TO_US(HS_NSECS_ISO(bytes)) extern long usb_calc_bus_time(int speed, int is_input, int isoc, int bytecount); /*-------------------------------------------------------------------------*/ extern void usb_set_device_state(struct usb_device *udev, enum usb_device_state new_state); /*-------------------------------------------------------------------------*/ /* exported only within usbcore */ extern struct idr usb_bus_idr; extern struct mutex usb_bus_idr_lock; extern wait_queue_head_t usb_kill_urb_queue; #define usb_endpoint_out(ep_dir) (!((ep_dir) & USB_DIR_IN)) #ifdef CONFIG_PM extern unsigned usb_wakeup_enabled_descendants(struct usb_device *udev); extern void usb_root_hub_lost_power(struct usb_device *rhdev); extern int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg); extern int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg); extern void usb_hcd_resume_root_hub(struct usb_hcd *hcd); #else static inline unsigned usb_wakeup_enabled_descendants(struct usb_device *udev) { return 0; } static inline void usb_hcd_resume_root_hub(struct usb_hcd *hcd) { return; } #endif /* CONFIG_PM */ /*-------------------------------------------------------------------------*/ #if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE) struct usb_mon_operations { void (*urb_submit)(struct usb_bus *bus, struct urb *urb); void (*urb_submit_error)(struct usb_bus *bus, struct urb *urb, int err); void (*urb_complete)(struct usb_bus *bus, struct urb *urb, int status); /* void (*urb_unlink)(struct usb_bus *bus, struct urb *urb); */ }; extern const struct usb_mon_operations *mon_ops; static inline void usbmon_urb_submit(struct usb_bus *bus, struct urb *urb) { if (bus->monitored) (*mon_ops->urb_submit)(bus, urb); } static inline void usbmon_urb_submit_error(struct usb_bus *bus, struct urb *urb, int error) { if (bus->monitored) (*mon_ops->urb_submit_error)(bus, urb, error); } static inline void usbmon_urb_complete(struct usb_bus 
*bus, struct urb *urb, int status) { if (bus->monitored) (*mon_ops->urb_complete)(bus, urb, status); } int usb_mon_register(const struct usb_mon_operations *ops); void usb_mon_deregister(void); #else static inline void usbmon_urb_submit(struct usb_bus *bus, struct urb *urb) {} static inline void usbmon_urb_submit_error(struct usb_bus *bus, struct urb *urb, int error) {} static inline void usbmon_urb_complete(struct usb_bus *bus, struct urb *urb, int status) {} #endif /* CONFIG_USB_MON || CONFIG_USB_MON_MODULE */ /*-------------------------------------------------------------------------*/ /* random stuff */ /* This rwsem is for use only by the hub driver and ehci-hcd. * Nobody else should touch it. */ extern struct rw_semaphore ehci_cf_port_reset_rwsem; /* Keep track of which host controller drivers are loaded */ #define USB_UHCI_LOADED 0 #define USB_OHCI_LOADED 1 #define USB_EHCI_LOADED 2 extern unsigned long usb_hcds_loaded; #endif /* __KERNEL__ */ #endif /* __USB_CORE_HCD_H */
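/*
 * Illustrative sketch, not part of this header: the usual bus-glue pattern
 * built on usb_create_hcd()/usb_add_hcd() declared above.  The function and
 * its parameters are hypothetical; real glue code also fills in hcd->regs,
 * hcd->rsrc_start/rsrc_len and handles clocks, PHYs and error paths in full.
 */
static int example_add_controller(const struct hc_driver *drv,
				  struct device *dev, unsigned int irq)
{
	struct usb_hcd *hcd;
	int ret;

	hcd = usb_create_hcd(drv, dev, dev_name(dev));
	if (!hcd)
		return -ENOMEM;

	/* resets/starts the controller and registers the root hub */
	ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
	if (ret)
		usb_put_hcd(hcd);	/* drops the kref taken at creation */
	return ret;
}

/* Teardown mirrors this: usb_remove_hcd(hcd) followed by usb_put_hcd(hcd). */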
/* SPDX-License-Identifier: GPL-2.0 */ /* * PHY device list allows maintaining a list of PHY devices that are * part of a netdevice's link topology. PHYs can for example be chained, * as is the case when using a PHY that exposes an SFP module, on which an * SFP transceiver that embeds a PHY is connected. * * This list can then be used by userspace to leverage individual PHY * capabilities. */ #ifndef __PHY_LINK_TOPOLOGY_H #define __PHY_LINK_TOPOLOGY_H #include <linux/ethtool.h> #include <linux/netdevice.h> struct xarray; struct phy_device; struct sfp_bus; struct phy_link_topology { struct xarray phys; u32 next_phy_index; }; struct phy_device_node { enum phy_upstream upstream_type; union { struct net_device *netdev; struct phy_device *phydev; } upstream; struct sfp_bus *parent_sfp_bus; struct phy_device *phy; }; #if IS_ENABLED(CONFIG_PHYLIB) int phy_link_topo_add_phy(struct net_device *dev, struct phy_device *phy, enum phy_upstream upt, void *upstream); void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy); static inline struct phy_device * phy_link_topo_get_phy(struct net_device *dev, u32 phyindex) { struct phy_link_topology *topo = dev->link_topo; struct phy_device_node *pdn; if (!topo) return NULL; pdn = xa_load(&topo->phys, phyindex); if (pdn) return pdn->phy; return NULL; } #else static inline int phy_link_topo_add_phy(struct net_device *dev, struct phy_device *phy, enum phy_upstream upt, void *upstream) { return 0; } static inline void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy) { } static inline struct phy_device * phy_link_topo_get_phy(struct net_device *dev, u32 phyindex) { return NULL; } #endif #endif /* __PHY_LINK_TOPOLOGY_H */
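/*
 * Illustrative sketch, not part of this header: how a MAC driver might
 * publish its directly-attached PHY in the link topology so userspace can
 * address it by index.  The function name is hypothetical, and
 * PHY_UPSTREAM_MAC is assumed to be the enum phy_upstream value for a PHY
 * hanging directly off the netdev.
 */
static int example_attach_mac_phy(struct net_device *dev, struct phy_device *phy)
{
	int err;

	/* the netdev itself is the upstream of a directly-attached PHY */
	err = phy_link_topo_add_phy(dev, phy, PHY_UPSTREAM_MAC, dev);
	if (err)
		return err;

	/* later, e.g. from an ethtool handler, an index resolves back to a PHY: */
	/* struct phy_device *found = phy_link_topo_get_phy(dev, phyindex); */

	return 0;
}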
118 84 26 26 96 96 30 26 26 26 26 26 26 26 118 118 118 118 28 43 117 118 118 118 118 118 7 3 118 118 118 96 84 118 26 26 118 118 275 272 22 122 271 275 30 30 30 30 30 30 30 30 30 30 142 142 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 // SPDX-License-Identifier: GPL-2.0 #define pr_fmt(fmt) "%s() " fmt "\n", __func__ #include <linux/generic-radix-tree.h> #include <linux/mm.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/srcu.h> #include <linux/vmalloc.h> #include "rcu_pending.h" #include "darray.h" #include "util.h" #define static_array_for_each(_a, _i) \ for (typeof(&(_a)[0]) _i = _a; \ _i < (_a) + ARRAY_SIZE(_a); \ _i++) enum rcu_pending_special { RCU_PENDING_KVFREE = 1, RCU_PENDING_CALL_RCU = 2, }; #define RCU_PENDING_KVFREE_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_KVFREE) #define RCU_PENDING_CALL_RCU_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_CALL_RCU) #ifdef __KERNEL__ typedef unsigned long rcu_gp_poll_state_t; static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, 
rcu_gp_poll_state_t r) { return l == r; } #else typedef struct urcu_gp_poll_state rcu_gp_poll_state_t; static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r) { return l.grace_period_id == r.grace_period_id; } #endif static inline rcu_gp_poll_state_t __get_state_synchronize_rcu(struct srcu_struct *ssp) { return ssp ? get_state_synchronize_srcu(ssp) : get_state_synchronize_rcu(); } static inline rcu_gp_poll_state_t __start_poll_synchronize_rcu(struct srcu_struct *ssp) { return ssp ? start_poll_synchronize_srcu(ssp) : start_poll_synchronize_rcu(); } static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, rcu_gp_poll_state_t cookie) { return ssp ? poll_state_synchronize_srcu(ssp, cookie) : poll_state_synchronize_rcu(cookie); } static inline void __rcu_barrier(struct srcu_struct *ssp) { return ssp ? srcu_barrier(ssp) : rcu_barrier(); } static inline void __call_rcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func) { if (ssp) call_srcu(ssp, rhp, func); else call_rcu(rhp, func); } struct rcu_pending_seq { /* * We're using a radix tree like a vector - we're just pushing elements * onto the end; we're using a radix tree instead of an actual vector to * avoid reallocation overhead */ GENRADIX(struct rcu_head *) objs; size_t nr; struct rcu_head **cursor; rcu_gp_poll_state_t seq; }; struct rcu_pending_list { struct rcu_head *head; struct rcu_head *tail; rcu_gp_poll_state_t seq; }; struct rcu_pending_pcpu { struct rcu_pending *parent; spinlock_t lock; int cpu; /* * We can't bound the number of unprocessed gp sequence numbers, and we * can't efficiently merge radix trees for expired grace periods, so we * need darray/vector: */ DARRAY_PREALLOCATED(struct rcu_pending_seq, 4) objs; /* Third entry is for expired objects: */ struct rcu_pending_list lists[NUM_ACTIVE_RCU_POLL_OLDSTATE + 1]; struct rcu_head cb; bool cb_armed; struct work_struct work; }; static bool __rcu_pending_has_pending(struct rcu_pending_pcpu *p) { if (p->objs.nr) return true; static_array_for_each(p->lists, i) if (i->head) return true; return false; } static void rcu_pending_list_merge(struct rcu_pending_list *l1, struct rcu_pending_list *l2) { #ifdef __KERNEL__ if (!l1->head) l1->head = l2->head; else l1->tail->next = l2->head; #else if (!l1->head) l1->head = l2->head; else l1->tail->next.next = (void *) l2->head; #endif l1->tail = l2->tail; l2->head = l2->tail = NULL; } static void rcu_pending_list_add(struct rcu_pending_list *l, struct rcu_head *n) { #ifdef __KERNEL__ if (!l->head) l->head = n; else l->tail->next = n; l->tail = n; n->next = NULL; #else if (!l->head) l->head = n; else l->tail->next.next = (void *) n; l->tail = n; n->next.next = NULL; #endif } static void merge_expired_lists(struct rcu_pending_pcpu *p) { struct rcu_pending_list *expired = &p->lists[NUM_ACTIVE_RCU_POLL_OLDSTATE]; for (struct rcu_pending_list *i = p->lists; i < expired; i++) if (i->head && __poll_state_synchronize_rcu(p->parent->srcu, i->seq)) rcu_pending_list_merge(expired, i); } #ifndef __KERNEL__ static inline void kfree_bulk(size_t nr, void ** p) { while (nr--) kfree(*p); } #define local_irq_save(flags) \ do { \ flags = 0; \ } while (0) #endif static noinline void __process_finished_items(struct rcu_pending *pending, struct rcu_pending_pcpu *p, unsigned long flags) { struct rcu_pending_list *expired = &p->lists[NUM_ACTIVE_RCU_POLL_OLDSTATE]; struct rcu_pending_seq objs = {}; struct rcu_head *list = NULL; if (p->objs.nr && __poll_state_synchronize_rcu(pending->srcu, 
p->objs.data[0].seq)) { objs = p->objs.data[0]; darray_remove_item(&p->objs, p->objs.data); } merge_expired_lists(p); list = expired->head; expired->head = expired->tail = NULL; spin_unlock_irqrestore(&p->lock, flags); switch ((ulong) pending->process) { case RCU_PENDING_KVFREE: for (size_t i = 0; i < objs.nr; ) { size_t nr_this_node = min(GENRADIX_NODE_SIZE / sizeof(void *), objs.nr - i); kfree_bulk(nr_this_node, (void **) genradix_ptr(&objs.objs, i)); i += nr_this_node; } genradix_free(&objs.objs); while (list) { struct rcu_head *obj = list; #ifdef __KERNEL__ list = obj->next; #else list = (void *) obj->next.next; #endif /* * low bit of pointer indicates whether rcu_head needs * to be freed - kvfree_rcu_mightsleep() */ BUILD_BUG_ON(ARCH_SLAB_MINALIGN == 0); void *ptr = (void *)(((unsigned long) obj->func) & ~1UL); bool free_head = ((unsigned long) obj->func) & 1UL; kvfree(ptr); if (free_head) kfree(obj); } break; case RCU_PENDING_CALL_RCU: for (size_t i = 0; i < objs.nr; i++) { struct rcu_head *obj = *genradix_ptr(&objs.objs, i); obj->func(obj); } genradix_free(&objs.objs); while (list) { struct rcu_head *obj = list; #ifdef __KERNEL__ list = obj->next; #else list = (void *) obj->next.next; #endif obj->func(obj); } break; default: for (size_t i = 0; i < objs.nr; i++) pending->process(pending, *genradix_ptr(&objs.objs, i)); genradix_free(&objs.objs); while (list) { struct rcu_head *obj = list; #ifdef __KERNEL__ list = obj->next; #else list = (void *) obj->next.next; #endif pending->process(pending, obj); } break; } } static bool process_finished_items(struct rcu_pending *pending, struct rcu_pending_pcpu *p, unsigned long flags) { /* * XXX: we should grab the gp seq once and avoid multiple function * calls, this is called from __rcu_pending_enqueue() fastpath in * may_sleep==true mode */ if ((p->objs.nr && __poll_state_synchronize_rcu(pending->srcu, p->objs.data[0].seq)) || (p->lists[0].head && __poll_state_synchronize_rcu(pending->srcu, p->lists[0].seq)) || (p->lists[1].head && __poll_state_synchronize_rcu(pending->srcu, p->lists[1].seq)) || p->lists[2].head) { __process_finished_items(pending, p, flags); return true; } return false; } static void rcu_pending_work(struct work_struct *work) { struct rcu_pending_pcpu *p = container_of(work, struct rcu_pending_pcpu, work); struct rcu_pending *pending = p->parent; unsigned long flags; do { spin_lock_irqsave(&p->lock, flags); } while (process_finished_items(pending, p, flags)); spin_unlock_irqrestore(&p->lock, flags); } static void rcu_pending_rcu_cb(struct rcu_head *rcu) { struct rcu_pending_pcpu *p = container_of(rcu, struct rcu_pending_pcpu, cb); schedule_work_on(p->cpu, &p->work); unsigned long flags; spin_lock_irqsave(&p->lock, flags); if (__rcu_pending_has_pending(p)) { spin_unlock_irqrestore(&p->lock, flags); __call_rcu(p->parent->srcu, &p->cb, rcu_pending_rcu_cb); } else { p->cb_armed = false; spin_unlock_irqrestore(&p->lock, flags); } } static __always_inline struct rcu_pending_seq * get_object_radix(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq) { darray_for_each_reverse(p->objs, objs) if (rcu_gp_poll_cookie_eq(objs->seq, seq)) return objs; if (darray_push_gfp(&p->objs, ((struct rcu_pending_seq) { .seq = seq }), GFP_ATOMIC)) return NULL; return &darray_last(p->objs); } static noinline bool rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq, struct rcu_head *head, void *ptr, unsigned long *flags) { if (ptr) { if (!head) { /* * kvfree_rcu_mightsleep(): we weren't passed an * rcu_head, but we need one: use 
the low bit of the * ponter to free to flag that the head needs to be * freed as well: */ ptr = (void *)(((unsigned long) ptr)|1UL); head = kmalloc(sizeof(*head), __GFP_NOWARN); if (!head) { spin_unlock_irqrestore(&p->lock, *flags); head = kmalloc(sizeof(*head), GFP_KERNEL|__GFP_NOFAIL); /* * dropped lock, did GFP_KERNEL allocation, * check for gp expiration */ if (unlikely(__poll_state_synchronize_rcu(p->parent->srcu, seq))) { kvfree(--ptr); kfree(head); spin_lock_irqsave(&p->lock, *flags); return false; } } } head->func = ptr; } again: for (struct rcu_pending_list *i = p->lists; i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) { if (rcu_gp_poll_cookie_eq(i->seq, seq)) { rcu_pending_list_add(i, head); return false; } } for (struct rcu_pending_list *i = p->lists; i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) { if (!i->head) { i->seq = seq; rcu_pending_list_add(i, head); return true; } } merge_expired_lists(p); goto again; } /* * __rcu_pending_enqueue: enqueue a pending RCU item, to be processed (via * pending->pracess) once grace period elapses. * * Attempt to enqueue items onto a radix tree; if memory allocation fails, fall * back to a linked list. * * - If @ptr is NULL, we're enqueuing an item for a generic @pending with a * process callback * * - If @ptr and @head are both not NULL, we're kvfree_rcu() * * - If @ptr is not NULL and @head is, we're kvfree_rcu_mightsleep() * * - If @may_sleep is true, will do GFP_KERNEL memory allocations and process * expired items. */ static __always_inline void __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, void *ptr, bool may_sleep) { struct rcu_pending_pcpu *p; struct rcu_pending_seq *objs; struct genradix_node *new_node = NULL; unsigned long flags; bool start_gp = false; BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN)); local_irq_save(flags); p = this_cpu_ptr(pending->p); spin_lock(&p->lock); rcu_gp_poll_state_t seq = __get_state_synchronize_rcu(pending->srcu); restart: if (may_sleep && unlikely(process_finished_items(pending, p, flags))) goto check_expired; /* * In kvfree_rcu() mode, the radix tree is only for slab pointers so * that we can do kfree_bulk() - vmalloc pointers always use the linked * list: */ if (ptr && unlikely(is_vmalloc_addr(ptr))) goto list_add; objs = get_object_radix(p, seq); if (unlikely(!objs)) goto list_add; if (unlikely(!objs->cursor)) { /* * New radix tree nodes must be added under @p->lock because the * tree root is in a darray that can be resized (typically, * genradix supports concurrent unlocked allocation of new * nodes) - hence preallocation and the retry loop: */ objs->cursor = genradix_ptr_alloc_preallocated_inlined(&objs->objs, objs->nr, &new_node, GFP_ATOMIC|__GFP_NOWARN); if (unlikely(!objs->cursor)) { if (may_sleep) { spin_unlock_irqrestore(&p->lock, flags); gfp_t gfp = GFP_KERNEL; if (!head) gfp |= __GFP_NOFAIL; new_node = genradix_alloc_node(gfp); if (!new_node) may_sleep = false; goto check_expired; } list_add: start_gp = rcu_pending_enqueue_list(p, seq, head, ptr, &flags); goto start_gp; } } *objs->cursor++ = ptr ?: head; /* zero cursor if we hit the end of a radix tree node: */ if (!(((ulong) objs->cursor) & (GENRADIX_NODE_SIZE - 1))) objs->cursor = NULL; start_gp = !objs->nr; objs->nr++; start_gp: if (unlikely(start_gp)) { /* * We only have one callback (ideally, we would have one for * every outstanding graceperiod) - so if our callback is * already in flight, we may still have to start a grace period * (since we used get_state() above, not start_poll()) 
*/ if (!p->cb_armed) { p->cb_armed = true; __call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb); } else { __start_poll_synchronize_rcu(pending->srcu); } } spin_unlock_irqrestore(&p->lock, flags); free_node: if (new_node) genradix_free_node(new_node); return; check_expired: if (unlikely(__poll_state_synchronize_rcu(pending->srcu, seq))) { switch ((ulong) pending->process) { case RCU_PENDING_KVFREE: kvfree(ptr); break; case RCU_PENDING_CALL_RCU: head->func(head); break; default: pending->process(pending, head); break; } goto free_node; } local_irq_save(flags); p = this_cpu_ptr(pending->p); spin_lock(&p->lock); goto restart; } void rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *obj) { __rcu_pending_enqueue(pending, obj, NULL, true); } static struct rcu_head *rcu_pending_pcpu_dequeue(struct rcu_pending_pcpu *p) { struct rcu_head *ret = NULL; spin_lock_irq(&p->lock); darray_for_each(p->objs, objs) if (objs->nr) { ret = *genradix_ptr(&objs->objs, --objs->nr); objs->cursor = NULL; if (!objs->nr) genradix_free(&objs->objs); goto out; } static_array_for_each(p->lists, i) if (i->head) { ret = i->head; #ifdef __KERNEL__ i->head = ret->next; #else i->head = (void *) ret->next.next; #endif if (!i->head) i->tail = NULL; goto out; } out: spin_unlock_irq(&p->lock); return ret; } struct rcu_head *rcu_pending_dequeue(struct rcu_pending *pending) { return rcu_pending_pcpu_dequeue(raw_cpu_ptr(pending->p)); } struct rcu_head *rcu_pending_dequeue_from_all(struct rcu_pending *pending) { struct rcu_head *ret = rcu_pending_dequeue(pending); if (ret) return ret; int cpu; for_each_possible_cpu(cpu) { ret = rcu_pending_pcpu_dequeue(per_cpu_ptr(pending->p, cpu)); if (ret) break; } return ret; } static bool rcu_pending_has_pending_or_armed(struct rcu_pending *pending) { int cpu; for_each_possible_cpu(cpu) { struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu); spin_lock_irq(&p->lock); if (__rcu_pending_has_pending(p) || p->cb_armed) { spin_unlock_irq(&p->lock); return true; } spin_unlock_irq(&p->lock); } return false; } void rcu_pending_exit(struct rcu_pending *pending) { int cpu; if (!pending->p) return; while (rcu_pending_has_pending_or_armed(pending)) { __rcu_barrier(pending->srcu); for_each_possible_cpu(cpu) { struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu); flush_work(&p->work); } } for_each_possible_cpu(cpu) { struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu); flush_work(&p->work); } for_each_possible_cpu(cpu) { struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu); static_array_for_each(p->lists, i) WARN_ON(i->head); WARN_ON(p->objs.nr); darray_exit(&p->objs); } free_percpu(pending->p); } /** * rcu_pending_init: - initialize a rcu_pending * * @pending: Object to init * @srcu: May optionally be used with an srcu_struct; if NULL, uses normal * RCU flavor * @process: Callback function invoked on objects once their RCU barriers * have completed; if NULL, kvfree() is used. */ int rcu_pending_init(struct rcu_pending *pending, struct srcu_struct *srcu, rcu_pending_process_fn process) { pending->p = alloc_percpu(struct rcu_pending_pcpu); if (!pending->p) return -ENOMEM; int cpu; for_each_possible_cpu(cpu) { struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu); p->parent = pending; p->cpu = cpu; spin_lock_init(&p->lock); darray_init(&p->objs); INIT_WORK(&p->work, rcu_pending_work); } pending->srcu = srcu; pending->process = process; return 0; }
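/*
 * Usage sketch for the rcu_pending API implemented above (kernel context;
 * the example_* names are illustrative). Objects embed a struct rcu_head,
 * the process callback recovers the container once the grace period has
 * elapsed, and passing a NULL srcu_struct to rcu_pending_init() selects
 * normal RCU, as its kerneldoc states.
 */
#include <linux/container_of.h>
#include <linux/slab.h>
#include "rcu_pending.h"

struct example_obj {
	struct rcu_head	rcu;
	int		payload;
};

static void example_process(struct rcu_pending *pending, struct rcu_head *rcu)
{
	struct example_obj *obj = container_of(rcu, struct example_obj, rcu);

	kfree(obj);		/* grace period elapsed; safe to free */
}

static struct rcu_pending example_pending;

static int example_init(void)
{
	return rcu_pending_init(&example_pending, NULL, example_process);
}

static void example_retire(struct example_obj *obj)
{
	rcu_pending_enqueue(&example_pending, &obj->rcu);
}

static void example_exit(void)
{
	rcu_pending_exit(&example_pending);
}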
14 99 118 100 92 98 11 97 105 116 6 119 119 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 /* SPDX-License-Identifier: GPL-2.0-only */ /* * sha512_base.h - core logic for SHA-512 implementations * * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> */ #ifndef _CRYPTO_SHA512_BASE_H #define _CRYPTO_SHA512_BASE_H #include <crypto/internal/hash.h> #include <crypto/sha2.h> #include <linux/crypto.h> #include <linux/module.h> #include <linux/string.h> #include <linux/unaligned.h> typedef void (sha512_block_fn)(struct sha512_state *sst, u8 const *src, int blocks); static inline int sha384_base_init(struct shash_desc *desc) { struct sha512_state *sctx = shash_desc_ctx(desc); sctx->state[0] = SHA384_H0; sctx->state[1] = SHA384_H1; sctx->state[2] = SHA384_H2; sctx->state[3] = SHA384_H3; sctx->state[4] = SHA384_H4; sctx->state[5] = SHA384_H5; sctx->state[6] = SHA384_H6; sctx->state[7] = SHA384_H7; sctx->count[0] = sctx->count[1] = 0; return 0; } static inline int sha512_base_init(struct shash_desc *desc) { struct sha512_state *sctx = shash_desc_ctx(desc); sctx->state[0] = SHA512_H0; sctx->state[1] = SHA512_H1; sctx->state[2] = SHA512_H2; sctx->state[3] = SHA512_H3; sctx->state[4] = SHA512_H4; sctx->state[5] = SHA512_H5; sctx->state[6] = SHA512_H6; sctx->state[7] = SHA512_H7; sctx->count[0] = sctx->count[1] = 0; return 0; } static inline int sha512_base_do_update(struct shash_desc *desc, const u8 *data, unsigned int len, sha512_block_fn *block_fn) { struct sha512_state *sctx = shash_desc_ctx(desc); unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; sctx->count[0] += len; if (sctx->count[0] < len) sctx->count[1]++; if (unlikely((partial + len) >= SHA512_BLOCK_SIZE)) { int blocks; if (partial) { int p = SHA512_BLOCK_SIZE - partial; memcpy(sctx->buf + partial, data, p); data += p; len -= p; block_fn(sctx, sctx->buf, 1); } blocks = len / SHA512_BLOCK_SIZE; len %= SHA512_BLOCK_SIZE; if (blocks) { block_fn(sctx, data, blocks); data += blocks * SHA512_BLOCK_SIZE; } partial = 0; } if (len) memcpy(sctx->buf + partial, data, len); return 0; } static inline int sha512_base_do_finalize(struct shash_desc *desc, sha512_block_fn *block_fn) { const int bit_offset = SHA512_BLOCK_SIZE - sizeof(__be64[2]); struct sha512_state *sctx = shash_desc_ctx(desc); __be64 *bits = (__be64 *)(sctx->buf + bit_offset); unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; sctx->buf[partial++] = 0x80; if (partial > bit_offset) { memset(sctx->buf + partial, 0x0, SHA512_BLOCK_SIZE - partial); partial = 0; block_fn(sctx, sctx->buf, 1); } memset(sctx->buf + partial, 0x0, bit_offset - partial); bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); bits[1] = cpu_to_be64(sctx->count[0] << 3); block_fn(sctx, sctx->buf, 1); return 0; } static inline int sha512_base_finish(struct shash_desc *desc, u8 *out) { unsigned int digest_size = crypto_shash_digestsize(desc->tfm); struct sha512_state *sctx = shash_desc_ctx(desc); __be64 *digest = (__be64 *)out; int i; for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be64)) put_unaligned_be64(sctx->state[i], digest++); memzero_explicit(sctx, sizeof(*sctx)); return 0; } #endif /* 
_CRYPTO_SHA512_BASE_H */
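/*
 * Sketch of how an architecture-specific SHA-512 driver typically plugs a
 * block-compression routine into the helpers above (names prefixed with
 * example_ are illustrative; registration via crypto_register_shash() is
 * omitted). The block function is assumed to process 'blocks' complete
 * SHA512_BLOCK_SIZE chunks from 'src', e.g. an asm implementation.
 */
#include <crypto/internal/hash.h>
#include <crypto/sha512_base.h>
#include <linux/module.h>

void example_sha512_transform(struct sha512_state *sst, u8 const *src,
			      int blocks);

static int example_sha512_update(struct shash_desc *desc, const u8 *data,
				 unsigned int len)
{
	return sha512_base_do_update(desc, data, len, example_sha512_transform);
}

static int example_sha512_final(struct shash_desc *desc, u8 *out)
{
	sha512_base_do_finalize(desc, example_sha512_transform);
	return sha512_base_finish(desc, out);
}

static struct shash_alg example_sha512_alg = {
	.digestsize	= SHA512_DIGEST_SIZE,
	.init		= sha512_base_init,
	.update		= example_sha512_update,
	.final		= example_sha512_final,
	.descsize	= sizeof(struct sha512_state),
	.base		= {
		.cra_name		= "sha512",
		.cra_driver_name	= "sha512-example",
		.cra_blocksize		= SHA512_BLOCK_SIZE,
		.cra_module		= THIS_MODULE,
	},
};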
606 180 342 637 543 230 561 452 240 322 423 453 542 229 289 289 290 479 876 124 126 164 44 121 135 3 136 175 46 11 46 636 478 636 578 637 453 39 523 556 480 299 425 425 421 424 425 418 8 424 1 574 574 447 417 25 25 25 25 24 25 25 25 25 227 42 437 234 537 537 319 454 190 190 365 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 // SPDX-License-Identifier: GPL-2.0 #include "misc.h" #include "ctree.h" #include "block-rsv.h" #include "space-info.h" #include "transaction.h" #include "block-group.h" #include "fs.h" #include "accessors.h" /* * HOW DO BLOCK RESERVES WORK * * Think of block_rsv's as buckets for logically grouped metadata * reservations. Each block_rsv has a ->size and a ->reserved. ->size is * how large we want our block rsv to be, ->reserved is how much space is * currently reserved for this block reserve. * * ->failfast exists for the truncate case, and is described below. * * NORMAL OPERATION * * -> Reserve * Entrance: btrfs_block_rsv_add, btrfs_block_rsv_refill * * We call into btrfs_reserve_metadata_bytes() with our bytes, which is * accounted for in space_info->bytes_may_use, and then add the bytes to * ->reserved, and ->size in the case of btrfs_block_rsv_add. * * ->size is an over-estimation of how much we may use for a particular * operation. 
* * -> Use * Entrance: btrfs_use_block_rsv * * When we do a btrfs_alloc_tree_block() we call into btrfs_use_block_rsv() * to determine the appropriate block_rsv to use, and then verify that * ->reserved has enough space for our tree block allocation. Once * successful we subtract fs_info->nodesize from ->reserved. * * -> Finish * Entrance: btrfs_block_rsv_release * * We are finished with our operation, subtract our individual reservation * from ->size, and then subtract ->size from ->reserved and free up the * excess if there is any. * * There is some logic here to refill the delayed refs rsv or the global rsv * as needed, otherwise the excess is subtracted from * space_info->bytes_may_use. * * TYPES OF BLOCK RESERVES * * BLOCK_RSV_TRANS, BLOCK_RSV_DELOPS, BLOCK_RSV_CHUNK * These behave normally, as described above, just within the confines of the * lifetime of their particular operation (transaction for the whole trans * handle lifetime, for example). * * BLOCK_RSV_GLOBAL * It is impossible to properly account for all the space that may be required * to make our extent tree updates. This block reserve acts as an overflow * buffer in case our delayed refs reserve does not reserve enough space to * update the extent tree. * * We can steal from this in some cases as well, notably on evict() or * truncate() in order to help users recover from ENOSPC conditions. * * BLOCK_RSV_DELALLOC * The individual item sizes are determined by the per-inode size * calculations, which are described with the delalloc code. This is pretty * straightforward, it's just the calculation of ->size encodes a lot of * different items, and thus it gets used when updating inodes, inserting file * extents, and inserting checksums. * * BLOCK_RSV_DELREFS * We keep a running tally of how many delayed refs we have on the system. * We assume each one of these delayed refs are going to use a full * reservation. We use the transaction items and pre-reserve space for every * operation, and use this reservation to refill any gap between ->size and * ->reserved that may exist. * * From there it's straightforward, removing a delayed ref means we remove its * count from ->size and free up reservations as necessary. Since this is * the most dynamic block reserve in the system, we will try to refill this * block reserve first with any excess returned by any other block reserve. * * BLOCK_RSV_EMPTY * This is the fallback block reserve to make us try to reserve space if we * don't have a specific bucket for this allocation. It is mostly used for * updating the device tree and such, since that is a separate pool we're * content to just reserve space from the space_info on demand. * * BLOCK_RSV_TEMP * This is used by things like truncate and iput. We will temporarily * allocate a block reserve, set it to some size, and then truncate bytes * until we have no space left. With ->failfast set we'll simply return * ENOSPC from btrfs_use_block_rsv() to signal that we need to unwind and try * to make a new reservation. This is because these operations are * unbounded, so we want to do as much work as we can, and then back off and * re-reserve. 
*/ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, struct btrfs_block_rsv *dest, u64 num_bytes, u64 *qgroup_to_release_ret) { struct btrfs_space_info *space_info = block_rsv->space_info; u64 qgroup_to_release = 0; u64 ret; spin_lock(&block_rsv->lock); if (num_bytes == (u64)-1) { num_bytes = block_rsv->size; qgroup_to_release = block_rsv->qgroup_rsv_size; } block_rsv->size -= num_bytes; if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; block_rsv->reserved = block_rsv->size; block_rsv->full = true; } else { num_bytes = 0; } if (qgroup_to_release_ret && block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { qgroup_to_release = block_rsv->qgroup_rsv_reserved - block_rsv->qgroup_rsv_size; block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size; } else { qgroup_to_release = 0; } spin_unlock(&block_rsv->lock); ret = num_bytes; if (num_bytes > 0) { if (dest) { spin_lock(&dest->lock); if (!dest->full) { u64 bytes_to_add; bytes_to_add = dest->size - dest->reserved; bytes_to_add = min(num_bytes, bytes_to_add); dest->reserved += bytes_to_add; if (dest->reserved >= dest->size) dest->full = true; num_bytes -= bytes_to_add; } spin_unlock(&dest->lock); } if (num_bytes) btrfs_space_info_free_bytes_may_use(space_info, num_bytes); } if (qgroup_to_release_ret) *qgroup_to_release_ret = qgroup_to_release; return ret; } int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src, struct btrfs_block_rsv *dst, u64 num_bytes, bool update_size) { int ret; ret = btrfs_block_rsv_use_bytes(src, num_bytes); if (ret) return ret; btrfs_block_rsv_add_bytes(dst, num_bytes, update_size); return 0; } void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, enum btrfs_rsv_type type) { memset(rsv, 0, sizeof(*rsv)); spin_lock_init(&rsv->lock); rsv->type = type; } void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *rsv, enum btrfs_rsv_type type) { btrfs_init_block_rsv(rsv, type); rsv->space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); } struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, enum btrfs_rsv_type type) { struct btrfs_block_rsv *block_rsv; block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); if (!block_rsv) return NULL; btrfs_init_metadata_block_rsv(fs_info, block_rsv, type); return block_rsv; } void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *rsv) { if (!rsv) return; btrfs_block_rsv_release(fs_info, rsv, (u64)-1, NULL); kfree(rsv); } int btrfs_block_rsv_add(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, u64 num_bytes, enum btrfs_reserve_flush_enum flush) { int ret; if (num_bytes == 0) return 0; ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, num_bytes, flush); if (!ret) btrfs_block_rsv_add_bytes(block_rsv, num_bytes, true); return ret; } int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_percent) { u64 num_bytes = 0; int ret = -ENOSPC; spin_lock(&block_rsv->lock); num_bytes = mult_perc(block_rsv->size, min_percent); if (block_rsv->reserved >= num_bytes) ret = 0; spin_unlock(&block_rsv->lock); return ret; } int btrfs_block_rsv_refill(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, u64 num_bytes, enum btrfs_reserve_flush_enum flush) { int ret = -ENOSPC; if (!block_rsv) return 0; spin_lock(&block_rsv->lock); if (block_rsv->reserved >= num_bytes) ret = 0; else num_bytes -= block_rsv->reserved; spin_unlock(&block_rsv->lock); if 
(!ret) return 0; ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, num_bytes, flush); if (!ret) { btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false); return 0; } return ret; } u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, u64 num_bytes, u64 *qgroup_to_release) { struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; struct btrfs_block_rsv *target = NULL; /* * If we are a delayed block reserve then push to the global rsv, * otherwise dump into the global delayed reserve if it is not full. */ if (block_rsv->type == BTRFS_BLOCK_RSV_DELOPS) target = global_rsv; else if (block_rsv != global_rsv && !btrfs_block_rsv_full(delayed_rsv)) target = delayed_rsv; if (target && block_rsv->space_info != target->space_info) target = NULL; return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes, qgroup_to_release); } int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes) { int ret = -ENOSPC; spin_lock(&block_rsv->lock); if (block_rsv->reserved >= num_bytes) { block_rsv->reserved -= num_bytes; if (block_rsv->reserved < block_rsv->size) block_rsv->full = false; ret = 0; } spin_unlock(&block_rsv->lock); return ret; } void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes, bool update_size) { spin_lock(&block_rsv->lock); block_rsv->reserved += num_bytes; if (update_size) block_rsv->size += num_bytes; else if (block_rsv->reserved >= block_rsv->size) block_rsv->full = true; spin_unlock(&block_rsv->lock); } void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info) { struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; struct btrfs_space_info *sinfo = block_rsv->space_info; struct btrfs_root *root, *tmp; u64 num_bytes = btrfs_root_used(&fs_info->tree_root->root_item); unsigned int min_items = 1; /* * The global block rsv is based on the size of the extent tree, the * checksum tree and the root tree. If the fs is empty we want to set * it to a minimal amount for safety. * * We also are going to need to modify the minimum of the tree root and * any global roots we could touch. */ read_lock(&fs_info->global_root_lock); rbtree_postorder_for_each_entry_safe(root, tmp, &fs_info->global_root_tree, rb_node) { if (btrfs_root_id(root) == BTRFS_EXTENT_TREE_OBJECTID || btrfs_root_id(root) == BTRFS_CSUM_TREE_OBJECTID || btrfs_root_id(root) == BTRFS_FREE_SPACE_TREE_OBJECTID) { num_bytes += btrfs_root_used(&root->root_item); min_items++; } } read_unlock(&fs_info->global_root_lock); if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) { num_bytes += btrfs_root_used(&fs_info->block_group_root->root_item); min_items++; } if (btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE)) { num_bytes += btrfs_root_used(&fs_info->stripe_root->root_item); min_items++; } /* * But we also want to reserve enough space so we can do the fallback * global reserve for an unlink, which is an additional * BTRFS_UNLINK_METADATA_UNITS items. * * But we also need space for the delayed ref updates from the unlink, * so add BTRFS_UNLINK_METADATA_UNITS units for delayed refs, one for * each unlink metadata item. 
*/ min_items += BTRFS_UNLINK_METADATA_UNITS; num_bytes = max_t(u64, num_bytes, btrfs_calc_insert_metadata_size(fs_info, min_items) + btrfs_calc_delayed_ref_bytes(fs_info, BTRFS_UNLINK_METADATA_UNITS)); spin_lock(&sinfo->lock); spin_lock(&block_rsv->lock); block_rsv->size = min_t(u64, num_bytes, SZ_512M); if (block_rsv->reserved < block_rsv->size) { num_bytes = block_rsv->size - block_rsv->reserved; btrfs_space_info_update_bytes_may_use(sinfo, num_bytes); block_rsv->reserved = block_rsv->size; } else if (block_rsv->reserved > block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; btrfs_space_info_update_bytes_may_use(sinfo, -num_bytes); block_rsv->reserved = block_rsv->size; btrfs_try_granting_tickets(fs_info, sinfo); } block_rsv->full = (block_rsv->reserved == block_rsv->size); if (block_rsv->size >= sinfo->total_bytes) sinfo->force_alloc = CHUNK_ALLOC_FORCE; spin_unlock(&block_rsv->lock); spin_unlock(&sinfo->lock); } void btrfs_init_root_block_rsv(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; switch (btrfs_root_id(root)) { case BTRFS_CSUM_TREE_OBJECTID: case BTRFS_EXTENT_TREE_OBJECTID: case BTRFS_FREE_SPACE_TREE_OBJECTID: case BTRFS_BLOCK_GROUP_TREE_OBJECTID: case BTRFS_RAID_STRIPE_TREE_OBJECTID: root->block_rsv = &fs_info->delayed_refs_rsv; break; case BTRFS_ROOT_TREE_OBJECTID: case BTRFS_DEV_TREE_OBJECTID: case BTRFS_QUOTA_TREE_OBJECTID: root->block_rsv = &fs_info->global_block_rsv; break; case BTRFS_CHUNK_TREE_OBJECTID: root->block_rsv = &fs_info->chunk_block_rsv; break; default: root->block_rsv = NULL; break; } } void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info) { struct btrfs_space_info *space_info; space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); fs_info->chunk_block_rsv.space_info = space_info; space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); fs_info->global_block_rsv.space_info = space_info; fs_info->trans_block_rsv.space_info = space_info; fs_info->empty_block_rsv.space_info = space_info; fs_info->delayed_block_rsv.space_info = space_info; fs_info->delayed_refs_rsv.space_info = space_info; btrfs_update_global_block_rsv(fs_info); } void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info) { btrfs_block_rsv_release(fs_info, &fs_info->global_block_rsv, (u64)-1, NULL); WARN_ON(fs_info->trans_block_rsv.size > 0); WARN_ON(fs_info->trans_block_rsv.reserved > 0); WARN_ON(fs_info->chunk_block_rsv.size > 0); WARN_ON(fs_info->chunk_block_rsv.reserved > 0); WARN_ON(fs_info->delayed_block_rsv.size > 0); WARN_ON(fs_info->delayed_block_rsv.reserved > 0); WARN_ON(fs_info->delayed_refs_rsv.reserved > 0); WARN_ON(fs_info->delayed_refs_rsv.size > 0); } static struct btrfs_block_rsv *get_block_rsv( const struct btrfs_trans_handle *trans, const struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_block_rsv *block_rsv = NULL; if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) || (root == fs_info->uuid_root) || (trans->adding_csums && btrfs_root_id(root) == BTRFS_CSUM_TREE_OBJECTID)) block_rsv = trans->block_rsv; if (!block_rsv) block_rsv = root->block_rsv; if (!block_rsv) block_rsv = &fs_info->empty_block_rsv; return block_rsv; } struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; int ret; bool global_updated = false; block_rsv = 
get_block_rsv(trans, root); if (unlikely(btrfs_block_rsv_size(block_rsv) == 0)) goto try_reserve; again: ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize); if (!ret) return block_rsv; if (block_rsv->failfast) return ERR_PTR(ret); if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) { global_updated = true; btrfs_update_global_block_rsv(fs_info); goto again; } /* * The global reserve still exists to save us from ourselves, so don't * warn_on if we are short on our delayed refs reserve. */ if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL * 10, /*DEFAULT_RATELIMIT_BURST*/ 1); if (__ratelimit(&_rs)) WARN(1, KERN_DEBUG "BTRFS: block rsv %d returned %d\n", block_rsv->type, ret); } try_reserve: ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, blocksize, BTRFS_RESERVE_NO_FLUSH); if (!ret) return block_rsv; /* * If we couldn't reserve metadata bytes try and use some from * the global reserve if its space type is the same as the global * reservation. */ if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL && block_rsv->space_info == global_rsv->space_info) { ret = btrfs_block_rsv_use_bytes(global_rsv, blocksize); if (!ret) return global_rsv; } /* * All hope is lost, but of course our reservations are overly * pessimistic, so instead of possibly having an ENOSPC abort here, try * one last time to force a reservation if there's enough actual space * on disk to make the reservation. */ ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, blocksize, BTRFS_RESERVE_FLUSH_EMERGENCY); if (!ret) return block_rsv; return ERR_PTR(ret); } int btrfs_check_trunc_cache_free_space(const struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *rsv) { u64 needed_bytes; int ret; /* 1 for slack space, 1 for updating the inode */ needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) + btrfs_calc_metadata_size(fs_info, 1); spin_lock(&rsv->lock); if (rsv->reserved < needed_bytes) ret = -ENOSPC; else ret = 0; spin_unlock(&rsv->lock); return ret; }
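/*
 * Sketch of the BLOCK_RSV_TEMP lifecycle described in the comment block at
 * the top of this file (kernel context inside fs/btrfs; the function name,
 * the item count, and the exact set of local headers are illustrative
 * assumptions). A temporary reserve is allocated, filled for a couple of
 * insertions, charged during the operation, and then released in full.
 */
#include "fs.h"
#include "block-rsv.h"
#include "space-info.h"

static int example_temp_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *rsv;
	u64 bytes;
	int ret;

	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		return -ENOMEM;

	/* Enough metadata space for two tree item insertions (example). */
	bytes = btrfs_calc_insert_metadata_size(fs_info, 2);
	ret = btrfs_block_rsv_add(fs_info, rsv, bytes, BTRFS_RESERVE_FLUSH_ALL);
	if (ret)
		goto out;

	/* ... metadata work charged against 'rsv' goes here ... */

out:
	/* btrfs_free_block_rsv() releases any remaining reservation. */
	btrfs_free_block_rsv(fs_info, rsv);
	return ret;
}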
1 10 10 10 10 6 10 10 10 10 3 3 3 4 5 5 5 5 5 5 5 5 5 5 5 5 16 15 13 5 9 15 16 16 16 16 3 5 5 7 5 16 16 3 16 16 16 16 16 16 9 16 21 21 7 7 7 21 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 // SPDX-License-Identifier: GPL-2.0 #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <net/wext.h> #include <net/hotdata.h> #include "dev.h" static void *dev_seq_from_index(struct seq_file *seq, loff_t *pos) { unsigned long ifindex = *pos; struct net_device *dev; for_each_netdev_dump(seq_file_net(seq), dev, ifindex) { *pos = dev->ifindex; return dev; } return NULL; } static void *dev_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; return dev_seq_from_index(seq, pos); } static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return dev_seq_from_index(seq, pos); } static void dev_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, stats->rx_fifo_errors, stats->rx_length_errors + stats->rx_over_errors + stats->rx_crc_errors + stats->rx_frame_errors, stats->rx_compressed, stats->multicast, stats->tx_bytes, stats->tx_packets, stats->tx_errors, stats->tx_dropped, stats->tx_fifo_errors, stats->collisions, stats->tx_carrier_errors + stats->tx_aborted_errors + stats->tx_window_errors + stats->tx_heartbeat_errors, stats->tx_compressed); } /* * Called from the PROCfs module. 
This now uses the new arbitrary sized * /proc/net interface to create /proc/net/dev */ static int dev_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_puts(seq, "Inter-| Receive " " | Transmit\n" " face |bytes packets errs drop fifo frame " "compressed multicast|bytes packets errs " "drop fifo colls carrier compressed\n"); else dev_seq_printf_stats(seq, v); return 0; } static u32 softnet_input_pkt_queue_len(struct softnet_data *sd) { return skb_queue_len_lockless(&sd->input_pkt_queue); } static u32 softnet_process_queue_len(struct softnet_data *sd) { return skb_queue_len_lockless(&sd->process_queue); } static struct softnet_data *softnet_get_online(loff_t *pos) { struct softnet_data *sd = NULL; while (*pos < nr_cpu_ids) if (cpu_online(*pos)) { sd = &per_cpu(softnet_data, *pos); break; } else ++*pos; return sd; } static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) { return softnet_get_online(pos); } static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return softnet_get_online(pos); } static void softnet_seq_stop(struct seq_file *seq, void *v) { } static int softnet_seq_show(struct seq_file *seq, void *v) { struct softnet_data *sd = v; u32 input_qlen = softnet_input_pkt_queue_len(sd); u32 process_qlen = softnet_process_queue_len(sd); unsigned int flow_limit_count = 0; #ifdef CONFIG_NET_FLOW_LIMIT struct sd_flow_limit *fl; rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); if (fl) flow_limit_count = fl->count; rcu_read_unlock(); #endif /* the index is the CPU id owing this sd. Since offline CPUs are not * displayed, it would be othrwise not trivial for the user-space * mapping the data a specific CPU */ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x\n", sd->processed, atomic_read(&sd->dropped), sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ 0, /* was cpu_collision */ sd->received_rps, flow_limit_count, input_qlen + process_qlen, (int)seq->index, input_qlen, process_qlen); return 0; } static const struct seq_operations dev_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, .stop = dev_seq_stop, .show = dev_seq_show, }; static const struct seq_operations softnet_seq_ops = { .start = softnet_seq_start, .next = softnet_seq_next, .stop = softnet_seq_stop, .show = softnet_seq_show, }; static void *ptype_get_idx(struct seq_file *seq, loff_t pos) { struct list_head *ptype_list = NULL; struct packet_type *pt = NULL; struct net_device *dev; loff_t i = 0; int t; for_each_netdev_rcu(seq_file_net(seq), dev) { ptype_list = &dev->ptype_all; list_for_each_entry_rcu(pt, ptype_list, list) { if (i == pos) return pt; ++i; } } list_for_each_entry_rcu(pt, &net_hotdata.ptype_all, list) { if (i == pos) return pt; ++i; } for (t = 0; t < PTYPE_HASH_SIZE; t++) { list_for_each_entry_rcu(pt, &ptype_base[t], list) { if (i == pos) return pt; ++i; } } return NULL; } static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); return *pos ? 
ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net_device *dev; struct packet_type *pt; struct list_head *nxt; int hash; ++*pos; if (v == SEQ_START_TOKEN) return ptype_get_idx(seq, 0); pt = v; nxt = pt->list.next; if (pt->dev) { if (nxt != &pt->dev->ptype_all) goto found; dev = pt->dev; for_each_netdev_continue_rcu(seq_file_net(seq), dev) { if (!list_empty(&dev->ptype_all)) { nxt = dev->ptype_all.next; goto found; } } nxt = net_hotdata.ptype_all.next; goto ptype_all; } if (pt->type == htons(ETH_P_ALL)) { ptype_all: if (nxt != &net_hotdata.ptype_all) goto found; hash = 0; nxt = ptype_base[0].next; } else hash = ntohs(pt->type) & PTYPE_HASH_MASK; while (nxt == &ptype_base[hash]) { if (++hash >= PTYPE_HASH_SIZE) return NULL; nxt = ptype_base[hash].next; } found: return list_entry(nxt, struct packet_type, list); } static void ptype_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int ptype_seq_show(struct seq_file *seq, void *v) { struct packet_type *pt = v; if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else seq_printf(seq, "%04x", ntohs(pt->type)); seq_printf(seq, " %-8s %ps\n", pt->dev ? pt->dev->name : "", pt->func); } return 0; } static const struct seq_operations ptype_seq_ops = { .start = ptype_seq_start, .next = ptype_seq_next, .stop = ptype_seq_stop, .show = ptype_seq_show, }; static int __net_init dev_proc_net_init(struct net *net) { int rc = -ENOMEM; if (!proc_create_net("dev", 0444, net->proc_net, &dev_seq_ops, sizeof(struct seq_net_private))) goto out; if (!proc_create_seq("softnet_stat", 0444, net->proc_net, &softnet_seq_ops)) goto out_dev; if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops, sizeof(struct seq_net_private))) goto out_softnet; if (wext_proc_init(net)) goto out_ptype; rc = 0; out: return rc; out_ptype: remove_proc_entry("ptype", net->proc_net); out_softnet: remove_proc_entry("softnet_stat", net->proc_net); out_dev: remove_proc_entry("dev", net->proc_net); goto out; } static void __net_exit dev_proc_net_exit(struct net *net) { wext_proc_exit(net); remove_proc_entry("ptype", net->proc_net); remove_proc_entry("softnet_stat", net->proc_net); remove_proc_entry("dev", net->proc_net); } static struct pernet_operations __net_initdata dev_proc_ops = { .init = dev_proc_net_init, .exit = dev_proc_net_exit, }; static int dev_mc_seq_show(struct seq_file *seq, void *v) { struct netdev_hw_addr *ha; struct net_device *dev = v; if (v == SEQ_START_TOKEN) return 0; netif_addr_lock_bh(dev); netdev_for_each_mc_addr(ha, dev) { seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n", dev->ifindex, dev->name, ha->refcount, ha->global_use, (int)dev->addr_len, ha->addr); } netif_addr_unlock_bh(dev); return 0; } static const struct seq_operations dev_mc_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, .stop = dev_seq_stop, .show = dev_mc_seq_show, }; static int __net_init dev_mc_net_init(struct net *net) { if (!proc_create_net("dev_mcast", 0, net->proc_net, &dev_mc_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; return 0; } static void __net_exit dev_mc_net_exit(struct net *net) { remove_proc_entry("dev_mcast", net->proc_net); } static struct pernet_operations __net_initdata dev_mc_net_ops = { .init = dev_mc_net_init, .exit = 
dev_mc_net_exit,
};

int __init dev_proc_init(void)
{
	int ret = register_pernet_subsys(&dev_proc_ops);

	if (!ret)
		return register_pernet_subsys(&dev_mc_net_ops);

	return ret;
}
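/*
 * Userspace sketch: parse the /proc/net/dev format produced by
 * dev_seq_show()/dev_seq_printf_stats() above (two header lines, then one
 * line per interface with 8 receive counters followed by 8 transmit
 * counters). Only rx/tx bytes and packets are printed here.
 */
#include <stdio.h>

int main(void)
{
	char line[512];
	FILE *f = fopen("/proc/net/dev", "r");

	if (!f)
		return 1;

	/* Skip the two header lines written for SEQ_START_TOKEN. */
	if (!fgets(line, sizeof(line), f) || !fgets(line, sizeof(line), f)) {
		fclose(f);
		return 1;
	}

	while (fgets(line, sizeof(line), f)) {
		char name[32];
		unsigned long long rx_bytes, rx_pkts, tx_bytes, tx_pkts;

		/* name: rx_bytes rx_packets <6 more rx fields> tx_bytes tx_packets ... */
		if (sscanf(line,
			   " %31[^:]: %llu %llu %*u %*u %*u %*u %*u %*u %llu %llu",
			   name, &rx_bytes, &rx_pkts, &tx_bytes, &tx_pkts) == 5)
			printf("%-8s rx %llu/%llu tx %llu/%llu (bytes/pkts)\n",
			       name, rx_bytes, rx_pkts, tx_bytes, tx_pkts);
	}
	fclose(f);
	return 0;
}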
2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 
2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 
3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 
4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 
5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 
5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 
6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 
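What follows is the listing of fs/ocfs2/xattr.c, which implements the get, set, and list paths behind the xattr system calls for ocfs2. As a rough, hedged sketch only (it is not part of the listed source; the file path below is a placeholder for any file on an ocfs2 mount), a user-space caller reaches these paths roughly like this:

/*
 * Hedged user-space sketch (not part of the kernel listing below):
 * exercises the ocfs2 xattr code through the generic syscalls.
 */
#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "/mnt/ocfs2/testfile";	/* hypothetical path */
	char value[64];
	char names[256];
	ssize_t n;

	/* "user." names are accepted via ocfs2_xattr_user_handler */
	if (setxattr(path, "user.demo", "hello", 5, 0) != 0)
		perror("setxattr");

	/* getxattr(2) ends up in ocfs2_xattr_get(): inode body first, then the xattr block */
	n = getxattr(path, "user.demo", value, sizeof(value));
	if (n >= 0)
		printf("user.demo = %.*s\n", (int)n, value);

	/* listxattr(2) ends up in ocfs2_listxattr() further down in this file */
	n = listxattr(path, names, sizeof(names));
	if (n >= 0)
		printf("%zd bytes of attribute names\n", n);

	return 0;
}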
// SPDX-License-Identifier: GPL-2.0-only
/*
 * xattr.c
 *
 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
 *
 * CREDITS:
 * Lots of code in this file is copy from linux/fs/ext3/xattr.c.
 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
 */

#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/sched.h>
#include <linux/splice.h>
#include <linux/mount.h>
#include <linux/writeback.h>
#include <linux/falloc.h>
#include <linux/sort.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/security.h>

#include <cluster/masklog.h>

#include "ocfs2.h"
#include "alloc.h"
#include "blockcheck.h"
#include "dlmglue.h"
#include "file.h"
#include "symlink.h"
#include "sysfile.h"
#include "inode.h"
#include "journal.h"
#include "ocfs2_fs.h"
#include "suballoc.h"
#include "uptodate.h"
#include "buffer_head_io.h"
#include "super.h"
#include "xattr.h"
#include "refcounttree.h"
#include "acl.h"
#include "ocfs2_trace.h"

struct ocfs2_xattr_def_value_root {
	struct ocfs2_xattr_value_root	xv;
	struct ocfs2_extent_rec		er;
};

struct ocfs2_xattr_bucket {
	/* The inode these xattrs are associated with */
	struct inode *bu_inode;

	/* The actual buffers that make up the bucket */
	struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];

	/* How many blocks make up one bucket for this filesystem */
	int bu_blocks;
};

struct ocfs2_xattr_set_ctxt {
	handle_t *handle;
	struct ocfs2_alloc_context *meta_ac;
	struct ocfs2_alloc_context *data_ac;
	struct ocfs2_cached_dealloc_ctxt dealloc;
	int set_abort;
};

#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
#define OCFS2_XATTR_INLINE_SIZE	80
#define OCFS2_XATTR_HEADER_GAP	4
#define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
					 - sizeof(struct ocfs2_xattr_block) \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)

static struct ocfs2_xattr_def_value_root def_xv = {
	.xv.xr_list.l_count = cpu_to_le16(1),
};

const struct xattr_handler * const ocfs2_xattr_handlers[] = {
	&ocfs2_xattr_user_handler,
	&ocfs2_xattr_trusted_handler,
	&ocfs2_xattr_security_handler,
	NULL
};

static const struct xattr_handler * const ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
	[OCFS2_XATTR_INDEX_USER]		= &ocfs2_xattr_user_handler,
	[OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]	= &nop_posix_acl_access,
	[OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]	= &nop_posix_acl_default,
	[OCFS2_XATTR_INDEX_TRUSTED]		= &ocfs2_xattr_trusted_handler,
	[OCFS2_XATTR_INDEX_SECURITY]		=
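	/*
	 * This map is indexed by the on-disk xe_name_index value;
	 * ocfs2_xattr_prefix() uses it to recover the name prefix
	 * ("user.", "trusted.", ...) when listing entries.
	 */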
&ocfs2_xattr_security_handler, }; struct ocfs2_xattr_info { int xi_name_index; const char *xi_name; int xi_name_len; const void *xi_value; size_t xi_value_len; }; struct ocfs2_xattr_search { struct buffer_head *inode_bh; /* * xattr_bh point to the block buffer head which has extended attribute * when extended attribute in inode, xattr_bh is equal to inode_bh. */ struct buffer_head *xattr_bh; struct ocfs2_xattr_header *header; struct ocfs2_xattr_bucket *bucket; void *base; void *end; struct ocfs2_xattr_entry *here; int not_found; }; /* Operations on struct ocfs2_xa_entry */ struct ocfs2_xa_loc; struct ocfs2_xa_loc_operations { /* * Journal functions */ int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, int type); void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); /* * Return a pointer to the appropriate buffer in loc->xl_storage * at the given offset from loc->xl_header. */ void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); /* Can we reuse the existing entry for the new value? */ int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi); /* How much space is needed for the new value? */ int (*xlo_check_space)(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi); /* * Return the offset of the first name+value pair. This is * the start of our downward-filling free space. */ int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); /* * Remove the name+value at this location. Do whatever is * appropriate with the remaining name+value pairs. */ void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); /* Fill xl_entry with a new entry */ void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); /* Add name+value storage to an entry */ void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); /* * Initialize the value buf's access and bh fields for this entry. * ocfs2_xa_fill_value_buf() will handle the xv pointer. */ void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_value_buf *vb); }; /* * Describes an xattr entry location. This is a memory structure * tracking the on-disk structure. */ struct ocfs2_xa_loc { /* This xattr belongs to this inode */ struct inode *xl_inode; /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ struct ocfs2_xattr_header *xl_header; /* Bytes from xl_header to the end of the storage */ int xl_size; /* * The ocfs2_xattr_entry this location describes. If this is * NULL, this location describes the on-disk structure where it * would have been. 
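 * (That is, the attribute being looked up has no entry at this location yet.)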
*/ struct ocfs2_xattr_entry *xl_entry; /* * Internal housekeeping */ /* Buffer(s) containing this entry */ void *xl_storage; /* Operations on the storage backing this location */ const struct ocfs2_xa_loc_operations *xl_ops; }; /* * Convenience functions to calculate how much space is needed for a * given name+value pair */ static int namevalue_size(int name_len, uint64_t value_len) { if (value_len > OCFS2_XATTR_INLINE_SIZE) return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; else return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); } static int namevalue_size_xi(struct ocfs2_xattr_info *xi) { return namevalue_size(xi->xi_name_len, xi->xi_value_len); } static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) { u64 value_len = le64_to_cpu(xe->xe_value_size); BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && ocfs2_xattr_is_local(xe)); return namevalue_size(xe->xe_name_len, value_len); } static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, struct ocfs2_xattr_header *xh, int index, int *block_off, int *new_offset); static int ocfs2_xattr_block_find(struct inode *inode, int name_index, const char *name, struct ocfs2_xattr_search *xs); static int ocfs2_xattr_index_block_find(struct inode *inode, struct buffer_head *root_bh, int name_index, const char *name, struct ocfs2_xattr_search *xs); static int ocfs2_xattr_tree_list_index_block(struct inode *inode, struct buffer_head *blk_bh, char *buffer, size_t buffer_size); static int ocfs2_xattr_create_index_block(struct inode *inode, struct ocfs2_xattr_search *xs, struct ocfs2_xattr_set_ctxt *ctxt); static int ocfs2_xattr_set_entry_index_block(struct inode *inode, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, struct ocfs2_xattr_set_ctxt *ctxt); typedef int (xattr_tree_rec_func)(struct inode *inode, struct buffer_head *root_bh, u64 blkno, u32 cpos, u32 len, void *para); static int ocfs2_iterate_xattr_index_block(struct inode *inode, struct buffer_head *root_bh, xattr_tree_rec_func *rec_func, void *para); static int ocfs2_delete_xattr_in_bucket(struct inode *inode, struct ocfs2_xattr_bucket *bucket, void *para); static int ocfs2_rm_xattr_cluster(struct inode *inode, struct buffer_head *root_bh, u64 blkno, u32 cpos, u32 len, void *para); static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, u64 src_blk, u64 last_blk, u64 to_blk, unsigned int start_bucket, u32 *first_hash); static int ocfs2_prepare_refcount_xattr(struct inode *inode, struct ocfs2_dinode *di, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xis, struct ocfs2_xattr_search *xbs, struct ocfs2_refcount_tree **ref_tree, int *meta_need, int *credits); static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, struct ocfs2_xattr_bucket *bucket, int offset, struct ocfs2_xattr_value_root **xv, struct buffer_head **bh); static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) { return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; } static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) { return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); } #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) { struct ocfs2_xattr_bucket *bucket; int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); bucket = 
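		/* GFP_NOFS: memory reclaim must not re-enter the filesystem here */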
kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); if (bucket) { bucket->bu_inode = inode; bucket->bu_blocks = blks; } return bucket; } static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) { int i; for (i = 0; i < bucket->bu_blocks; i++) { brelse(bucket->bu_bhs[i]); bucket->bu_bhs[i] = NULL; } } static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) { if (bucket) { ocfs2_xattr_bucket_relse(bucket); bucket->bu_inode = NULL; kfree(bucket); } } /* * A bucket that has never been written to disk doesn't need to be * read. We just need the buffer_heads. Don't call this for * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes * them fully. */ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, u64 xb_blkno, int new) { int i, rc = 0; for (i = 0; i < bucket->bu_blocks; i++) { bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, xb_blkno + i); if (!bucket->bu_bhs[i]) { rc = -ENOMEM; mlog_errno(rc); break; } if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), bucket->bu_bhs[i])) { if (new) ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), bucket->bu_bhs[i]); else { set_buffer_uptodate(bucket->bu_bhs[i]); ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), bucket->bu_bhs[i]); } } } if (rc) ocfs2_xattr_bucket_relse(bucket); return rc; } /* Read the xattr bucket at xb_blkno */ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, u64 xb_blkno) { int rc; rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, bucket->bu_blocks, bucket->bu_bhs, 0, NULL); if (!rc) { spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, bucket->bu_bhs, bucket->bu_blocks, &bucket_xh(bucket)->xh_check); spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); if (rc) mlog_errno(rc); } if (rc) ocfs2_xattr_bucket_relse(bucket); return rc; } static int ocfs2_xattr_bucket_journal_access(handle_t *handle, struct ocfs2_xattr_bucket *bucket, int type) { int i, rc = 0; for (i = 0; i < bucket->bu_blocks; i++) { rc = ocfs2_journal_access(handle, INODE_CACHE(bucket->bu_inode), bucket->bu_bhs[i], type); if (rc) { mlog_errno(rc); break; } } return rc; } static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, struct ocfs2_xattr_bucket *bucket) { int i; spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, bucket->bu_bhs, bucket->bu_blocks, &bucket_xh(bucket)->xh_check); spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); for (i = 0; i < bucket->bu_blocks; i++) ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); } static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, struct ocfs2_xattr_bucket *src) { int i; int blocksize = src->bu_inode->i_sb->s_blocksize; BUG_ON(dest->bu_blocks != src->bu_blocks); BUG_ON(dest->bu_inode != src->bu_inode); for (i = 0; i < src->bu_blocks; i++) { memcpy(bucket_block(dest, i), bucket_block(src, i), blocksize); } } static int ocfs2_validate_xattr_block(struct super_block *sb, struct buffer_head *bh) { int rc; struct ocfs2_xattr_block *xb = (struct ocfs2_xattr_block *)bh->b_data; trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); BUG_ON(!buffer_uptodate(bh)); /* * If the ecc fails, we return the error but otherwise * leave the filesystem running. We know any error is * local to this block. 
*/ rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); if (rc) return rc; /* * Errors after here are fatal */ if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { return ocfs2_error(sb, "Extended attribute block #%llu has bad signature %.*s\n", (unsigned long long)bh->b_blocknr, 7, xb->xb_signature); } if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { return ocfs2_error(sb, "Extended attribute block #%llu has an invalid xb_blkno of %llu\n", (unsigned long long)bh->b_blocknr, (unsigned long long)le64_to_cpu(xb->xb_blkno)); } if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { return ocfs2_error(sb, "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n", (unsigned long long)bh->b_blocknr, le32_to_cpu(xb->xb_fs_generation)); } return 0; } static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, struct buffer_head **bh) { int rc; struct buffer_head *tmp = *bh; rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, ocfs2_validate_xattr_block); /* If ocfs2_read_block() got us a new bh, pass it up. */ if (!rc && !*bh) *bh = tmp; return rc; } static inline const char *ocfs2_xattr_prefix(int name_index) { const struct xattr_handler *handler = NULL; if (name_index > 0 && name_index < OCFS2_XATTR_MAX) handler = ocfs2_xattr_handler_map[name_index]; return handler ? xattr_prefix(handler) : NULL; } static u32 ocfs2_xattr_name_hash(struct inode *inode, const char *name, int name_len) { /* Get hash value of uuid from super block */ u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; int i; /* hash extended attribute name */ for (i = 0; i < name_len; i++) { hash = (hash << OCFS2_HASH_SHIFT) ^ (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ *name++; } return hash; } static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) { return namevalue_size(name_len, value_len) + sizeof(struct ocfs2_xattr_entry); } static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) { return namevalue_size_xi(xi) + sizeof(struct ocfs2_xattr_entry); } static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) { return namevalue_size_xe(xe) + sizeof(struct ocfs2_xattr_entry); } int ocfs2_calc_security_init(struct inode *dir, struct ocfs2_security_xattr_info *si, int *want_clusters, int *xattr_credits, struct ocfs2_alloc_context **xattr_ac) { int ret = 0; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), si->value_len); /* * The max space of security xattr taken inline is * 256(name) + 80(value) + 16(entry) = 352 bytes, * So reserve one metadata block for it is ok. 
*/ if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || s_size > OCFS2_XATTR_FREE_IN_IBODY) { ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); if (ret) { mlog_errno(ret); return ret; } *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; } /* reserve clusters for xattr value which will be set in B tree*/ if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, si->value_len); *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, new_clusters); *want_clusters += new_clusters; } return ret; } int ocfs2_calc_xattr_init(struct inode *dir, struct buffer_head *dir_bh, umode_t mode, struct ocfs2_security_xattr_info *si, int *want_clusters, int *xattr_credits, int *want_meta) { int ret = 0; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); int s_size = 0, a_size = 0, acl_len = 0, new_clusters; if (si->enable) s_size = ocfs2_xattr_entry_real_size(strlen(si->name), si->value_len); if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { down_read(&OCFS2_I(dir)->ip_xattr_sem); acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, "", NULL, 0); up_read(&OCFS2_I(dir)->ip_xattr_sem); if (acl_len > 0) { a_size = ocfs2_xattr_entry_real_size(0, acl_len); if (S_ISDIR(mode)) a_size <<= 1; } else if (acl_len != 0 && acl_len != -ENODATA) { ret = acl_len; mlog_errno(ret); return ret; } } if (!(s_size + a_size)) return ret; /* * The max space of security xattr taken inline is * 256(name) + 80(value) + 16(entry) = 352 bytes, * The max space of acl xattr taken inline is * 80(value) + 16(entry) * 2(if directory) = 192 bytes, * when blocksize = 512, may reserve one more cluster for * xattr bucket, otherwise reserve one metadata block * for them is ok. * If this is a new directory with inline data, * we choose to reserve the entire inline area for * directory contents and force an external xattr block. */ if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { *want_meta = *want_meta + 1; *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; } if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { *want_clusters += 1; *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); } /* * reserve credits and clusters for xattrs which has large value * and have to be set outside */ if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, si->value_len); *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, new_clusters); *want_clusters += new_clusters; } if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && acl_len > OCFS2_XATTR_INLINE_SIZE) { /* for directory, it has DEFAULT and ACCESS two types of acls */ new_clusters = (S_ISDIR(mode) ? 
2 : 1) * ocfs2_clusters_for_bytes(dir->i_sb, acl_len); *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, new_clusters); *want_clusters += new_clusters; } return ret; } static int ocfs2_xattr_extend_allocation(struct inode *inode, u32 clusters_to_add, struct ocfs2_xattr_value_buf *vb, struct ocfs2_xattr_set_ctxt *ctxt) { int status = 0, credits; handle_t *handle = ctxt->handle; enum ocfs2_alloc_restarted why; u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); struct ocfs2_extent_tree et; ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); while (clusters_to_add) { trace_ocfs2_xattr_extend_allocation(clusters_to_add); status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); break; } prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); status = ocfs2_add_clusters_in_btree(handle, &et, &logical_start, clusters_to_add, 0, ctxt->data_ac, ctxt->meta_ac, &why); if ((status < 0) && (status != -EAGAIN)) { if (status != -ENOSPC) mlog_errno(status); break; } ocfs2_journal_dirty(handle, vb->vb_bh); clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters; if (why != RESTART_NONE && clusters_to_add) { /* * We can only fail in case the alloc file doesn't give * up enough clusters. */ BUG_ON(why == RESTART_META); credits = ocfs2_calc_extend_credits(inode->i_sb, &vb->vb_xv->xr_list); status = ocfs2_extend_trans(handle, credits); if (status < 0) { status = -ENOMEM; mlog_errno(status); break; } } } return status; } static int __ocfs2_remove_xattr_range(struct inode *inode, struct ocfs2_xattr_value_buf *vb, u32 cpos, u32 phys_cpos, u32 len, unsigned int ext_flags, struct ocfs2_xattr_set_ctxt *ctxt) { int ret; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); handle_t *handle = ctxt->handle; struct ocfs2_extent_tree et; ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, &ctxt->dealloc); if (ret) { mlog_errno(ret); goto out; } le32_add_cpu(&vb->vb_xv->xr_clusters, -len); ocfs2_journal_dirty(handle, vb->vb_bh); if (ext_flags & OCFS2_EXT_REFCOUNTED) ret = ocfs2_decrease_refcount(inode, handle, ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno), len, ctxt->meta_ac, &ctxt->dealloc, 1); else ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len); if (ret) mlog_errno(ret); out: return ret; } static int ocfs2_xattr_shrink_size(struct inode *inode, u32 old_clusters, u32 new_clusters, struct ocfs2_xattr_value_buf *vb, struct ocfs2_xattr_set_ctxt *ctxt) { int ret = 0; unsigned int ext_flags; u32 trunc_len, cpos, phys_cpos, alloc_size; u64 block; if (old_clusters <= new_clusters) return 0; cpos = new_clusters; trunc_len = old_clusters - new_clusters; while (trunc_len) { ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, &alloc_size, &vb->vb_xv->xr_list, &ext_flags); if (ret) { mlog_errno(ret); goto out; } if (alloc_size > trunc_len) alloc_size = trunc_len; ret = __ocfs2_remove_xattr_range(inode, vb, cpos, phys_cpos, alloc_size, ext_flags, ctxt); if (ret) { mlog_errno(ret); goto out; } block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), block, alloc_size); cpos += alloc_size; trunc_len -= alloc_size; } out: return ret; } static int ocfs2_xattr_value_truncate(struct inode *inode, struct 
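/*
 * ocfs2_xattr_value_truncate(): despite the name, this resizes the external
 * value to hold "len" bytes, extending the allocation when growing and
 * shrinking it when the new size is smaller.
 */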
ocfs2_xattr_value_buf *vb, int len, struct ocfs2_xattr_set_ctxt *ctxt) { int ret; u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); if (new_clusters == old_clusters) return 0; if (new_clusters > old_clusters) ret = ocfs2_xattr_extend_allocation(inode, new_clusters - old_clusters, vb, ctxt); else ret = ocfs2_xattr_shrink_size(inode, old_clusters, new_clusters, vb, ctxt); return ret; } static int ocfs2_xattr_list_entry(struct super_block *sb, char *buffer, size_t size, size_t *result, int type, const char *name, int name_len) { char *p = buffer + *result; const char *prefix; int prefix_len; int total_len; switch(type) { case OCFS2_XATTR_INDEX_USER: if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) return 0; break; case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS: case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT: if (!(sb->s_flags & SB_POSIXACL)) return 0; break; case OCFS2_XATTR_INDEX_TRUSTED: if (!capable(CAP_SYS_ADMIN)) return 0; break; } prefix = ocfs2_xattr_prefix(type); if (!prefix) return 0; prefix_len = strlen(prefix); total_len = prefix_len + name_len + 1; *result += total_len; /* we are just looking for how big our buffer needs to be */ if (!size) return 0; if (*result > size) return -ERANGE; memcpy(p, prefix, prefix_len); memcpy(p + prefix_len, name, name_len); p[prefix_len + name_len] = '\0'; return 0; } static int ocfs2_xattr_list_entries(struct inode *inode, struct ocfs2_xattr_header *header, char *buffer, size_t buffer_size) { size_t result = 0; int i, type, ret; const char *name; for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; type = ocfs2_xattr_get_type(entry); name = (const char *)header + le16_to_cpu(entry->xe_name_offset); ret = ocfs2_xattr_list_entry(inode->i_sb, buffer, buffer_size, &result, type, name, entry->xe_name_len); if (ret) return ret; } return result; } int ocfs2_has_inline_xattr_value_outside(struct inode *inode, struct ocfs2_dinode *di) { struct ocfs2_xattr_header *xh; int i; xh = (struct ocfs2_xattr_header *) ((void *)di + inode->i_sb->s_blocksize - le16_to_cpu(di->i_xattr_inline_size)); for (i = 0; i < le16_to_cpu(xh->xh_count); i++) if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) return 1; return 0; } static int ocfs2_xattr_ibody_list(struct inode *inode, struct ocfs2_dinode *di, char *buffer, size_t buffer_size) { struct ocfs2_xattr_header *header = NULL; struct ocfs2_inode_info *oi = OCFS2_I(inode); int ret = 0; if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) return ret; header = (struct ocfs2_xattr_header *) ((void *)di + inode->i_sb->s_blocksize - le16_to_cpu(di->i_xattr_inline_size)); ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); return ret; } static int ocfs2_xattr_block_list(struct inode *inode, struct ocfs2_dinode *di, char *buffer, size_t buffer_size) { struct buffer_head *blk_bh = NULL; struct ocfs2_xattr_block *xb; int ret = 0; if (!di->i_xattr_loc) return ret; ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); if (ret < 0) { mlog_errno(ret); return ret; } xb = (struct ocfs2_xattr_block *)blk_bh->b_data; if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); } else ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, buffer, buffer_size); brelse(blk_bh); return ret; } ssize_t ocfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) { 
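	/*
	 * listxattr(2) entry point: names from the inline (in-inode) header
	 * are emitted first, then names from the external xattr block or
	 * indexed tree; the return value is the combined length.
	 */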
int ret = 0, i_ret = 0, b_ret = 0; struct buffer_head *di_bh = NULL; struct ocfs2_dinode *di = NULL; struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry)); if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) return -EOPNOTSUPP; if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) return ret; ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0); if (ret < 0) { mlog_errno(ret); return ret; } di = (struct ocfs2_dinode *)di_bh->b_data; down_read(&oi->ip_xattr_sem); i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size); if (i_ret < 0) b_ret = 0; else { if (buffer) { buffer += i_ret; size -= i_ret; } b_ret = ocfs2_xattr_block_list(d_inode(dentry), di, buffer, size); if (b_ret < 0) i_ret = 0; } up_read(&oi->ip_xattr_sem); ocfs2_inode_unlock(d_inode(dentry), 0); brelse(di_bh); return i_ret + b_ret; } static int ocfs2_xattr_find_entry(struct inode *inode, int name_index, const char *name, struct ocfs2_xattr_search *xs) { struct ocfs2_xattr_entry *entry; size_t name_len; int i, name_offset, cmp = 1; if (name == NULL) return -EINVAL; name_len = strlen(name); entry = xs->here; for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { if ((void *)entry >= xs->end) { ocfs2_error(inode->i_sb, "corrupted xattr entries"); return -EFSCORRUPTED; } cmp = name_index - ocfs2_xattr_get_type(entry); if (!cmp) cmp = name_len - entry->xe_name_len; if (!cmp) { name_offset = le16_to_cpu(entry->xe_name_offset); if ((xs->base + name_offset + name_len) > xs->end) { ocfs2_error(inode->i_sb, "corrupted xattr entries"); return -EFSCORRUPTED; } cmp = memcmp(name, (xs->base + name_offset), name_len); } if (cmp == 0) break; entry += 1; } xs->here = entry; return cmp ? -ENODATA : 0; } static int ocfs2_xattr_get_value_outside(struct inode *inode, struct ocfs2_xattr_value_root *xv, void *buffer, size_t len) { u32 cpos, p_cluster, num_clusters, bpc, clusters; u64 blkno; int i, ret = 0; size_t cplen, blocksize; struct buffer_head *bh = NULL; struct ocfs2_extent_list *el; el = &xv->xr_list; clusters = le32_to_cpu(xv->xr_clusters); bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); blocksize = inode->i_sb->s_blocksize; cpos = 0; while (cpos < clusters) { ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, &num_clusters, el, NULL); if (ret) { mlog_errno(ret); goto out; } blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); /* Copy ocfs2_xattr_value */ for (i = 0; i < num_clusters * bpc; i++, blkno++) { ret = ocfs2_read_block(INODE_CACHE(inode), blkno, &bh, NULL); if (ret) { mlog_errno(ret); goto out; } cplen = len >= blocksize ? 
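			/* copy a full block, or just the remaining tail at the end */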
blocksize : len; memcpy(buffer, bh->b_data, cplen); len -= cplen; buffer += cplen; brelse(bh); bh = NULL; if (len == 0) break; } cpos += num_clusters; } out: return ret; } static int ocfs2_xattr_ibody_get(struct inode *inode, int name_index, const char *name, void *buffer, size_t buffer_size, struct ocfs2_xattr_search *xs) { struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; struct ocfs2_xattr_value_root *xv; size_t size; int ret = 0; if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) return -ENODATA; xs->end = (void *)di + inode->i_sb->s_blocksize; xs->header = (struct ocfs2_xattr_header *) (xs->end - le16_to_cpu(di->i_xattr_inline_size)); xs->base = (void *)xs->header; xs->here = xs->header->xh_entries; ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); if (ret) return ret; size = le64_to_cpu(xs->here->xe_value_size); if (buffer) { if (size > buffer_size) return -ERANGE; if (ocfs2_xattr_is_local(xs->here)) { memcpy(buffer, (void *)xs->base + le16_to_cpu(xs->here->xe_name_offset) + OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); } else { xv = (struct ocfs2_xattr_value_root *) (xs->base + le16_to_cpu( xs->here->xe_name_offset) + OCFS2_XATTR_SIZE(xs->here->xe_name_len)); ret = ocfs2_xattr_get_value_outside(inode, xv, buffer, size); if (ret < 0) { mlog_errno(ret); return ret; } } } return size; } static int ocfs2_xattr_block_get(struct inode *inode, int name_index, const char *name, void *buffer, size_t buffer_size, struct ocfs2_xattr_search *xs) { struct ocfs2_xattr_block *xb; struct ocfs2_xattr_value_root *xv; size_t size; int ret = -ENODATA, name_offset, name_len, i; int block_off; xs->bucket = ocfs2_xattr_bucket_new(inode); if (!xs->bucket) { ret = -ENOMEM; mlog_errno(ret); goto cleanup; } ret = ocfs2_xattr_block_find(inode, name_index, name, xs); if (ret) { mlog_errno(ret); goto cleanup; } if (xs->not_found) { ret = -ENODATA; goto cleanup; } xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; size = le64_to_cpu(xs->here->xe_value_size); if (buffer) { ret = -ERANGE; if (size > buffer_size) goto cleanup; name_offset = le16_to_cpu(xs->here->xe_name_offset); name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); i = xs->here - xs->header->xh_entries; if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, bucket_xh(xs->bucket), i, &block_off, &name_offset); if (ret) { mlog_errno(ret); goto cleanup; } xs->base = bucket_block(xs->bucket, block_off); } if (ocfs2_xattr_is_local(xs->here)) { memcpy(buffer, (void *)xs->base + name_offset + name_len, size); } else { xv = (struct ocfs2_xattr_value_root *) (xs->base + name_offset + name_len); ret = ocfs2_xattr_get_value_outside(inode, xv, buffer, size); if (ret < 0) { mlog_errno(ret); goto cleanup; } } } ret = size; cleanup: ocfs2_xattr_bucket_free(xs->bucket); brelse(xs->xattr_bh); xs->xattr_bh = NULL; return ret; } int ocfs2_xattr_get_nolock(struct inode *inode, struct buffer_head *di_bh, int name_index, const char *name, void *buffer, size_t buffer_size) { int ret; struct ocfs2_dinode *di = NULL; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_xattr_search xis = { .not_found = -ENODATA, }; struct ocfs2_xattr_search xbs = { .not_found = -ENODATA, }; if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) return -EOPNOTSUPP; if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) return -ENODATA; xis.inode_bh = xbs.inode_bh = di_bh; di = (struct ocfs2_dinode *)di_bh->b_data; ret = ocfs2_xattr_ibody_get(inode, name_index, 
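				/* inode body first; fall back to the external block only on -ENODATA */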
name, buffer, buffer_size, &xis); if (ret == -ENODATA && di->i_xattr_loc) ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, buffer_size, &xbs); return ret; } /* ocfs2_xattr_get() * * Copy an extended attribute into the buffer provided. * Buffer is NULL to compute the size of buffer required. */ static int ocfs2_xattr_get(struct inode *inode, int name_index, const char *name, void *buffer, size_t buffer_size) { int ret, had_lock; struct buffer_head *di_bh = NULL; struct ocfs2_lock_holder oh; had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh); if (had_lock < 0) { mlog_errno(had_lock); return had_lock; } down_read(&OCFS2_I(inode)->ip_xattr_sem); ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, name, buffer, buffer_size); up_read(&OCFS2_I(inode)->ip_xattr_sem); ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); brelse(di_bh); return ret; } static int __ocfs2_xattr_set_value_outside(struct inode *inode, handle_t *handle, struct ocfs2_xattr_value_buf *vb, const void *value, int value_len) { int ret = 0, i, cp_len; u16 blocksize = inode->i_sb->s_blocksize; u32 p_cluster, num_clusters; u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); u64 blkno; struct buffer_head *bh = NULL; unsigned int ext_flags; struct ocfs2_xattr_value_root *xv = vb->vb_xv; BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); while (cpos < clusters) { ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, &num_clusters, &xv->xr_list, &ext_flags); if (ret) { mlog_errno(ret); goto out; } BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); for (i = 0; i < num_clusters * bpc; i++, blkno++) { ret = ocfs2_read_block(INODE_CACHE(inode), blkno, &bh, NULL); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_journal_access(handle, INODE_CACHE(inode), bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out; } cp_len = value_len > blocksize ? blocksize : value_len; memcpy(bh->b_data, value, cp_len); value_len -= cp_len; value += cp_len; if (cp_len < blocksize) memset(bh->b_data + cp_len, 0, blocksize - cp_len); ocfs2_journal_dirty(handle, bh); brelse(bh); bh = NULL; /* * XXX: do we need to empty all the following * blocks in this cluster? */ if (!value_len) break; } cpos += num_clusters; } out: brelse(bh); return ret; } static int ocfs2_xa_check_space_helper(int needed_space, int free_start, int num_entries) { int free_space; if (!needed_space) return 0; free_space = free_start - sizeof(struct ocfs2_xattr_header) - (num_entries * sizeof(struct ocfs2_xattr_entry)) - OCFS2_XATTR_HEADER_GAP; if (free_space < 0) return -EIO; if (free_space < needed_space) return -ENOSPC; return 0; } static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, int type) { return loc->xl_ops->xlo_journal_access(handle, loc, type); } static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) { loc->xl_ops->xlo_journal_dirty(handle, loc); } /* Give a pointer into the storage for the given offset */ static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) { BUG_ON(offset >= loc->xl_size); return loc->xl_ops->xlo_offset_pointer(loc, offset); } /* * Wipe the name+value pair and allow the storage to reclaim it. This * must be followed by either removal of the entry or a call to * ocfs2_xa_add_namevalue(). 
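 *
 * (How the storage reclaims the pair differs by backend: block storage
 * compacts the remaining name+value region right away in
 * ocfs2_xa_block_wipe_namevalue(), while bucket storage only drops
 * xh_name_value_len and leaves a hole until the bucket is later
 * defragmented.)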
*/ static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) { loc->xl_ops->xlo_wipe_namevalue(loc); } /* * Find lowest offset to a name+value pair. This is the start of our * downward-growing free space. */ static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) { return loc->xl_ops->xlo_get_free_start(loc); } /* Can we reuse loc->xl_entry for xi? */ static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi) { return loc->xl_ops->xlo_can_reuse(loc, xi); } /* How much free space is needed to set the new value */ static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi) { return loc->xl_ops->xlo_check_space(loc, xi); } static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) { loc->xl_ops->xlo_add_entry(loc, name_hash); loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); /* * We can't leave the new entry's xe_name_offset at zero or * add_namevalue() will go nuts. We set it to the size of our * storage so that it can never be less than any other entry. */ loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); } static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi) { int size = namevalue_size_xi(xi); int nameval_offset; char *nameval_buf; loc->xl_ops->xlo_add_namevalue(loc, size); loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); loc->xl_entry->xe_name_len = xi->xi_name_len; ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); ocfs2_xattr_set_local(loc->xl_entry, xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); memset(nameval_buf, 0, size); memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); } static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_value_buf *vb) { int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); /* Value bufs are for value trees */ BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); BUG_ON(namevalue_size_xe(loc->xl_entry) != (name_size + OCFS2_XATTR_ROOT_SIZE)); loc->xl_ops->xlo_fill_value_buf(loc, vb); vb->vb_xv = (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, nameval_offset + name_size); } static int ocfs2_xa_block_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, int type) { struct buffer_head *bh = loc->xl_storage; ocfs2_journal_access_func access; if (loc->xl_size == (bh->b_size - offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header))) access = ocfs2_journal_access_xb; else access = ocfs2_journal_access_di; return access(handle, INODE_CACHE(loc->xl_inode), bh, type); } static void ocfs2_xa_block_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) { struct buffer_head *bh = loc->xl_storage; ocfs2_journal_dirty(handle, bh); } static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, int offset) { return (char *)loc->xl_header + offset; } static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi) { /* * Block storage is strict. If the sizes aren't exact, we will * remove the old one and reinsert the new. 
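 * (Compare ocfs2_xa_bucket_can_reuse() below, which is more relaxed and
 * only requires the existing name+value region to be at least as large
 * as the new one.)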
*/ return namevalue_size_xe(loc->xl_entry) == namevalue_size_xi(xi); } static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) { struct ocfs2_xattr_header *xh = loc->xl_header; int i, count = le16_to_cpu(xh->xh_count); int offset, free_start = loc->xl_size; for (i = 0; i < count; i++) { offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); if (offset < free_start) free_start = offset; } return free_start; } static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi) { int count = le16_to_cpu(loc->xl_header->xh_count); int free_start = ocfs2_xa_get_free_start(loc); int needed_space = ocfs2_xi_entry_usage(xi); /* * Block storage will reclaim the original entry before inserting * the new value, so we only need the difference. If the new * entry is smaller than the old one, we don't need anything. */ if (loc->xl_entry) { /* Don't need space if we're reusing! */ if (ocfs2_xa_can_reuse_entry(loc, xi)) needed_space = 0; else needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); } if (needed_space < 0) needed_space = 0; return ocfs2_xa_check_space_helper(needed_space, free_start, count); } /* * Block storage for xattrs keeps the name+value pairs compacted. When * we remove one, we have to shift any that preceded it towards the end. */ static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) { int i, offset; int namevalue_offset, first_namevalue_offset, namevalue_size; struct ocfs2_xattr_entry *entry = loc->xl_entry; struct ocfs2_xattr_header *xh = loc->xl_header; int count = le16_to_cpu(xh->xh_count); namevalue_offset = le16_to_cpu(entry->xe_name_offset); namevalue_size = namevalue_size_xe(entry); first_namevalue_offset = ocfs2_xa_get_free_start(loc); /* Shift the name+value pairs */ memmove((char *)xh + first_namevalue_offset + namevalue_size, (char *)xh + first_namevalue_offset, namevalue_offset - first_namevalue_offset); memset((char *)xh + first_namevalue_offset, 0, namevalue_size); /* Now tell xh->xh_entries about it */ for (i = 0; i < count; i++) { offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); if (offset <= namevalue_offset) le16_add_cpu(&xh->xh_entries[i].xe_name_offset, namevalue_size); } /* * Note that we don't update xh_free_start or xh_name_value_len * because they're not used in block-stored xattrs. */ } static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) { int count = le16_to_cpu(loc->xl_header->xh_count); loc->xl_entry = &(loc->xl_header->xh_entries[count]); le16_add_cpu(&loc->xl_header->xh_count, 1); memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); } static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) { int free_start = ocfs2_xa_get_free_start(loc); loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); } static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_value_buf *vb) { struct buffer_head *bh = loc->xl_storage; if (loc->xl_size == (bh->b_size - offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header))) vb->vb_access = ocfs2_journal_access_xb; else vb->vb_access = ocfs2_journal_access_di; vb->vb_bh = bh; } /* * Operations for xattrs stored in blocks. This includes inline inode * storage and unindexed ocfs2_xattr_blocks. 
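 * Both ocfs2_init_dinode_xa_loc() and ocfs2_init_xattr_block_xa_loc()
 * point xl_ops at this table; the bucket table is used only for
 * buckets, which exist once a block has gone to the indexed format.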
*/ static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { .xlo_journal_access = ocfs2_xa_block_journal_access, .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, .xlo_check_space = ocfs2_xa_block_check_space, .xlo_can_reuse = ocfs2_xa_block_can_reuse, .xlo_get_free_start = ocfs2_xa_block_get_free_start, .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, .xlo_add_entry = ocfs2_xa_block_add_entry, .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, }; static int ocfs2_xa_bucket_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, int type) { struct ocfs2_xattr_bucket *bucket = loc->xl_storage; return ocfs2_xattr_bucket_journal_access(handle, bucket, type); } static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) { struct ocfs2_xattr_bucket *bucket = loc->xl_storage; ocfs2_xattr_bucket_journal_dirty(handle, bucket); } static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, int offset) { struct ocfs2_xattr_bucket *bucket = loc->xl_storage; int block, block_offset; /* The header is at the front of the bucket */ block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; block_offset = offset % loc->xl_inode->i_sb->s_blocksize; return bucket_block(bucket, block) + block_offset; } static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi) { return namevalue_size_xe(loc->xl_entry) >= namevalue_size_xi(xi); } static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) { struct ocfs2_xattr_bucket *bucket = loc->xl_storage; return le16_to_cpu(bucket_xh(bucket)->xh_free_start); } static int ocfs2_bucket_align_free_start(struct super_block *sb, int free_start, int size) { /* * We need to make sure that the name+value pair fits within * one block. */ if (((free_start - size) >> sb->s_blocksize_bits) != ((free_start - 1) >> sb->s_blocksize_bits)) free_start -= free_start % sb->s_blocksize; return free_start; } static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi) { int rc; int count = le16_to_cpu(loc->xl_header->xh_count); int free_start = ocfs2_xa_get_free_start(loc); int needed_space = ocfs2_xi_entry_usage(xi); int size = namevalue_size_xi(xi); struct super_block *sb = loc->xl_inode->i_sb; /* * Bucket storage does not reclaim name+value pairs it cannot * reuse. They live as holes until the bucket fills, and then * the bucket is defragmented. However, the bucket can reclaim * the ocfs2_xattr_entry. */ if (loc->xl_entry) { /* Don't need space if we're reusing! */ if (ocfs2_xa_can_reuse_entry(loc, xi)) needed_space = 0; else needed_space -= sizeof(struct ocfs2_xattr_entry); } BUG_ON(needed_space < 0); if (free_start < size) { if (needed_space) return -ENOSPC; } else { /* * First we check if it would fit in the first place. * Below, we align the free start to a block. This may * slide us below the minimum gap. By checking unaligned * first, we avoid that error. 
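 * (Illustrative numbers, not from this file: with a 4096-byte block
 * size, free_start == 4100 and size == 200 would let the pair straddle
 * the first block boundary, so ocfs2_bucket_align_free_start() rounds
 * free_start down to 4096 and the pair lands at [3896, 4096), wholly
 * inside the first block.)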
*/ rc = ocfs2_xa_check_space_helper(needed_space, free_start, count); if (rc) return rc; free_start = ocfs2_bucket_align_free_start(sb, free_start, size); } return ocfs2_xa_check_space_helper(needed_space, free_start, count); } static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) { le16_add_cpu(&loc->xl_header->xh_name_value_len, -namevalue_size_xe(loc->xl_entry)); } static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) { struct ocfs2_xattr_header *xh = loc->xl_header; int count = le16_to_cpu(xh->xh_count); int low = 0, high = count - 1, tmp; struct ocfs2_xattr_entry *tmp_xe; /* * We keep buckets sorted by name_hash, so we need to find * our insert place. */ while (low <= high && count) { tmp = (low + high) / 2; tmp_xe = &xh->xh_entries[tmp]; if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) low = tmp + 1; else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) high = tmp - 1; else { low = tmp; break; } } if (low != count) memmove(&xh->xh_entries[low + 1], &xh->xh_entries[low], ((count - low) * sizeof(struct ocfs2_xattr_entry))); le16_add_cpu(&xh->xh_count, 1); loc->xl_entry = &xh->xh_entries[low]; memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); } static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) { int free_start = ocfs2_xa_get_free_start(loc); struct ocfs2_xattr_header *xh = loc->xl_header; struct super_block *sb = loc->xl_inode->i_sb; int nameval_offset; free_start = ocfs2_bucket_align_free_start(sb, free_start, size); nameval_offset = free_start - size; loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); xh->xh_free_start = cpu_to_le16(nameval_offset); le16_add_cpu(&xh->xh_name_value_len, size); } static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_value_buf *vb) { struct ocfs2_xattr_bucket *bucket = loc->xl_storage; struct super_block *sb = loc->xl_inode->i_sb; int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); int size = namevalue_size_xe(loc->xl_entry); int block_offset = nameval_offset >> sb->s_blocksize_bits; /* Values are not allowed to straddle block boundaries */ BUG_ON(block_offset != ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); /* We expect the bucket to be filled in */ BUG_ON(!bucket->bu_bhs[block_offset]); vb->vb_access = ocfs2_journal_access; vb->vb_bh = bucket->bu_bhs[block_offset]; } /* Operations for xattrs stored in buckets. 
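 * Buckets differ from block storage in the ways the ops below reflect:
 * entries are kept sorted by name hash, a name+value pair must not
 * straddle a block boundary, and free space is tracked through
 * xh_free_start and xh_name_value_len instead of by compacting on
 * removal.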
*/ static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { .xlo_journal_access = ocfs2_xa_bucket_journal_access, .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, .xlo_check_space = ocfs2_xa_bucket_check_space, .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, .xlo_add_entry = ocfs2_xa_bucket_add_entry, .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, }; static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) { struct ocfs2_xattr_value_buf vb; if (ocfs2_xattr_is_local(loc->xl_entry)) return 0; ocfs2_xa_fill_value_buf(loc, &vb); return le32_to_cpu(vb.vb_xv->xr_clusters); } static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, struct ocfs2_xattr_set_ctxt *ctxt) { int trunc_rc, access_rc; struct ocfs2_xattr_value_buf vb; ocfs2_xa_fill_value_buf(loc, &vb); trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, ctxt); /* * The caller of ocfs2_xa_value_truncate() has already called * ocfs2_xa_journal_access on the loc. However, The truncate code * calls ocfs2_extend_trans(). This may commit the previous * transaction and open a new one. If this is a bucket, truncate * could leave only vb->vb_bh set up for journaling. Meanwhile, * the caller is expecting to dirty the entire bucket. So we must * reset the journal work. We do this even if truncate has failed, * as it could have failed after committing the extend. */ access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, OCFS2_JOURNAL_ACCESS_WRITE); /* Errors in truncate take precedence */ return trunc_rc ? trunc_rc : access_rc; } static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) { int index, count; struct ocfs2_xattr_header *xh = loc->xl_header; struct ocfs2_xattr_entry *entry = loc->xl_entry; ocfs2_xa_wipe_namevalue(loc); loc->xl_entry = NULL; le16_add_cpu(&xh->xh_count, -1); count = le16_to_cpu(xh->xh_count); /* * Only zero out the entry if there are more remaining. This is * important for an empty bucket, as it keeps track of the * bucket's hash value. It doesn't hurt empty block storage. */ if (count) { index = ((char *)entry - (char *)&xh->xh_entries) / sizeof(struct ocfs2_xattr_entry); memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], (count - index) * sizeof(struct ocfs2_xattr_entry)); memset(&xh->xh_entries[count], 0, sizeof(struct ocfs2_xattr_entry)); } } /* * If we have a problem adjusting the size of an external value during * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr * in an intermediate state. For example, the value may be partially * truncated. * * If the value tree hasn't changed, the extend/truncate went nowhere. * We have nothing to do. The caller can treat it as a straight error. * * If the value tree got partially truncated, we now have a corrupted * extended attribute. We're going to wipe its entry and leak the * clusters. Better to leak some storage than leave a corrupt entry. * * If the value tree grew, it obviously didn't grow enough for the * new entry. We're not going to try and reclaim those clusters either. * If there was already an external value there (orig_clusters != 0), * the new clusters are attached safely and we can just leave the old * value in place. If there was no external value there, we remove * the entry. * * This way, the xattr block we store in the journal will be consistent. 
* If the size change broke because of the journal, no changes will hit * disk anyway. */ static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, const char *what, unsigned int orig_clusters) { unsigned int new_clusters = ocfs2_xa_value_clusters(loc); char *nameval_buf = ocfs2_xa_offset_pointer(loc, le16_to_cpu(loc->xl_entry->xe_name_offset)); if (new_clusters < orig_clusters) { mlog(ML_ERROR, "Partial truncate while %s xattr %.*s. Leaking " "%u clusters and removing the entry\n", what, loc->xl_entry->xe_name_len, nameval_buf, orig_clusters - new_clusters); ocfs2_xa_remove_entry(loc); } else if (!orig_clusters) { mlog(ML_ERROR, "Unable to allocate an external value for xattr " "%.*s safely. Leaking %u clusters and removing the " "entry\n", loc->xl_entry->xe_name_len, nameval_buf, new_clusters - orig_clusters); ocfs2_xa_remove_entry(loc); } else if (new_clusters > orig_clusters) mlog(ML_ERROR, "Unable to grow xattr %.*s safely. %u new clusters " "have been added, but the value will not be " "modified\n", loc->xl_entry->xe_name_len, nameval_buf, new_clusters - orig_clusters); } static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_set_ctxt *ctxt) { int rc = 0; unsigned int orig_clusters; if (!ocfs2_xattr_is_local(loc->xl_entry)) { orig_clusters = ocfs2_xa_value_clusters(loc); rc = ocfs2_xa_value_truncate(loc, 0, ctxt); if (rc) { mlog_errno(rc); /* * Since this is remove, we can return 0 if * ocfs2_xa_cleanup_value_truncate() is going to * wipe the entry anyway. So we check the * cluster count as well. */ if (orig_clusters != ocfs2_xa_value_clusters(loc)) rc = 0; ocfs2_xa_cleanup_value_truncate(loc, "removing", orig_clusters); goto out; } } ocfs2_xa_remove_entry(loc); out: return rc; } static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) { int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); char *nameval_buf; nameval_buf = ocfs2_xa_offset_pointer(loc, le16_to_cpu(loc->xl_entry->xe_name_offset)); memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); } /* * Take an existing entry and make it ready for the new value. This * won't allocate space, but it may free space. It should be ready for * ocfs2_xa_prepare_entry() to finish the work. */ static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_set_ctxt *ctxt) { int rc = 0; int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); unsigned int orig_clusters; char *nameval_buf; int xe_local = ocfs2_xattr_is_local(loc->xl_entry); int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE; BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) != name_size); nameval_buf = ocfs2_xa_offset_pointer(loc, le16_to_cpu(loc->xl_entry->xe_name_offset)); if (xe_local) { memset(nameval_buf + name_size, 0, namevalue_size_xe(loc->xl_entry) - name_size); if (!xi_local) ocfs2_xa_install_value_root(loc); } else { orig_clusters = ocfs2_xa_value_clusters(loc); if (xi_local) { rc = ocfs2_xa_value_truncate(loc, 0, ctxt); if (rc < 0) mlog_errno(rc); else memset(nameval_buf + name_size, 0, namevalue_size_xe(loc->xl_entry) - name_size); } else if (le64_to_cpu(loc->xl_entry->xe_value_size) > xi->xi_value_len) { rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); if (rc < 0) mlog_errno(rc); } if (rc) { ocfs2_xa_cleanup_value_truncate(loc, "reusing", orig_clusters); goto out; } } loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); ocfs2_xattr_set_local(loc->xl_entry, xi_local); out: return rc; } /* * Prepares loc->xl_entry to receive the new xattr. 
This includes * properly setting up the name+value pair region. If loc->xl_entry * already exists, it will take care of modifying it appropriately. * * Note that this modifies the data. You did journal_access already, * right? */ static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi, u32 name_hash, struct ocfs2_xattr_set_ctxt *ctxt) { int rc = 0; unsigned int orig_clusters; __le64 orig_value_size = 0; rc = ocfs2_xa_check_space(loc, xi); if (rc) goto out; if (loc->xl_entry) { if (ocfs2_xa_can_reuse_entry(loc, xi)) { orig_value_size = loc->xl_entry->xe_value_size; rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); if (rc) goto out; goto alloc_value; } if (!ocfs2_xattr_is_local(loc->xl_entry)) { orig_clusters = ocfs2_xa_value_clusters(loc); rc = ocfs2_xa_value_truncate(loc, 0, ctxt); if (rc) { mlog_errno(rc); ocfs2_xa_cleanup_value_truncate(loc, "overwriting", orig_clusters); goto out; } } ocfs2_xa_wipe_namevalue(loc); } else ocfs2_xa_add_entry(loc, name_hash); /* * If we get here, we have a blank entry. Fill it. We grow our * name+value pair back from the end. */ ocfs2_xa_add_namevalue(loc, xi); if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) ocfs2_xa_install_value_root(loc); alloc_value: if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { orig_clusters = ocfs2_xa_value_clusters(loc); rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); if (rc < 0) { ctxt->set_abort = 1; ocfs2_xa_cleanup_value_truncate(loc, "growing", orig_clusters); /* * If we were growing an existing value, * ocfs2_xa_cleanup_value_truncate() won't remove * the entry. We need to restore the original value * size. */ if (loc->xl_entry) { BUG_ON(!orig_value_size); loc->xl_entry->xe_value_size = orig_value_size; } mlog_errno(rc); } } out: return rc; } /* * Store the value portion of the name+value pair. This will skip * values that are stored externally. Their tree roots were set up * by ocfs2_xa_prepare_entry(). */ static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_set_ctxt *ctxt) { int rc = 0; int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); char *nameval_buf; struct ocfs2_xattr_value_buf vb; nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { ocfs2_xa_fill_value_buf(loc, &vb); rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, ctxt->handle, &vb, xi->xi_value, xi->xi_value_len); } else memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); return rc; } static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_set_ctxt *ctxt) { int ret; u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, xi->xi_name_len); ret = ocfs2_xa_journal_access(ctxt->handle, loc, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } /* * From here on out, everything is going to modify the buffer a * little. Errors are going to leave the xattr header in a * sane state. Thus, even with errors we dirty the sucker. 
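 * (The overall flow is: journal access above, then either
 * ocfs2_xa_remove() for a NULL value or ocfs2_xa_prepare_entry() plus
 * ocfs2_xa_store_value(), and finally ocfs2_xa_journal_dirty() on both
 * the success and error paths.)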
*/ /* Don't worry, we are never called with !xi_value and !xl_entry */ if (!xi->xi_value) { ret = ocfs2_xa_remove(loc, ctxt); goto out_dirty; } ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); if (ret) { if (ret != -ENOSPC) mlog_errno(ret); goto out_dirty; } ret = ocfs2_xa_store_value(loc, xi, ctxt); if (ret) mlog_errno(ret); out_dirty: ocfs2_xa_journal_dirty(ctxt->handle, loc); out: return ret; } static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, struct inode *inode, struct buffer_head *bh, struct ocfs2_xattr_entry *entry) { struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); loc->xl_inode = inode; loc->xl_ops = &ocfs2_xa_block_loc_ops; loc->xl_storage = bh; loc->xl_entry = entry; loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); loc->xl_header = (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - loc->xl_size); } static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, struct inode *inode, struct buffer_head *bh, struct ocfs2_xattr_entry *entry) { struct ocfs2_xattr_block *xb = (struct ocfs2_xattr_block *)bh->b_data; BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); loc->xl_inode = inode; loc->xl_ops = &ocfs2_xa_block_loc_ops; loc->xl_storage = bh; loc->xl_header = &(xb->xb_attrs.xb_header); loc->xl_entry = entry; loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); } static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_bucket *bucket, struct ocfs2_xattr_entry *entry) { loc->xl_inode = bucket->bu_inode; loc->xl_ops = &ocfs2_xa_bucket_loc_ops; loc->xl_storage = bucket; loc->xl_header = bucket_xh(bucket); loc->xl_entry = entry; loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; } /* * In xattr remove, if it is stored outside and refcounted, we may have * the chance to split the refcount tree. So need the allocators. 
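 * (ocfs2_lock_xattr_remove_allocators() only reserves metadata blocks
 * and extra journal credits when the value extent is actually flagged
 * OCFS2_EXT_REFCOUNTED; unshared values fall straight through with
 * *ref_credits left at zero.)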
*/ static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, struct ocfs2_xattr_value_root *xv, struct ocfs2_caching_info *ref_ci, struct buffer_head *ref_root_bh, struct ocfs2_alloc_context **meta_ac, int *ref_credits) { int ret, meta_add = 0; u32 p_cluster, num_clusters; unsigned int ext_flags; *ref_credits = 0; ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, &num_clusters, &xv->xr_list, &ext_flags); if (ret) { mlog_errno(ret); goto out; } if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) goto out; ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, ref_root_bh, xv, &meta_add, ref_credits); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), meta_add, meta_ac); if (ret) mlog_errno(ret); out: return ret; } static int ocfs2_remove_value_outside(struct inode*inode, struct ocfs2_xattr_value_buf *vb, struct ocfs2_xattr_header *header, struct ocfs2_caching_info *ref_ci, struct buffer_head *ref_root_bh) { int ret = 0, i, ref_credits; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; void *val; ocfs2_init_dealloc_ctxt(&ctxt.dealloc); for (i = 0; i < le16_to_cpu(header->xh_count); i++) { struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; if (ocfs2_xattr_is_local(entry)) continue; val = (void *)header + le16_to_cpu(entry->xe_name_offset); vb->vb_xv = (struct ocfs2_xattr_value_root *) (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, ref_ci, ref_root_bh, &ctxt.meta_ac, &ref_credits); ctxt.handle = ocfs2_start_trans(osb, ref_credits + ocfs2_remove_extent_credits(osb->sb)); if (IS_ERR(ctxt.handle)) { ret = PTR_ERR(ctxt.handle); mlog_errno(ret); break; } ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); ocfs2_commit_trans(osb, ctxt.handle); if (ctxt.meta_ac) { ocfs2_free_alloc_context(ctxt.meta_ac); ctxt.meta_ac = NULL; } if (ret < 0) { mlog_errno(ret); break; } } if (ctxt.meta_ac) ocfs2_free_alloc_context(ctxt.meta_ac); ocfs2_schedule_truncate_log_flush(osb, 1); ocfs2_run_deallocs(osb, &ctxt.dealloc); return ret; } static int ocfs2_xattr_ibody_remove(struct inode *inode, struct buffer_head *di_bh, struct ocfs2_caching_info *ref_ci, struct buffer_head *ref_root_bh) { struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_xattr_header *header; int ret; struct ocfs2_xattr_value_buf vb = { .vb_bh = di_bh, .vb_access = ocfs2_journal_access_di, }; header = (struct ocfs2_xattr_header *) ((void *)di + inode->i_sb->s_blocksize - le16_to_cpu(di->i_xattr_inline_size)); ret = ocfs2_remove_value_outside(inode, &vb, header, ref_ci, ref_root_bh); return ret; } struct ocfs2_rm_xattr_bucket_para { struct ocfs2_caching_info *ref_ci; struct buffer_head *ref_root_bh; }; static int ocfs2_xattr_block_remove(struct inode *inode, struct buffer_head *blk_bh, struct ocfs2_caching_info *ref_ci, struct buffer_head *ref_root_bh) { struct ocfs2_xattr_block *xb; int ret = 0; struct ocfs2_xattr_value_buf vb = { .vb_bh = blk_bh, .vb_access = ocfs2_journal_access_xb, }; struct ocfs2_rm_xattr_bucket_para args = { .ref_ci = ref_ci, .ref_root_bh = ref_root_bh, }; xb = (struct ocfs2_xattr_block *)blk_bh->b_data; if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); ret = ocfs2_remove_value_outside(inode, &vb, header, ref_ci, ref_root_bh); } else ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, ocfs2_rm_xattr_cluster, &args); return ret; } static int 
ocfs2_xattr_free_block(struct inode *inode, u64 block, struct ocfs2_caching_info *ref_ci, struct buffer_head *ref_root_bh) { struct inode *xb_alloc_inode; struct buffer_head *xb_alloc_bh = NULL; struct buffer_head *blk_bh = NULL; struct ocfs2_xattr_block *xb; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); handle_t *handle; int ret = 0; u64 blk, bg_blkno; u16 bit; ret = ocfs2_read_xattr_block(inode, block, &blk_bh); if (ret < 0) { mlog_errno(ret); goto out; } ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); if (ret < 0) { mlog_errno(ret); goto out; } xb = (struct ocfs2_xattr_block *)blk_bh->b_data; blk = le64_to_cpu(xb->xb_blkno); bit = le16_to_cpu(xb->xb_suballoc_bit); if (xb->xb_suballoc_loc) bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); else bg_blkno = ocfs2_which_suballoc_group(blk, bit); xb_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, le16_to_cpu(xb->xb_suballoc_slot)); if (!xb_alloc_inode) { ret = -ENOMEM; mlog_errno(ret); goto out; } inode_lock(xb_alloc_inode); ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1); if (ret < 0) { mlog_errno(ret); goto out_mutex; } handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); goto out_unlock; } ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, bit, bg_blkno, 1); if (ret < 0) mlog_errno(ret); ocfs2_commit_trans(osb, handle); out_unlock: ocfs2_inode_unlock(xb_alloc_inode, 1); brelse(xb_alloc_bh); out_mutex: inode_unlock(xb_alloc_inode); iput(xb_alloc_inode); out: brelse(blk_bh); return ret; } /* * ocfs2_xattr_remove() * * Free extended attribute resources associated with this inode. */ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) { struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_refcount_tree *ref_tree = NULL; struct buffer_head *ref_root_bh = NULL; struct ocfs2_caching_info *ref_ci = NULL; handle_t *handle; int ret; if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) return 0; if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) return 0; if (ocfs2_is_refcount_inode(inode)) { ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), le64_to_cpu(di->i_refcount_loc), 1, &ref_tree, &ref_root_bh); if (ret) { mlog_errno(ret); goto out; } ref_ci = &ref_tree->rf_ci; } if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { ret = ocfs2_xattr_ibody_remove(inode, di_bh, ref_ci, ref_root_bh); if (ret < 0) { mlog_errno(ret); goto out; } } if (di->i_xattr_loc) { ret = ocfs2_xattr_free_block(inode, le64_to_cpu(di->i_xattr_loc), ref_ci, ref_root_bh); if (ret < 0) { mlog_errno(ret); goto out; } } handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); goto out; } ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; } di->i_xattr_loc = 0; spin_lock(&oi->ip_lock); oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); spin_unlock(&oi->ip_lock); ocfs2_update_inode_fsync_trans(handle, inode, 0); ocfs2_journal_dirty(handle, di_bh); out_commit: ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: if (ref_tree) ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); brelse(ref_root_bh); return ret; } static int ocfs2_xattr_has_space_inline(struct inode *inode, struct ocfs2_dinode *di) { struct 
ocfs2_inode_info *oi = OCFS2_I(inode); unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; int free; if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) return 0; if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { struct ocfs2_inline_data *idata = &di->id2.i_data; free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); } else if (ocfs2_inode_is_fast_symlink(inode)) { free = ocfs2_fast_symlink_chars(inode->i_sb) - le64_to_cpu(di->i_size); } else { struct ocfs2_extent_list *el = &di->id2.i_list; free = (le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec)) * sizeof(struct ocfs2_extent_rec); } if (free >= xattrsize) return 1; return 0; } /* * ocfs2_xattr_ibody_find() * * Find extended attribute in inode block and * fill search info into struct ocfs2_xattr_search. */ static int ocfs2_xattr_ibody_find(struct inode *inode, int name_index, const char *name, struct ocfs2_xattr_search *xs) { struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; int ret; int has_space = 0; if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) return 0; if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { down_read(&oi->ip_alloc_sem); has_space = ocfs2_xattr_has_space_inline(inode, di); up_read(&oi->ip_alloc_sem); if (!has_space) return 0; } xs->xattr_bh = xs->inode_bh; xs->end = (void *)di + inode->i_sb->s_blocksize; if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) xs->header = (struct ocfs2_xattr_header *) (xs->end - le16_to_cpu(di->i_xattr_inline_size)); else xs->header = (struct ocfs2_xattr_header *) (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); xs->base = (void *)xs->header; xs->here = xs->header->xh_entries; /* Find the named attribute. */ if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); if (ret && ret != -ENODATA) return ret; xs->not_found = ret; } return 0; } static int ocfs2_xattr_ibody_init(struct inode *inode, struct buffer_head *di_bh, struct ocfs2_xattr_set_ctxt *ctxt) { int ret; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); unsigned int xattrsize = osb->s_xattr_inline_size; if (!ocfs2_xattr_has_space_inline(inode, di)) { ret = -ENOSPC; goto out; } ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } /* * Adjust extent record count or inline data size * to reserve space for extended attribute. */ if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { struct ocfs2_inline_data *idata = &di->id2.i_data; le16_add_cpu(&idata->id_count, -xattrsize); } else if (!(ocfs2_inode_is_fast_symlink(inode))) { struct ocfs2_extent_list *el = &di->id2.i_list; le16_add_cpu(&el->l_count, -(xattrsize / sizeof(struct ocfs2_extent_rec))); } di->i_xattr_inline_size = cpu_to_le16(xattrsize); spin_lock(&oi->ip_lock); oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); spin_unlock(&oi->ip_lock); ocfs2_journal_dirty(ctxt->handle, di_bh); out: return ret; } /* * ocfs2_xattr_ibody_set() * * Set, replace or remove an extended attribute into inode block. 
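 * Returns -ENOSPC without trying when the block size is the minimum
 * supported size, and lazily carves out the inline xattr area via
 * ocfs2_xattr_ibody_init() the first time an xattr is stored in the
 * inode.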
* */ static int ocfs2_xattr_ibody_set(struct inode *inode, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, struct ocfs2_xattr_set_ctxt *ctxt) { int ret; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_xa_loc loc; if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) return -ENOSPC; down_write(&oi->ip_alloc_sem); if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); if (ret) { if (ret != -ENOSPC) mlog_errno(ret); goto out; } } ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, xs->not_found ? NULL : xs->here); ret = ocfs2_xa_set(&loc, xi, ctxt); if (ret) { if (ret != -ENOSPC) mlog_errno(ret); goto out; } xs->here = loc.xl_entry; out: up_write(&oi->ip_alloc_sem); return ret; } /* * ocfs2_xattr_block_find() * * Find extended attribute in external block and * fill search info into struct ocfs2_xattr_search. */ static int ocfs2_xattr_block_find(struct inode *inode, int name_index, const char *name, struct ocfs2_xattr_search *xs) { struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; struct buffer_head *blk_bh = NULL; struct ocfs2_xattr_block *xb; int ret = 0; if (!di->i_xattr_loc) return ret; ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); if (ret < 0) { mlog_errno(ret); return ret; } xs->xattr_bh = blk_bh; xb = (struct ocfs2_xattr_block *)blk_bh->b_data; if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { xs->header = &xb->xb_attrs.xb_header; xs->base = (void *)xs->header; xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; xs->here = xs->header->xh_entries; ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); } else ret = ocfs2_xattr_index_block_find(inode, blk_bh, name_index, name, xs); if (ret && ret != -ENODATA) { xs->xattr_bh = NULL; goto cleanup; } xs->not_found = ret; return 0; cleanup: brelse(blk_bh); return ret; } static int ocfs2_create_xattr_block(struct inode *inode, struct buffer_head *inode_bh, struct ocfs2_xattr_set_ctxt *ctxt, int indexed, struct buffer_head **ret_bh) { int ret; u16 suballoc_bit_start; u32 num_got; u64 suballoc_loc, first_blkno; struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; struct buffer_head *new_bh = NULL; struct ocfs2_xattr_block *xblk; ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); if (ret < 0) { mlog_errno(ret); goto end; } ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1, &suballoc_loc, &suballoc_bit_start, &num_got, &first_blkno); if (ret < 0) { mlog_errno(ret); goto end; } new_bh = sb_getblk(inode->i_sb, first_blkno); if (!new_bh) { ret = -ENOMEM; mlog_errno(ret); goto end; } ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), new_bh, OCFS2_JOURNAL_ACCESS_CREATE); if (ret < 0) { mlog_errno(ret); goto end; } /* Initialize ocfs2_xattr_block */ xblk = (struct ocfs2_xattr_block *)new_bh->b_data; memset(xblk, 0, inode->i_sb->s_blocksize); strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc); xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); xblk->xb_fs_generation = cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation); xblk->xb_blkno = cpu_to_le64(first_blkno); if (indexed) { struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; xr->xt_clusters = cpu_to_le32(1); xr->xt_last_eb_blk = 0; xr->xt_list.l_tree_depth = 0; xr->xt_list.l_count = 
cpu_to_le16( ocfs2_xattr_recs_per_xb(inode->i_sb)); xr->xt_list.l_next_free_rec = cpu_to_le16(1); xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); } ocfs2_journal_dirty(ctxt->handle, new_bh); /* Add it to the inode */ di->i_xattr_loc = cpu_to_le64(first_blkno); spin_lock(&OCFS2_I(inode)->ip_lock); OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL; di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); spin_unlock(&OCFS2_I(inode)->ip_lock); ocfs2_journal_dirty(ctxt->handle, inode_bh); *ret_bh = new_bh; new_bh = NULL; end: brelse(new_bh); return ret; } /* * ocfs2_xattr_block_set() * * Set, replace or remove an extended attribute into external block. * */ static int ocfs2_xattr_block_set(struct inode *inode, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, struct ocfs2_xattr_set_ctxt *ctxt) { struct buffer_head *new_bh = NULL; struct ocfs2_xattr_block *xblk = NULL; int ret; struct ocfs2_xa_loc loc; if (!xs->xattr_bh) { ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt, 0, &new_bh); if (ret) { mlog_errno(ret); goto end; } xs->xattr_bh = new_bh; xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; xs->header = &xblk->xb_attrs.xb_header; xs->base = (void *)xs->header; xs->end = (void *)xblk + inode->i_sb->s_blocksize; xs->here = xs->header->xh_entries; } else xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh, xs->not_found ? NULL : xs->here); ret = ocfs2_xa_set(&loc, xi, ctxt); if (!ret) xs->here = loc.xl_entry; else if ((ret != -ENOSPC) || ctxt->set_abort) goto end; else { ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); if (ret) goto end; } } if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED) ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt); end: return ret; } /* Check whether the new xattr can be inserted into the inode. */ static int ocfs2_xattr_can_be_in_inode(struct inode *inode, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs) { struct ocfs2_xattr_entry *last; int free, i; size_t min_offs = xs->end - xs->base; if (!xs->header) return 0; last = xs->header->xh_entries; for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { size_t offs = le16_to_cpu(last->xe_name_offset); if (offs < min_offs) min_offs = offs; last += 1; } free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; if (free < 0) return 0; BUG_ON(!xs->not_found); if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi))) return 1; return 0; } static int ocfs2_calc_xattr_set_need(struct inode *inode, struct ocfs2_dinode *di, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xis, struct ocfs2_xattr_search *xbs, int *clusters_need, int *meta_need, int *credits_need) { int ret = 0, old_in_xb = 0; int clusters_add = 0, meta_add = 0, credits = 0; struct buffer_head *bh = NULL; struct ocfs2_xattr_block *xb = NULL; struct ocfs2_xattr_entry *xe = NULL; struct ocfs2_xattr_value_root *xv = NULL; char *base = NULL; int name_offset, name_len = 0; u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, xi->xi_value_len); u64 value_size; /* * Calculate the clusters we need to write. * No matter whether we replace an old one or add a new one, * we need this for writing. 
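 * (Only values larger than OCFS2_XATTR_INLINE_SIZE are written into
 * their own clusters, so the data-write credits added just below apply
 * only to values stored outside the entry.)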
*/ if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) credits += new_clusters * ocfs2_clusters_to_blocks(inode->i_sb, 1); if (xis->not_found && xbs->not_found) { credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { clusters_add += new_clusters; credits += ocfs2_calc_extend_credits(inode->i_sb, &def_xv.xv.xr_list); } goto meta_guess; } if (!xis->not_found) { xe = xis->here; name_offset = le16_to_cpu(xe->xe_name_offset); name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); base = xis->base; credits += OCFS2_INODE_UPDATE_CREDITS; } else { int i, block_off = 0; xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; xe = xbs->here; name_offset = le16_to_cpu(xe->xe_name_offset); name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); i = xbs->here - xbs->header->xh_entries; old_in_xb = 1; if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, bucket_xh(xbs->bucket), i, &block_off, &name_offset); base = bucket_block(xbs->bucket, block_off); credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); } else { base = xbs->base; credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS; } } /* * delete a xattr doesn't need metadata and cluster allocation. * so just calculate the credits and return. * * The credits for removing the value tree will be extended * by ocfs2_remove_extent itself. */ if (!xi->xi_value) { if (!ocfs2_xattr_is_local(xe)) credits += ocfs2_remove_extent_credits(inode->i_sb); goto out; } /* do cluster allocation guess first. */ value_size = le64_to_cpu(xe->xe_value_size); if (old_in_xb) { /* * In xattr set, we always try to set the xe in inode first, * so if it can be inserted into inode successfully, the old * one will be removed from the xattr block, and this xattr * will be inserted into inode as a new xattr in inode. */ if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { clusters_add += new_clusters; credits += ocfs2_remove_extent_credits(inode->i_sb) + OCFS2_INODE_UPDATE_CREDITS; if (!ocfs2_xattr_is_local(xe)) credits += ocfs2_calc_extend_credits( inode->i_sb, &def_xv.xv.xr_list); goto out; } } if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { /* the new values will be stored outside. */ u32 old_clusters = 0; if (!ocfs2_xattr_is_local(xe)) { old_clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_size); xv = (struct ocfs2_xattr_value_root *) (base + name_offset + name_len); value_size = OCFS2_XATTR_ROOT_SIZE; } else xv = &def_xv.xv; if (old_clusters >= new_clusters) { credits += ocfs2_remove_extent_credits(inode->i_sb); goto out; } else { meta_add += ocfs2_extend_meta_needed(&xv->xr_list); clusters_add += new_clusters - old_clusters; credits += ocfs2_calc_extend_credits(inode->i_sb, &xv->xr_list); if (value_size >= OCFS2_XATTR_ROOT_SIZE) goto out; } } else { /* * Now the new value will be stored inside. So if the new * value is smaller than the size of value root or the old * value, we don't need any allocation, otherwise we have * to guess metadata allocation. */ if ((ocfs2_xattr_is_local(xe) && (value_size >= xi->xi_value_len)) || (!ocfs2_xattr_is_local(xe) && OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len)) goto out; } meta_guess: /* calculate metadata allocation. 
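 * If an xattr block already exists we may need to extend or create its
 * index tree and allocate a cluster for a new bucket; otherwise we
 * account for creating the xattr block itself, plus metadata for an
 * external value tree when the value is stored outside.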
*/ if (di->i_xattr_loc) { if (!xbs->xattr_bh) { ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), &bh); if (ret) { mlog_errno(ret); goto out; } xb = (struct ocfs2_xattr_block *)bh->b_data; } else xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; /* * If there is already an xattr tree, good, we can calculate * like other b-trees. Otherwise we may have the chance of * create a tree, the credit calculation is borrowed from * ocfs2_calc_extend_credits with root_el = NULL. And the * new tree will be cluster based, so no meta is needed. */ if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; meta_add += ocfs2_extend_meta_needed(el); credits += ocfs2_calc_extend_credits(inode->i_sb, el); } else credits += OCFS2_SUBALLOC_ALLOC + 1; /* * This cluster will be used either for new bucket or for * new xattr block. * If the cluster size is the same as the bucket size, one * more is needed since we may need to extend the bucket * also. */ clusters_add += 1; credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); if (OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(inode->i_sb)->s_clustersize) { credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); clusters_add += 1; } } else { credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { struct ocfs2_extent_list *el = &def_xv.xv.xr_list; meta_add += ocfs2_extend_meta_needed(el); credits += ocfs2_calc_extend_credits(inode->i_sb, el); } else { meta_add += 1; } } out: if (clusters_need) *clusters_need = clusters_add; if (meta_need) *meta_need = meta_add; if (credits_need) *credits_need = credits; brelse(bh); return ret; } static int ocfs2_init_xattr_set_ctxt(struct inode *inode, struct ocfs2_dinode *di, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xis, struct ocfs2_xattr_search *xbs, struct ocfs2_xattr_set_ctxt *ctxt, int extra_meta, int *credits) { int clusters_add, meta_add, ret; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt)); ocfs2_init_dealloc_ctxt(&ctxt->dealloc); ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, &clusters_add, &meta_add, credits); if (ret) { mlog_errno(ret); return ret; } meta_add += extra_meta; trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add, clusters_add, *credits); if (meta_add) { ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, &ctxt->meta_ac); if (ret) { mlog_errno(ret); goto out; } } if (clusters_add) { ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac); if (ret) mlog_errno(ret); } out: if (ret) { if (ctxt->meta_ac) { ocfs2_free_alloc_context(ctxt->meta_ac); ctxt->meta_ac = NULL; } /* * We cannot have an error and a non null ctxt->data_ac. */ } return ret; } static int __ocfs2_xattr_set_handle(struct inode *inode, struct ocfs2_dinode *di, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xis, struct ocfs2_xattr_search *xbs, struct ocfs2_xattr_set_ctxt *ctxt) { int ret = 0, credits, old_found; if (!xi->xi_value) { /* Remove existing extended attribute */ if (!xis->not_found) ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); else if (!xbs->not_found) ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); } else { /* We always try to set extended attribute into inode first*/ ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); if (!ret && !xbs->not_found) { /* * If succeed and that extended attribute existing in * external block, then we will remove it. 
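 * (The removal is done by re-running ocfs2_xattr_block_set() with
 * xi_value cleared; as in ocfs2_xattr_set(), a set with a NULL value
 * is a delete.)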
*/ xi->xi_value = NULL; xi->xi_value_len = 0; old_found = xis->not_found; xis->not_found = -ENODATA; ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, NULL, NULL, &credits); xis->not_found = old_found; if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_extend_trans(ctxt->handle, credits); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); } else if ((ret == -ENOSPC) && !ctxt->set_abort) { if (di->i_xattr_loc && !xbs->xattr_bh) { ret = ocfs2_xattr_block_find(inode, xi->xi_name_index, xi->xi_name, xbs); if (ret) goto out; old_found = xis->not_found; xis->not_found = -ENODATA; ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, NULL, NULL, &credits); xis->not_found = old_found; if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_extend_trans(ctxt->handle, credits); if (ret) { mlog_errno(ret); goto out; } } /* * If there is no space in the inode, we will set the extended * attribute in an external block. */ ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); if (ret) goto out; if (!xis->not_found) { /* * If that succeeds and the extended attribute * exists in the inode, we will remove it. */ xi->xi_value = NULL; xi->xi_value_len = 0; xbs->not_found = -ENODATA; ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, NULL, NULL, &credits); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_extend_trans(ctxt->handle, credits); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); } } } if (!ret) { /* Update inode ctime. */ ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), xis->inode_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } inode_set_ctime_current(inode); di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode)); di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); ocfs2_journal_dirty(ctxt->handle, xis->inode_bh); } out: return ret; } /* * This function is only called during inode creation, * to init the security/acl xattrs of the new inode. * All transaction credits have been reserved in mknod. */ int ocfs2_xattr_set_handle(handle_t *handle, struct inode *inode, struct buffer_head *di_bh, int name_index, const char *name, const void *value, size_t value_len, int flags, struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *data_ac) { struct ocfs2_dinode *di; int ret; struct ocfs2_xattr_info xi = { .xi_name_index = name_index, .xi_name = name, .xi_name_len = strlen(name), .xi_value = value, .xi_value_len = value_len, }; struct ocfs2_xattr_search xis = { .not_found = -ENODATA, }; struct ocfs2_xattr_search xbs = { .not_found = -ENODATA, }; struct ocfs2_xattr_set_ctxt ctxt = { .handle = handle, .meta_ac = meta_ac, .data_ac = data_ac, }; if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) return -EOPNOTSUPP; /* * In extreme situations we may need an xattr bucket when the * block size is too small. We have already reserved * the credits for the bucket in mknod.
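 * (This mirrors ocfs2_xattr_ibody_find() and ocfs2_xattr_ibody_set(),
 * which give up on inline storage at the minimum block size, so even a
 * freshly created inode may have to go straight to bucket storage.)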
*/ if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) { xbs.bucket = ocfs2_xattr_bucket_new(inode); if (!xbs.bucket) { mlog_errno(-ENOMEM); return -ENOMEM; } } xis.inode_bh = xbs.inode_bh = di_bh; di = (struct ocfs2_dinode *)di_bh->b_data; down_write(&OCFS2_I(inode)->ip_xattr_sem); ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); if (ret) goto cleanup; if (xis.not_found) { ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); if (ret) goto cleanup; } ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); cleanup: up_write(&OCFS2_I(inode)->ip_xattr_sem); brelse(xbs.xattr_bh); ocfs2_xattr_bucket_free(xbs.bucket); return ret; } /* * ocfs2_xattr_set() * * Set, replace or remove an extended attribute for this inode. * value is NULL to remove an existing extended attribute, else either * create or replace an extended attribute. */ int ocfs2_xattr_set(struct inode *inode, int name_index, const char *name, const void *value, size_t value_len, int flags) { struct buffer_head *di_bh = NULL; struct ocfs2_dinode *di; int ret, credits, had_lock, ref_meta = 0, ref_credits = 0; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct inode *tl_inode = osb->osb_tl_inode; struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; struct ocfs2_refcount_tree *ref_tree = NULL; struct ocfs2_lock_holder oh; struct ocfs2_xattr_info xi = { .xi_name_index = name_index, .xi_name = name, .xi_name_len = strlen(name), .xi_value = value, .xi_value_len = value_len, }; struct ocfs2_xattr_search xis = { .not_found = -ENODATA, }; struct ocfs2_xattr_search xbs = { .not_found = -ENODATA, }; if (!ocfs2_supports_xattr(osb)) return -EOPNOTSUPP; /* * Only xbs will be used on indexed trees. xis doesn't need a * bucket. */ xbs.bucket = ocfs2_xattr_bucket_new(inode); if (!xbs.bucket) { mlog_errno(-ENOMEM); return -ENOMEM; } had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh); if (had_lock < 0) { ret = had_lock; mlog_errno(ret); goto cleanup_nolock; } xis.inode_bh = xbs.inode_bh = di_bh; di = (struct ocfs2_dinode *)di_bh->b_data; down_write(&OCFS2_I(inode)->ip_xattr_sem); /* * Scan inode and external block to find the same name * extended attribute and collect search information. */ ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); if (ret) goto cleanup; if (xis.not_found) { ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); if (ret) goto cleanup; } if (xis.not_found && xbs.not_found) { ret = -ENODATA; if (flags & XATTR_REPLACE) goto cleanup; ret = 0; if (!value) goto cleanup; } else { ret = -EEXIST; if (flags & XATTR_CREATE) goto cleanup; } /* Check whether the value is refcounted and do some preparation. */ if (ocfs2_is_refcount_inode(inode) && (!xis.not_found || !xbs.not_found)) { ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, &xis, &xbs, &ref_tree, &ref_meta, &ref_credits); if (ret) { mlog_errno(ret); goto cleanup; } } inode_lock(tl_inode); if (ocfs2_truncate_log_needs_flush(osb)) { ret = __ocfs2_flush_truncate_log(osb); if (ret < 0) { inode_unlock(tl_inode); mlog_errno(ret); goto cleanup; } } inode_unlock(tl_inode); ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, &xbs, &ctxt, ref_meta, &credits); if (ret) { mlog_errno(ret); goto cleanup; } /* we need to update inode's ctime field, so add credit for it. 
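 * (The ctime fields themselves are updated at the end of
 * __ocfs2_xattr_set_handle() once the set succeeds; here we only make
 * sure the handle has credits for that dinode write.)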
	 */
	credits += OCFS2_INODE_UPDATE_CREDITS;
	ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
	if (IS_ERR(ctxt.handle)) {
		ret = PTR_ERR(ctxt.handle);
		mlog_errno(ret);
		goto out_free_ac;
	}

	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
	ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0);

	ocfs2_commit_trans(osb, ctxt.handle);

out_free_ac:
	if (ctxt.data_ac)
		ocfs2_free_alloc_context(ctxt.data_ac);
	if (ctxt.meta_ac)
		ocfs2_free_alloc_context(ctxt.meta_ac);
	if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
		ocfs2_schedule_truncate_log_flush(osb, 1);
	ocfs2_run_deallocs(osb, &ctxt.dealloc);

cleanup:
	if (ref_tree)
		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	up_write(&OCFS2_I(inode)->ip_xattr_sem);
	if (!value && !ret) {
		ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
		if (ret)
			mlog_errno(ret);
	}
	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);

cleanup_nolock:
	brelse(di_bh);
	brelse(xbs.xattr_bh);
	ocfs2_xattr_bucket_free(xbs.bucket);

	return ret;
}

/*
 * Find the xattr extent rec which may contain name_hash.
 * e_cpos will be the first name hash of the xattr rec.
 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
 */
static int ocfs2_xattr_get_rec(struct inode *inode,
			       u32 name_hash,
			       u64 *p_blkno,
			       u32 *e_cpos,
			       u32 *num_clusters,
			       struct ocfs2_extent_list *el)
{
	int ret = 0, i;
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_rec *rec = NULL;
	u64 e_blkno = 0;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ret = ocfs2_error(inode->i_sb,
					  "Inode %lu has non zero tree depth in xattr tree block %llu\n",
					  inode->i_ino,
					  (unsigned long long)eb_bh->b_blocknr);
			goto out;
		}
	}

	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
		rec = &el->l_recs[i];

		if (le32_to_cpu(rec->e_cpos) <= name_hash) {
			e_blkno = le64_to_cpu(rec->e_blkno);
			break;
		}
	}

	if (!e_blkno) {
		ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
				  inode->i_ino,
				  le32_to_cpu(rec->e_cpos),
				  ocfs2_rec_clusters(el, rec));
		goto out;
	}

	*p_blkno = le64_to_cpu(rec->e_blkno);
	*num_clusters = le16_to_cpu(rec->e_leaf_clusters);
	if (e_cpos)
		*e_cpos = le32_to_cpu(rec->e_cpos);
out:
	brelse(eb_bh);
	return ret;
}

typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket,
				void *para);

static int ocfs2_find_xe_in_bucket(struct inode *inode,
				   struct ocfs2_xattr_bucket *bucket,
				   int name_index,
				   const char *name,
				   u32 name_hash,
				   u16 *xe_index,
				   int *found)
{
	int i, ret = 0, cmp = 1, block_off, new_offset;
	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
	size_t name_len = strlen(name);
	struct ocfs2_xattr_entry *xe = NULL;
	char *xe_name;

	/*
	 * We don't use binary search in the bucket because there
	 * may be multiple entries with the same name hash.
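	 * (Different names can collide on xe_name_hash, so after the hash
	 * matches we still compare the name index, the name length and,
	 * finally, the name bytes themselves.)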
	 */
	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
		xe = &xh->xh_entries[i];

		if (name_hash > le32_to_cpu(xe->xe_name_hash))
			continue;
		else if (name_hash < le32_to_cpu(xe->xe_name_hash))
			break;

		cmp = name_index - ocfs2_xattr_get_type(xe);
		if (!cmp)
			cmp = name_len - xe->xe_name_len;
		if (cmp)
			continue;

		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
							xh,
							i,
							&block_off,
							&new_offset);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		xe_name = bucket_block(bucket, block_off) + new_offset;
		if (!memcmp(name, xe_name, name_len)) {
			*xe_index = i;
			*found = 1;
			ret = 0;
			break;
		}
	}

	return ret;
}

/*
 * Find the specified xattr entry in a series of buckets.
 * The series starts at p_blkno and spans num_clusters clusters.
 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
 * the number of valid buckets.
 *
 * Return the buffer_head this xattr should reside in.  If the xattr's
 * hash falls in the gap between two buckets, return the lower bucket.
 */
static int ocfs2_xattr_bucket_find(struct inode *inode,
				   int name_index,
				   const char *name,
				   u32 name_hash,
				   u64 p_blkno,
				   u32 first_hash,
				   u32 num_clusters,
				   struct ocfs2_xattr_search *xs)
{
	int ret, found = 0;
	struct ocfs2_xattr_header *xh = NULL;
	struct ocfs2_xattr_entry *xe = NULL;
	u16 index = 0;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	int low_bucket = 0, bucket, high_bucket;
	struct ocfs2_xattr_bucket *search;
	u64 blkno, lower_blkno = 0;

	search = ocfs2_xattr_bucket_new(inode);
	if (!search) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_xattr_bucket(search, p_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xh = bucket_xh(search);
	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
	while (low_bucket <= high_bucket) {
		ocfs2_xattr_bucket_relse(search);

		bucket = (low_bucket + high_bucket) / 2;
		blkno = p_blkno + bucket * blk_per_bucket;
		ret = ocfs2_read_xattr_bucket(search, blkno);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		xh = bucket_xh(search);
		xe = &xh->xh_entries[0];
		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
			high_bucket = bucket - 1;
			continue;
		}

		/*
		 * Check whether the hash of the last entry in our
		 * bucket is larger than the search one.  For an empty
		 * bucket, the last one is also the first one.
		 */
		if (xh->xh_count)
			xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];

		/* record lower_blkno which may be the insert place. */
		lower_blkno = blkno;

		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
			low_bucket = bucket + 1;
			continue;
		}

		/* The searched xattr should reside in this bucket if it exists. */
		ret = ocfs2_find_xe_in_bucket(inode, search,
					      name_index, name, name_hash,
					      &index, &found);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		break;
	}

	/*
	 * Record the bucket we have found.
	 * When the xattr's hash value falls in the gap between two buckets,
	 * we always set it to the previous bucket.
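	 * For example, if one bucket ends at hash 200 and the next one
	 * starts at hash 300, a lookup for hash 250 leaves lower_blkno
	 * pointing at the first of the two; that bucket is also where an
	 * insert for this hash would land.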
	 */
	if (!lower_blkno)
		lower_blkno = p_blkno;

	/* This should be in cache - we just read it during the search */
	ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xs->header = bucket_xh(xs->bucket);
	xs->base = bucket_block(xs->bucket, 0);
	xs->end = xs->base + inode->i_sb->s_blocksize;

	if (found) {
		xs->here = &xs->header->xh_entries[index];
		trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno,
			name, name_index, name_hash,
			(unsigned long long)bucket_blkno(xs->bucket),
			index);
	} else
		ret = -ENODATA;

out:
	ocfs2_xattr_bucket_free(search);
	return ret;
}

static int ocfs2_xattr_index_block_find(struct inode *inode,
					struct buffer_head *root_bh,
					int name_index,
					const char *name,
					struct ocfs2_xattr_search *xs)
{
	int ret;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)root_bh->b_data;
	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
	struct ocfs2_extent_list *el = &xb_root->xt_list;
	u64 p_blkno = 0;
	u32 first_hash, num_clusters = 0;
	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));

	if (le16_to_cpu(el->l_next_free_rec) == 0)
		return -ENODATA;

	trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno,
					name, name_index, name_hash,
					(unsigned long long)root_bh->b_blocknr,
					-1);

	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
				  &num_clusters, el);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);

	trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno,
					name, name_index, first_hash,
					(unsigned long long)p_blkno,
					num_clusters);

	ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
				      p_blkno, first_hash, num_clusters, xs);

out:
	return ret;
}

static int ocfs2_iterate_xattr_buckets(struct inode *inode,
				       u64 blkno,
				       u32 clusters,
				       xattr_bucket_func *func,
				       void *para)
{
	int i, ret = 0;
	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
	u32 num_buckets = clusters * bpc;
	struct ocfs2_xattr_bucket *bucket;

	bucket = ocfs2_xattr_bucket_new(inode);
	if (!bucket) {
		mlog_errno(-ENOMEM);
		return -ENOMEM;
	}

	trace_ocfs2_iterate_xattr_buckets(
		(unsigned long long)OCFS2_I(inode)->ip_blkno,
		(unsigned long long)blkno, clusters);

	for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
		ret = ocfs2_read_xattr_bucket(bucket, blkno);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/*
		 * The real bucket num in this series of blocks is stored
		 * in the 1st bucket.
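		 * (clusters * bpc is only an upper bound on the series; the
		 * count read from the first bucket's xh_num_buckets tells us
		 * how many buckets are actually in use.)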
		 */
		if (i == 0)
			num_buckets = le16_to_cpu(
				bucket_xh(bucket)->xh_num_buckets);

		trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno,
		     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
		if (func) {
			ret = func(inode, bucket, para);
			if (ret && ret != -ERANGE)
				mlog_errno(ret);
			/* Fall through to bucket_relse() */
		}

		ocfs2_xattr_bucket_relse(bucket);
		if (ret)
			break;
	}

	ocfs2_xattr_bucket_free(bucket);
	return ret;
}

struct ocfs2_xattr_tree_list {
	char *buffer;
	size_t buffer_size;
	size_t result;
};

static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
					     struct ocfs2_xattr_header *xh,
					     int index,
					     int *block_off,
					     int *new_offset)
{
	u16 name_offset;

	if (index < 0 || index >= le16_to_cpu(xh->xh_count))
		return -EINVAL;

	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);

	*block_off = name_offset >> sb->s_blocksize_bits;
	*new_offset = name_offset % sb->s_blocksize;

	return 0;
}

static int ocfs2_list_xattr_bucket(struct inode *inode,
				   struct ocfs2_xattr_bucket *bucket,
				   void *para)
{
	int ret = 0, type;
	struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
	int i, block_off, new_offset;
	const char *name;

	for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
		struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
		type = ocfs2_xattr_get_type(entry);

		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
							bucket_xh(bucket),
							i,
							&block_off,
							&new_offset);
		if (ret)
			break;

		name = (const char *)bucket_block(bucket, block_off) +
			new_offset;
		ret = ocfs2_xattr_list_entry(inode->i_sb,
					     xl->buffer,
					     xl->buffer_size,
					     &xl->result,
					     type, name,
					     entry->xe_name_len);
		if (ret)
			break;
	}

	return ret;
}

static int ocfs2_iterate_xattr_index_block(struct inode *inode,
					   struct buffer_head *blk_bh,
					   xattr_tree_rec_func *rec_func,
					   void *para)
{
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)blk_bh->b_data;
	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
	int ret = 0;
	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
	u64 p_blkno = 0;

	if (!el->l_next_free_rec || !rec_func)
		return 0;

	while (name_hash > 0) {
		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
					  &e_cpos, &num_clusters, el);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
			       num_clusters, para);
		if (ret) {
			if (ret != -ERANGE)
				mlog_errno(ret);
			break;
		}

		if (e_cpos == 0)
			break;

		name_hash = e_cpos - 1;
	}

	return ret;
}

static int ocfs2_list_xattr_tree_rec(struct inode *inode,
				     struct buffer_head *root_bh,
				     u64 blkno, u32 cpos, u32 len, void *para)
{
	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
					   ocfs2_list_xattr_bucket, para);
}

static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
					     struct buffer_head *blk_bh,
					     char *buffer,
					     size_t buffer_size)
{
	int ret;
	struct ocfs2_xattr_tree_list xl = {
		.buffer = buffer,
		.buffer_size = buffer_size,
		.result = 0,
	};

	ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
					ocfs2_list_xattr_tree_rec, &xl);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = xl.result;
out:
	return ret;
}

static int cmp_xe(const void *a, const void *b)
{
	const struct ocfs2_xattr_entry *l = a, *r = b;
	u32 l_hash = le32_to_cpu(l->xe_name_hash);
	u32 r_hash = le32_to_cpu(r->xe_name_hash);

	if (l_hash > r_hash)
		return 1;
	if (l_hash < r_hash)
		return -1;
	return 0;
}

/*
 * When the ocfs2_xattr_block is filled up, a new bucket will be created
 * and all the xattr entries will be moved to the new bucket.
 * The header goes at the start of the bucket, and the names+values are
 * filled from the end.  This is why *target starts as the last buffer.
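 *
 * Roughly, after the copy the bucket is laid out as:
 *
 *   [ ocfs2_xattr_header | xh_entries[] | ...free... | names/values ]
 *     start of bucket                                  packed at the end
 *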
 * Note: we need to sort the entries since they are not saved in order
 * in the ocfs2_xattr_block.
 */
static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
					   struct buffer_head *xb_bh,
					   struct ocfs2_xattr_bucket *bucket)
{
	int i, blocksize = inode->i_sb->s_blocksize;
	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	u16 offset, size, off_change;
	struct ocfs2_xattr_entry *xe;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)xb_bh->b_data;
	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
	u16 count = le16_to_cpu(xb_xh->xh_count);
	char *src = xb_bh->b_data;
	char *target = bucket_block(bucket, blks - 1);

	trace_ocfs2_cp_xattr_block_to_bucket_begin(
				(unsigned long long)xb_bh->b_blocknr,
				(unsigned long long)bucket_blkno(bucket));

	for (i = 0; i < blks; i++)
		memset(bucket_block(bucket, i), 0, blocksize);

	/*
	 * Since the xe_name_offset is based on ocfs2_xattr_header,
	 * there is an offset change corresponding to the change of
	 * ocfs2_xattr_header's position.
	 */
	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
	xe = &xb_xh->xh_entries[count - 1];
	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
	size = blocksize - offset;

	/* copy all the names and values. */
	memcpy(target + offset, src + offset, size);

	/* Init new header now. */
	xh->xh_count = xb_xh->xh_count;
	xh->xh_num_buckets = cpu_to_le16(1);
	xh->xh_name_value_len = cpu_to_le16(size);
	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);

	/* copy all the entries. */
	target = bucket_block(bucket, 0);
	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
	size = count * sizeof(struct ocfs2_xattr_entry);
	memcpy(target + offset, (char *)xb_xh + offset, size);

	/* Change the xe offset for all the xe because of the move. */
	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
		 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
	for (i = 0; i < count; i++)
		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);

	trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change);

	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
	     cmp_xe, NULL);
}

/*
 * After we move an xattr from block to index b-tree, we have to
 * update ocfs2_xattr_search to the new xe and base.
 *
 * When the entry is in the xattr block, xattr_bh indicates the storage
 * place; when the entry is in the index b-tree, "bucket" indicates the
 * real place of the xattr.
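 *
 * (The entry keeps its array index across the move, so the new xs->here
 * is simply the entry at the same position in the bucket's header as it
 * had in the block's header.)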
 */
static void ocfs2_xattr_update_xattr_search(struct inode *inode,
					    struct ocfs2_xattr_search *xs,
					    struct buffer_head *old_bh)
{
	char *buf = old_bh->b_data;
	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
	int i;

	xs->header = bucket_xh(xs->bucket);
	xs->base = bucket_block(xs->bucket, 0);
	xs->end = xs->base + inode->i_sb->s_blocksize;

	if (xs->not_found)
		return;

	i = xs->here - old_xh->xh_entries;
	xs->here = &xs->header->xh_entries[i];
}

static int ocfs2_xattr_create_index_block(struct inode *inode,
					  struct ocfs2_xattr_search *xs,
					  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u32 bit_off, len;
	u64 blkno;
	handle_t *handle = ctxt->handle;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct buffer_head *xb_bh = xs->xattr_bh;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)xb_bh->b_data;
	struct ocfs2_xattr_tree_root *xr;
	u16 xb_flags = le16_to_cpu(xb->xb_flags);

	trace_ocfs2_xattr_create_index_block_begin(
				(unsigned long long)xb_bh->b_blocknr);

	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
	BUG_ON(!xs->bucket);

	/*
	 * XXX:
	 * We can use this lock for now, and maybe move to a dedicated mutex
	 * if performance becomes a problem later.
	 */
	down_write(&oi->ip_alloc_sem);

	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
				     1, 1, &bit_off, &len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * The bucket may spread across many blocks, and we will only touch
	 * the 1st block and the last block in the whole bucket (one for the
	 * entries and one for the data).
	 */
	blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);

	trace_ocfs2_xattr_create_index_block((unsigned long long)blkno);

	ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
						OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);

	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);

	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
	       offsetof(struct ocfs2_xattr_block, xb_attrs));
	xr = &xb->xb_attrs.xb_root;
	xr->xt_clusters = cpu_to_le32(1);
	xr->xt_last_eb_blk = 0;
	xr->xt_list.l_tree_depth = 0;
	xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
	xr->xt_list.l_next_free_rec = cpu_to_le16(1);

	xr->xt_list.l_recs[0].e_cpos = 0;
	xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
	xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);

	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);

	ocfs2_journal_dirty(handle, xb_bh);

out:
	up_write(&oi->ip_alloc_sem);

	return ret;
}

static int cmp_xe_offset(const void *a, const void *b)
{
	const struct ocfs2_xattr_entry *l = a, *r = b;
	u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
	u32 r_name_offset = le16_to_cpu(r->xe_name_offset);

	if (l_name_offset < r_name_offset)
		return 1;
	if (l_name_offset > r_name_offset)
		return -1;
	return 0;
}

/*
 * Defragment an xattr bucket if we find that the bucket has some
 * holes between name/value pairs.
 * We will move all the name/value pairs to the end of the bucket
 * so that we can spare s