| 6 136 896 878 126 85 113 318 276 52 460 273 5 12 33 35 37 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KVM_X86_LAPIC_H #define __KVM_X86_LAPIC_H #include <kvm/iodev.h> #include <asm/apic.h> #include <linux/kvm_host.h> #include "hyperv.h" #include "smm.h" #define KVM_APIC_INIT 0 #define KVM_APIC_SIPI 1 #define APIC_SHORT_MASK 0xc0000 #define APIC_DEST_NOSHORT 0x0 #define APIC_DEST_MASK 0x800 #define APIC_BUS_CYCLE_NS_DEFAULT 1 #define APIC_BROADCAST 0xFF #define X2APIC_BROADCAST 0xFFFFFFFFul #define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) enum lapic_mode { LAPIC_MODE_DISABLED = 0, LAPIC_MODE_INVALID = X2APIC_ENABLE, LAPIC_MODE_XAPIC = MSR_IA32_APICBASE_ENABLE, LAPIC_MODE_X2APIC = MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE, }; enum lapic_lvt_entry { LVT_TIMER, LVT_THERMAL_MONITOR, LVT_PERFORMANCE_COUNTER, LVT_LINT0, LVT_LINT1, LVT_ERROR, LVT_CMCI, KVM_APIC_MAX_NR_LVT_ENTRIES, }; #define APIC_LVTx(x) ((x) == LVT_CMCI ? APIC_LVTCMCI : APIC_LVTT + 0x10 * (x)) struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ ktime_t target_expiration; u32 timer_mode; u32 timer_mode_mask; u64 tscdeadline; u64 expired_tscdeadline; u32 timer_advance_ns; atomic_t pending; /* accumulated triggered timers */ bool hv_timer_in_use; }; struct kvm_lapic { unsigned long base_address; struct kvm_io_device dev; struct kvm_timer lapic_timer; u32 divide_count; struct kvm_vcpu *vcpu; bool apicv_active; bool sw_enabled; bool irr_pending; bool lvt0_in_nmi_mode; /* Select registers in the vAPIC cannot be read/written. */ bool guest_apic_protected; /* Number of bits set in ISR. */ s16 isr_count; /* The highest vector set in ISR; if -1 - invalid, must scan ISR. */ int highest_isr_cache; /** * APIC register page. The layout matches the register layout seen by * the guest 1:1, because it is accessed by the vmx microcode. * Note: Only one register, the TPR, is used by the microcode. */ void *regs; gpa_t vapic_addr; struct gfn_to_hva_cache vapic_cache; unsigned long pending_events; unsigned int sipi_vector; int nr_lvt_entries; }; struct dest_map; int kvm_create_lapic(struct kvm_vcpu *vcpu); void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector); int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int kvm_apic_accept_events(struct kvm_vcpu *vcpu); void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); void kvm_apic_set_version(struct kvm_vcpu *vcpu); void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu); bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int shorthand, unsigned int dest, int dest_mode); void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec); bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr); bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr); void kvm_apic_update_ppr(struct kvm_vcpu *vcpu); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, struct dest_map *dest_map); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); void kvm_apic_update_apicv(struct kvm_vcpu *vcpu); int kvm_alloc_apic_access_page(struct kvm *kvm); void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu); bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, struct dest_map *dest_map); void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high); int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated); int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset); void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector); int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); int kvm_x2apic_icr_write_fast(struct kvm_lapic *apic, u64 data); int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len); void kvm_lapic_exit(void); u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic); static inline void kvm_lapic_set_irr(int vec, struct kvm_lapic *apic) { apic_set_vector(vec, apic->regs + APIC_IRR); /* * irr_pending must be true if any interrupt is pending; set it after * APIC_IRR to avoid race with apic_clear_irr */ apic->irr_pending = true; } static inline u32 kvm_lapic_get_reg(struct kvm_lapic *apic, int reg_off) { return apic_get_reg(apic->regs, reg_off); } DECLARE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu); static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_has_noapic_vcpu)) return vcpu->arch.apic; return true; } extern struct static_key_false_deferred apic_hw_disabled; static inline bool kvm_apic_hw_enabled(struct kvm_lapic *apic) { if (static_branch_unlikely(&apic_hw_disabled.key)) return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; return true; } extern struct static_key_false_deferred apic_sw_disabled; static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic) { if (static_branch_unlikely(&apic_sw_disabled.key)) return apic->sw_enabled; return true; } static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) { return lapic_in_kernel(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic); } static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) { return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); } static inline int apic_x2apic_mode(struct kvm_lapic *apic) { return apic->vcpu->arch.apic_base & X2APIC_ENABLE; } static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu) { return lapic_in_kernel(vcpu) && vcpu->arch.apic->apicv_active; } static inline bool kvm_apic_has_pending_init_or_sipi(struct kvm_vcpu *vcpu) { return lapic_in_kernel(vcpu) && vcpu->arch.apic->pending_events; } static inline bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu) { return !is_smm(vcpu) && !kvm_x86_call(apic_init_signal_blocked)(vcpu); } static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) { return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); } bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, unsigned long *vcpu_bitmap); bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu); void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu); static inline enum lapic_mode kvm_apic_mode(u64 apic_base) { return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); } static inline enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu) { return kvm_apic_mode(vcpu->arch.apic_base); } static inline u8 kvm_xapic_id(struct kvm_lapic *apic) { return kvm_lapic_get_reg(apic, APIC_ID) >> 24; } #endif |
| 18818 13194 2200 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * Supervisor Mode Access Prevention support * * Copyright (C) 2012 Intel Corporation * Author: H. Peter Anvin <hpa@linux.intel.com> */ #ifndef _ASM_X86_SMAP_H #define _ASM_X86_SMAP_H #include <asm/nops.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> #ifdef __ASSEMBLER__ #define ASM_CLAC \ ALTERNATIVE "", "clac", X86_FEATURE_SMAP #define ASM_STAC \ ALTERNATIVE "", "stac", X86_FEATURE_SMAP #else /* __ASSEMBLER__ */ /* * The CLAC/STAC instructions toggle the enforcement of * X86_FEATURE_SMAP along with X86_FEATURE_LASS. * * SMAP enforcement is based on the _PAGE_BIT_USER bit in the page * tables. The kernel is not allowed to touch pages with that bit set * unless the AC bit is set. * * Use stac()/clac() when accessing userspace (_PAGE_USER) mappings, * regardless of location. * * Note: a barrier is implicit in alternative(). */ static __always_inline void clac(void) { alternative("", "clac", X86_FEATURE_SMAP); } static __always_inline void stac(void) { alternative("", "stac", X86_FEATURE_SMAP); } /* * LASS enforcement is based on bit 63 of the virtual address. The * kernel is not allowed to touch memory in the lower half of the * virtual address space. * * Use lass_stac()/lass_clac() to toggle the AC bit for kernel data * accesses (!_PAGE_USER) that are blocked by LASS, but not by SMAP. * * Even with the AC bit set, LASS will continue to block instruction * fetches from the user half of the address space. To allow those, * clear CR4.LASS to disable the LASS mechanism entirely. * * Note: a barrier is implicit in alternative(). */ static __always_inline void lass_clac(void) { alternative("", "clac", X86_FEATURE_LASS); } static __always_inline void lass_stac(void) { alternative("", "stac", X86_FEATURE_LASS); } static __always_inline unsigned long smap_save(void) { unsigned long flags; asm volatile ("# smap_save\n\t" ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "\n\t" "", "pushf; pop %0; clac", X86_FEATURE_SMAP) : "=rm" (flags) : : "memory", "cc"); return flags; } static __always_inline void smap_restore(unsigned long flags) { asm volatile ("# smap_restore\n\t" ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "\n\t" "", "push %0; popf", X86_FEATURE_SMAP) : : "g" (flags) : "memory", "cc"); } /* These macros can be used in asm() statements */ #define ASM_CLAC \ ALTERNATIVE("", "clac", X86_FEATURE_SMAP) #define ASM_STAC \ ALTERNATIVE("", "stac", X86_FEATURE_SMAP) #define ASM_CLAC_UNSAFE \ ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "\n\t" "clac", X86_FEATURE_SMAP) #define ASM_STAC_UNSAFE \ ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "\n\t" "stac", X86_FEATURE_SMAP) #endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SMAP_H */ |
| 17 17 17 17 19 19 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 | // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Mellanox Technologies. All rights reserved */ #include <linux/debugfs.h> #include <linux/err.h> #include <linux/etherdevice.h> #include <linux/inet.h> #include <linux/kernel.h> #include <linux/random.h> #include <linux/slab.h> #include <net/devlink.h> #include <net/ip.h> #include <net/psample.h> #include <uapi/linux/ip.h> #include <uapi/linux/udp.h> #include "netdevsim.h" #define NSIM_PSAMPLE_REPORT_INTERVAL_MS 100 #define NSIM_PSAMPLE_INVALID_TC 0xFFFF #define NSIM_PSAMPLE_L4_DATA_LEN 100 struct nsim_dev_psample { struct delayed_work psample_dw; struct dentry *ddir; struct psample_group *group; u32 rate; u32 group_num; u32 trunc_size; int in_ifindex; int out_ifindex; u16 out_tc; u64 out_tc_occ_max; u64 latency_max; bool is_active; }; static struct sk_buff *nsim_dev_psample_skb_build(void) { int tot_len, data_len = NSIM_PSAMPLE_L4_DATA_LEN; struct sk_buff *skb; struct udphdr *udph; struct ethhdr *eth; struct iphdr *iph; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return NULL; tot_len = sizeof(struct iphdr) + sizeof(struct udphdr) + data_len; skb_reset_mac_header(skb); eth = skb_put(skb, sizeof(struct ethhdr)); eth_random_addr(eth->h_dest); eth_random_addr(eth->h_source); eth->h_proto = htons(ETH_P_IP); skb->protocol = htons(ETH_P_IP); skb_set_network_header(skb, skb->len); iph = skb_put(skb, sizeof(struct iphdr)); iph->protocol = IPPROTO_UDP; iph->saddr = in_aton("192.0.2.1"); iph->daddr = in_aton("198.51.100.1"); iph->version = 0x4; iph->frag_off = 0; iph->ihl = 0x5; iph->tot_len = htons(tot_len); iph->id = 0; iph->ttl = 100; iph->check = 0; iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); skb_set_transport_header(skb, skb->len); udph = skb_put_zero(skb, sizeof(struct udphdr) + data_len); get_random_bytes(&udph->source, sizeof(u16)); get_random_bytes(&udph->dest, sizeof(u16)); udph->len = htons(sizeof(struct udphdr) + data_len); return skb; } static void nsim_dev_psample_md_prepare(const struct nsim_dev_psample *psample, struct psample_metadata *md, unsigned int len) { md->trunc_size = psample->trunc_size ? psample->trunc_size : len; md->in_ifindex = psample->in_ifindex; md->out_ifindex = psample->out_ifindex; if (psample->out_tc != NSIM_PSAMPLE_INVALID_TC) { md->out_tc = psample->out_tc; md->out_tc_valid = 1; } if (psample->out_tc_occ_max) { u64 out_tc_occ; get_random_bytes(&out_tc_occ, sizeof(u64)); md->out_tc_occ = out_tc_occ & (psample->out_tc_occ_max - 1); md->out_tc_occ_valid = 1; } if (psample->latency_max) { u64 latency; get_random_bytes(&latency, sizeof(u64)); md->latency = latency & (psample->latency_max - 1); md->latency_valid = 1; } } static void nsim_dev_psample_report_work(struct work_struct *work) { struct nsim_dev_psample *psample; struct psample_metadata md = {}; struct sk_buff *skb; unsigned long delay; psample = container_of(work, struct nsim_dev_psample, psample_dw.work); skb = nsim_dev_psample_skb_build(); if (!skb) goto out; nsim_dev_psample_md_prepare(psample, &md, skb->len); psample_sample_packet(psample->group, skb, psample->rate, &md); consume_skb(skb); out: delay = msecs_to_jiffies(NSIM_PSAMPLE_REPORT_INTERVAL_MS); schedule_delayed_work(&psample->psample_dw, delay); } static int nsim_dev_psample_enable(struct nsim_dev *nsim_dev) { struct nsim_dev_psample *psample = nsim_dev->psample; struct devlink *devlink; unsigned long delay; if (psample->is_active) return -EBUSY; devlink = priv_to_devlink(nsim_dev); psample->group = psample_group_get(devlink_net(devlink), psample->group_num); if (!psample->group) return -EINVAL; delay = msecs_to_jiffies(NSIM_PSAMPLE_REPORT_INTERVAL_MS); schedule_delayed_work(&psample->psample_dw, delay); psample->is_active = true; return 0; } static int nsim_dev_psample_disable(struct nsim_dev *nsim_dev) { struct nsim_dev_psample *psample = nsim_dev->psample; if (!psample->is_active) return -EINVAL; psample->is_active = false; cancel_delayed_work_sync(&psample->psample_dw); psample_group_put(psample->group); return 0; } static ssize_t nsim_dev_psample_enable_write(struct file *file, const char __user *data, size_t count, loff_t *ppos) { struct nsim_dev *nsim_dev = file->private_data; bool enable; int err; err = kstrtobool_from_user(data, count, &enable); if (err) return err; if (enable) err = nsim_dev_psample_enable(nsim_dev); else err = nsim_dev_psample_disable(nsim_dev); return err ? err : count; } static const struct file_operations nsim_psample_enable_fops = { .open = simple_open, .write = nsim_dev_psample_enable_write, .llseek = generic_file_llseek, .owner = THIS_MODULE, }; int nsim_dev_psample_init(struct nsim_dev *nsim_dev) { struct nsim_dev_psample *psample; int err; psample = kzalloc(sizeof(*psample), GFP_KERNEL); if (!psample) return -ENOMEM; nsim_dev->psample = psample; INIT_DELAYED_WORK(&psample->psample_dw, nsim_dev_psample_report_work); psample->ddir = debugfs_create_dir("psample", nsim_dev->ddir); if (IS_ERR(psample->ddir)) { err = PTR_ERR(psample->ddir); goto err_psample_free; } /* Populate sampling parameters with sane defaults. */ psample->rate = 100; debugfs_create_u32("rate", 0600, psample->ddir, &psample->rate); psample->group_num = 10; debugfs_create_u32("group_num", 0600, psample->ddir, &psample->group_num); psample->trunc_size = 0; debugfs_create_u32("trunc_size", 0600, psample->ddir, &psample->trunc_size); psample->in_ifindex = 1; debugfs_create_u32("in_ifindex", 0600, psample->ddir, &psample->in_ifindex); psample->out_ifindex = 2; debugfs_create_u32("out_ifindex", 0600, psample->ddir, &psample->out_ifindex); psample->out_tc = 0; debugfs_create_u16("out_tc", 0600, psample->ddir, &psample->out_tc); psample->out_tc_occ_max = 10000; debugfs_create_u64("out_tc_occ_max", 0600, psample->ddir, &psample->out_tc_occ_max); psample->latency_max = 50; debugfs_create_u64("latency_max", 0600, psample->ddir, &psample->latency_max); debugfs_create_file("enable", 0200, psample->ddir, nsim_dev, &nsim_psample_enable_fops); return 0; err_psample_free: kfree(nsim_dev->psample); return err; } void nsim_dev_psample_exit(struct nsim_dev *nsim_dev) { debugfs_remove_recursive(nsim_dev->psample->ddir); if (nsim_dev->psample->is_active) { cancel_delayed_work_sync(&nsim_dev->psample->psample_dw); psample_group_put(nsim_dev->psample->group); } kfree(nsim_dev->psample); } |
| 2 2 8 8 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 | // SPDX-License-Identifier: GPL-2.0+ /* * drivers/of/property.c - Procedures for accessing and interpreting * Devicetree properties and graphs. * * Initially created by copying procedures from drivers/of/base.c. This * file contains the OF property as well as the OF graph interface * functions. * * Paul Mackerras August 1996. * Copyright (C) 1996-2005 Paul Mackerras. * * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. * {engebret|bergner}@us.ibm.com * * Adapted for sparc and sparc64 by David S. Miller davem@davemloft.net * * Reconsolidated from arch/x/kernel/prom.c by Stephen Rothwell and * Grant Likely. */ #define pr_fmt(fmt) "OF: " fmt #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_device.h> #include <linux/of_graph.h> #include <linux/of_irq.h> #include <linux/string.h> #include <linux/moduleparam.h> #include "of_private.h" /** * of_property_read_bool - Find a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * * Search for a boolean property in a device node. Usage on non-boolean * property types is deprecated. * * Return: true if the property exists false otherwise. */ bool of_property_read_bool(const struct device_node *np, const char *propname) { struct property *prop = of_find_property(np, propname, NULL); /* * Boolean properties should not have a value. Testing for property * presence should either use of_property_present() or just read the * property value and check the returned error code. */ if (prop && prop->length) pr_warn("%pOF: Read of boolean property '%s' with a value.\n", np, propname); return prop ? true : false; } EXPORT_SYMBOL(of_property_read_bool); /** * of_graph_is_present() - check graph's presence * @node: pointer to device_node containing graph port * * Return: True if @node has a port or ports (with a port) sub-node, * false otherwise. */ bool of_graph_is_present(const struct device_node *node) { struct device_node *ports __free(device_node) = of_get_child_by_name(node, "ports"); if (ports) node = ports; struct device_node *port __free(device_node) = of_get_child_by_name(node, "port"); return !!port; } EXPORT_SYMBOL(of_graph_is_present); /** * of_property_count_elems_of_size - Count the number of elements in a property * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @elem_size: size of the individual element * * Search for a property in a device node and count the number of elements of * size elem_size in it. * * Return: The number of elements on sucess, -EINVAL if the property does not * exist or its length does not match a multiple of elem_size and -ENODATA if * the property does not have a value. */ int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size) { const struct property *prop = of_find_property(np, propname, NULL); if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; if (prop->length % elem_size != 0) { pr_err("size of %s in node %pOF is not a multiple of %d\n", propname, np, elem_size); return -EINVAL; } return prop->length / elem_size; } EXPORT_SYMBOL_GPL(of_property_count_elems_of_size); /** * of_find_property_value_of_size * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @min: minimum allowed length of property value * @max: maximum allowed length of property value (0 means unlimited) * @len: if !=NULL, actual length is written to here * * Search for a property in a device node and valid the requested size. * * Return: The property value on success, -EINVAL if the property does not * exist, -ENODATA if property does not have a value, and -EOVERFLOW if the * property data is too small or too large. * */ static void *of_find_property_value_of_size(const struct device_node *np, const char *propname, u32 min, u32 max, size_t *len) { const struct property *prop = of_find_property(np, propname, NULL); if (!prop) return ERR_PTR(-EINVAL); if (!prop->value) return ERR_PTR(-ENODATA); if (prop->length < min) return ERR_PTR(-EOVERFLOW); if (max && prop->length > max) return ERR_PTR(-EOVERFLOW); if (len) *len = prop->length; return prop->value; } /** * of_property_read_u8_index - Find and read a u8 from a multi-value property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the u8 in the list of values * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 8-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u8 value can be decoded. */ int of_property_read_u8_index(const struct device_node *np, const char *propname, u32 index, u8 *out_value) { const u8 *val = of_find_property_value_of_size(np, propname, ((index + 1) * sizeof(*out_value)), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = val[index]; return 0; } EXPORT_SYMBOL_GPL(of_property_read_u8_index); /** * of_property_read_u16_index - Find and read a u16 from a multi-value property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the u16 in the list of values * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 16-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u16 value can be decoded. */ int of_property_read_u16_index(const struct device_node *np, const char *propname, u32 index, u16 *out_value) { const u16 *val = of_find_property_value_of_size(np, propname, ((index + 1) * sizeof(*out_value)), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = be16_to_cpup(((__be16 *)val) + index); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u16_index); /** * of_property_read_u32_index - Find and read a u32 from a multi-value property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the u32 in the list of values * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 32-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u32 value can be decoded. */ int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value) { const u32 *val = of_find_property_value_of_size(np, propname, ((index + 1) * sizeof(*out_value)), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = be32_to_cpup(((__be32 *)val) + index); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u32_index); /** * of_property_read_u64_index - Find and read a u64 from a multi-value property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the u64 in the list of values * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 64-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u64 value can be decoded. */ int of_property_read_u64_index(const struct device_node *np, const char *propname, u32 index, u64 *out_value) { const u64 *val = of_find_property_value_of_size(np, propname, ((index + 1) * sizeof(*out_value)), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = be64_to_cpup(((__be64 *)val) + index); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u64_index); /** * of_property_read_variable_u8_array - Find and read an array of u8 from a * property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 8-bit value(s) from * it. * * dts entry of array should be like: * ``property = /bits/ 8 <0x50 0x60 0x70>;`` * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u8 value can be decoded. */ int of_property_read_variable_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const u8 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) *out_values++ = *val++; return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u8_array); /** * of_property_read_variable_u16_array - Find and read an array of u16 from a * property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 16-bit value(s) from * it. * * dts entry of array should be like: * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;`` * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u16 value can be decoded. */ int of_property_read_variable_u16_array(const struct device_node *np, const char *propname, u16 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const __be16 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) *out_values++ = be16_to_cpup(val++); return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u16_array); /** * of_property_read_variable_u32_array - Find and read an array of 32 bit * integers from a property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to return found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 32-bit value(s) from * it. * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u32 value can be decoded. */ int of_property_read_variable_u32_array(const struct device_node *np, const char *propname, u32 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const __be32 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) *out_values++ = be32_to_cpup(val++); return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u32_array); /** * of_property_read_u64 - Find and read a 64 bit integer from a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_value: pointer to return value, modified only if return value is 0. * * Search for a property in a device node and read a 64-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u64 value can be decoded. */ int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value) { const __be32 *val = of_find_property_value_of_size(np, propname, sizeof(*out_value), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = of_read_number(val, 2); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u64); /** * of_property_read_variable_u64_array - Find and read an array of 64 bit * integers from a property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 64-bit value(s) from * it. * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u64 value can be decoded. */ int of_property_read_variable_u64_array(const struct device_node *np, const char *propname, u64 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const __be32 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) { *out_values++ = of_read_number(val, 2); val += 2; } return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u64_array); /** * of_property_read_string - Find and read a string from a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_string: pointer to null terminated return string, modified only if * return value is 0. * * Search for a property in a device tree node and retrieve a null * terminated string value (pointer to data, not a copy). * * Return: 0 on success, -EINVAL if the property does not exist, -ENODATA if * property does not have a value, and -EILSEQ if the string is not * null-terminated within the length of the property data. * * Note that the empty string "" has length of 1, thus -ENODATA cannot * be interpreted as an empty string. * * The out_string pointer is modified only if a valid string can be decoded. */ int of_property_read_string(const struct device_node *np, const char *propname, const char **out_string) { const struct property *prop = of_find_property(np, propname, NULL); if (!prop) return -EINVAL; if (!prop->length) return -ENODATA; if (strnlen(prop->value, prop->length) >= prop->length) return -EILSEQ; *out_string = prop->value; return 0; } EXPORT_SYMBOL_GPL(of_property_read_string); /** * of_property_match_string() - Find string in a list and return index * @np: pointer to the node containing the string list property * @propname: string list property name * @string: pointer to the string to search for in the string list * * Search for an exact match of string in a device node property which is a * string of lists. * * Return: the index of the first occurrence of the string on success, -EINVAL * if the property does not exist, -ENODATA if the property does not have a * value, and -EILSEQ if the string is not null-terminated within the length of * the property data. */ int of_property_match_string(const struct device_node *np, const char *propname, const char *string) { const struct property *prop = of_find_property(np, propname, NULL); size_t l; int i; const char *p, *end; if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; p = prop->value; end = p + prop->length; for (i = 0; p < end; i++, p += l) { l = strnlen(p, end - p) + 1; if (p + l > end) return -EILSEQ; pr_debug("comparing %s with %s\n", string, p); if (strcmp(string, p) == 0) return i; /* Found it; return index */ } return -ENODATA; } EXPORT_SYMBOL_GPL(of_property_match_string); /** * of_property_read_string_helper() - Utility helper for parsing string properties * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_strs: output array of string pointers. * @sz: number of array elements to read. * @skip: Number of strings to skip over at beginning of list. * * Don't call this function directly. It is a utility helper for the * of_property_read_string*() family of functions. */ int of_property_read_string_helper(const struct device_node *np, const char *propname, const char **out_strs, size_t sz, int skip) { const struct property *prop = of_find_property(np, propname, NULL); int l = 0, i = 0; const char *p, *end; if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; p = prop->value; end = p + prop->length; for (i = 0; p < end && (!out_strs || i < skip + sz); i++, p += l) { l = strnlen(p, end - p) + 1; if (p + l > end) return -EILSEQ; if (out_strs && i >= skip) *out_strs++ = p; } i -= skip; return i <= 0 ? -ENODATA : i; } EXPORT_SYMBOL_GPL(of_property_read_string_helper); const __be32 *of_prop_next_u32(const struct property *prop, const __be32 *cur, u32 *pu) { const void *curv = cur; if (!prop) return NULL; if (!cur) { curv = prop->value; goto out_val; } curv += sizeof(*cur); if (curv >= prop->value + prop->length) return NULL; out_val: *pu = be32_to_cpup(curv); return curv; } EXPORT_SYMBOL_GPL(of_prop_next_u32); const char *of_prop_next_string(const struct property *prop, const char *cur) { const void *curv = cur; if (!prop) return NULL; if (!cur) return prop->value; curv += strlen(cur) + 1; if (curv >= prop->value + prop->length) return NULL; return curv; } EXPORT_SYMBOL_GPL(of_prop_next_string); /** * of_graph_parse_endpoint() - parse common endpoint node properties * @node: pointer to endpoint device_node * @endpoint: pointer to the OF endpoint data structure * * The caller should hold a reference to @node. */ int of_graph_parse_endpoint(const struct device_node *node, struct of_endpoint *endpoint) { struct device_node *port_node __free(device_node) = of_get_parent(node); WARN_ONCE(!port_node, "%s(): endpoint %pOF has no parent node\n", __func__, node); memset(endpoint, 0, sizeof(*endpoint)); endpoint->local_node = node; /* * It doesn't matter whether the two calls below succeed. * If they don't then the default value 0 is used. */ of_property_read_u32(port_node, "reg", &endpoint->port); of_property_read_u32(node, "reg", &endpoint->id); return 0; } EXPORT_SYMBOL(of_graph_parse_endpoint); /** * of_graph_get_port_by_id() - get the port matching a given id * @parent: pointer to the parent device node * @id: id of the port * * Return: A 'port' node pointer with refcount incremented. The caller * has to use of_node_put() on it when done. */ struct device_node *of_graph_get_port_by_id(struct device_node *parent, u32 id) { struct device_node *node __free(device_node) = of_get_child_by_name(parent, "ports"); if (node) parent = node; for_each_child_of_node_scoped(parent, port) { u32 port_id = 0; if (!of_node_name_eq(port, "port")) continue; of_property_read_u32(port, "reg", &port_id); if (id == port_id) return_ptr(port); } return NULL; } EXPORT_SYMBOL(of_graph_get_port_by_id); /** * of_graph_get_next_port() - get next port node. * @parent: pointer to the parent device node, or parent ports node * @prev: previous port node, or NULL to get first * * Parent device node can be used as @parent whether device node has ports node * or not. It will work same as ports@0 node. * * Return: A 'port' node pointer with refcount incremented. Refcount * of the passed @prev node is decremented. */ struct device_node *of_graph_get_next_port(const struct device_node *parent, struct device_node *prev) { if (!parent) return NULL; if (!prev) { struct device_node *node __free(device_node) = of_get_child_by_name(parent, "ports"); if (node) parent = node; return of_get_child_by_name(parent, "port"); } do { prev = of_get_next_child(parent, prev); if (!prev) break; } while (!of_node_name_eq(prev, "port")); return prev; } EXPORT_SYMBOL(of_graph_get_next_port); /** * of_graph_get_next_port_endpoint() - get next endpoint node in port. * If it reached to end of the port, it will return NULL. * @port: pointer to the target port node * @prev: previous endpoint node, or NULL to get first * * Return: An 'endpoint' node pointer with refcount incremented. Refcount * of the passed @prev node is decremented. */ struct device_node *of_graph_get_next_port_endpoint(const struct device_node *port, struct device_node *prev) { while (1) { prev = of_get_next_child(port, prev); if (!prev) break; if (WARN(!of_node_name_eq(prev, "endpoint"), "non endpoint node is used (%pOF)", prev)) continue; break; } return prev; } EXPORT_SYMBOL(of_graph_get_next_port_endpoint); /** * of_graph_get_next_endpoint() - get next endpoint node * @parent: pointer to the parent device node * @prev: previous endpoint node, or NULL to get first * * Return: An 'endpoint' node pointer with refcount incremented. Refcount * of the passed @prev node is decremented. */ struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *prev) { struct device_node *endpoint; struct device_node *port; if (!parent) return NULL; /* * Start by locating the port node. If no previous endpoint is specified * search for the first port node, otherwise get the previous endpoint * parent port node. */ if (!prev) { port = of_graph_get_next_port(parent, NULL); if (!port) { pr_debug("graph: no port node found in %pOF\n", parent); return NULL; } } else { port = of_get_parent(prev); if (WARN_ONCE(!port, "%s(): endpoint %pOF has no parent node\n", __func__, prev)) return NULL; } while (1) { /* * Now that we have a port node, get the next endpoint by * getting the next child. If the previous endpoint is NULL this * will return the first child. */ endpoint = of_graph_get_next_port_endpoint(port, prev); if (endpoint) { of_node_put(port); return endpoint; } /* No more endpoints under this port, try the next one. */ prev = NULL; port = of_graph_get_next_port(parent, port); if (!port) return NULL; } } EXPORT_SYMBOL(of_graph_get_next_endpoint); /** * of_graph_get_endpoint_by_regs() - get endpoint node of specific identifiers * @parent: pointer to the parent device node * @port_reg: identifier (value of reg property) of the parent port node * @reg: identifier (value of reg property) of the endpoint node * * Return: An 'endpoint' node pointer which is identified by reg and at the same * is the child of a port node identified by port_reg. reg and port_reg are * ignored when they are -1. Use of_node_put() on the pointer when done. */ struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg) { struct of_endpoint endpoint; struct device_node *node = NULL; for_each_endpoint_of_node(parent, node) { of_graph_parse_endpoint(node, &endpoint); if (((port_reg == -1) || (endpoint.port == port_reg)) && ((reg == -1) || (endpoint.id == reg))) return node; } return NULL; } EXPORT_SYMBOL(of_graph_get_endpoint_by_regs); /** * of_graph_get_remote_endpoint() - get remote endpoint node * @node: pointer to a local endpoint device_node * * Return: Remote endpoint node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_endpoint(const struct device_node *node) { /* Get remote endpoint node. */ return of_parse_phandle(node, "remote-endpoint", 0); } EXPORT_SYMBOL(of_graph_get_remote_endpoint); /** * of_graph_get_port_parent() - get port's parent node * @node: pointer to a local endpoint device_node * * Return: device node associated with endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_port_parent(struct device_node *node) { unsigned int depth; if (!node) return NULL; /* * Preserve usecount for passed in node as of_get_next_parent() * will do of_node_put() on it. */ of_node_get(node); /* Walk 3 levels up only if there is 'ports' node. */ for (depth = 3; depth && node; depth--) { node = of_get_next_parent(node); if (depth == 2 && !of_node_name_eq(node, "ports") && !of_node_name_eq(node, "in-ports") && !of_node_name_eq(node, "out-ports")) break; } return node; } EXPORT_SYMBOL(of_graph_get_port_parent); /** * of_graph_get_remote_port_parent() - get remote port's parent node * @node: pointer to a local endpoint device_node * * Return: Remote device node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_port_parent( const struct device_node *node) { /* Get remote endpoint node. */ struct device_node *np __free(device_node) = of_graph_get_remote_endpoint(node); return of_graph_get_port_parent(np); } EXPORT_SYMBOL(of_graph_get_remote_port_parent); /** * of_graph_get_remote_port() - get remote port node * @node: pointer to a local endpoint device_node * * Return: Remote port node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_port(const struct device_node *node) { struct device_node *np; /* Get remote endpoint node. */ np = of_graph_get_remote_endpoint(node); if (!np) return NULL; return of_get_next_parent(np); } EXPORT_SYMBOL(of_graph_get_remote_port); /** * of_graph_get_endpoint_count() - get the number of endpoints in a device node * @np: parent device node containing ports and endpoints * * Return: count of endpoint of this device node */ unsigned int of_graph_get_endpoint_count(const struct device_node *np) { struct device_node *endpoint; unsigned int num = 0; for_each_endpoint_of_node(np, endpoint) num++; return num; } EXPORT_SYMBOL(of_graph_get_endpoint_count); /** * of_graph_get_port_count() - get the number of port in a device or ports node * @np: pointer to the device or ports node * * Return: count of port of this device or ports node */ unsigned int of_graph_get_port_count(struct device_node *np) { unsigned int num = 0; for_each_of_graph_port(np, port) num++; return num; } EXPORT_SYMBOL(of_graph_get_port_count); /** * of_graph_get_remote_node() - get remote parent device_node for given port/endpoint * @node: pointer to parent device_node containing graph port/endpoint * @port: identifier (value of reg property) of the parent port node * @endpoint: identifier (value of reg property) of the endpoint node * * Return: Remote device node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_node(const struct device_node *node, u32 port, u32 endpoint) { struct device_node *endpoint_node, *remote; endpoint_node = of_graph_get_endpoint_by_regs(node, port, endpoint); if (!endpoint_node) { pr_debug("no valid endpoint (%d, %d) for node %pOF\n", port, endpoint, node); return NULL; } remote = of_graph_get_remote_port_parent(endpoint_node); of_node_put(endpoint_node); if (!remote) { pr_debug("no valid remote node\n"); return NULL; } if (!of_device_is_available(remote)) { pr_debug("not available for remote node\n"); of_node_put(remote); return NULL; } return remote; } EXPORT_SYMBOL(of_graph_get_remote_node); static struct fwnode_handle *of_fwnode_get(struct fwnode_handle *fwnode) { return of_fwnode_handle(of_node_get(to_of_node(fwnode))); } static void of_fwnode_put(struct fwnode_handle *fwnode) { of_node_put(to_of_node(fwnode)); } static bool of_fwnode_device_is_available(const struct fwnode_handle *fwnode) { return of_device_is_available(to_of_node(fwnode)); } static bool of_fwnode_device_dma_supported(const struct fwnode_handle *fwnode) { return true; } static enum dev_dma_attr of_fwnode_device_get_dma_attr(const struct fwnode_handle *fwnode) { if (of_dma_is_coherent(to_of_node(fwnode))) return DEV_DMA_COHERENT; else return DEV_DMA_NON_COHERENT; } static bool of_fwnode_property_present(const struct fwnode_handle *fwnode, const char *propname) { return of_property_present(to_of_node(fwnode), propname); } static bool of_fwnode_property_read_bool(const struct fwnode_handle *fwnode, const char *propname) { return of_property_read_bool(to_of_node(fwnode), propname); } static int of_fwnode_property_read_int_array(const struct fwnode_handle *fwnode, const char *propname, unsigned int elem_size, void *val, size_t nval) { const struct device_node *node = to_of_node(fwnode); if (!val) return of_property_count_elems_of_size(node, propname, elem_size); switch (elem_size) { case sizeof(u8): return of_property_read_u8_array(node, propname, val, nval); case sizeof(u16): return of_property_read_u16_array(node, propname, val, nval); case sizeof(u32): return of_property_read_u32_array(node, propname, val, nval); case sizeof(u64): return of_property_read_u64_array(node, propname, val, nval); } return -ENXIO; } static int of_fwnode_property_read_string_array(const struct fwnode_handle *fwnode, const char *propname, const char **val, size_t nval) { const struct device_node *node = to_of_node(fwnode); return val ? of_property_read_string_array(node, propname, val, nval) : of_property_count_strings(node, propname); } static const char *of_fwnode_get_name(const struct fwnode_handle *fwnode) { return kbasename(to_of_node(fwnode)->full_name); } static const char *of_fwnode_get_name_prefix(const struct fwnode_handle *fwnode) { /* Root needs no prefix here (its name is "/"). */ if (!to_of_node(fwnode)->parent) return ""; return "/"; } static struct fwnode_handle * of_fwnode_get_parent(const struct fwnode_handle *fwnode) { return of_fwnode_handle(of_get_parent(to_of_node(fwnode))); } static struct fwnode_handle * of_fwnode_get_next_child_node(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { return of_fwnode_handle(of_get_next_available_child(to_of_node(fwnode), to_of_node(child))); } static struct fwnode_handle * of_fwnode_get_named_child_node(const struct fwnode_handle *fwnode, const char *childname) { const struct device_node *node = to_of_node(fwnode); struct device_node *child; for_each_available_child_of_node(node, child) if (of_node_name_eq(child, childname)) return of_fwnode_handle(child); return NULL; } static int of_fwnode_get_reference_args(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, unsigned int nargs, unsigned int index, struct fwnode_reference_args *args) { struct of_phandle_args of_args; unsigned int i; int ret; if (nargs_prop) ret = of_parse_phandle_with_args(to_of_node(fwnode), prop, nargs_prop, index, &of_args); else ret = of_parse_phandle_with_fixed_args(to_of_node(fwnode), prop, nargs, index, &of_args); if (ret < 0) return ret; if (!args) { of_node_put(of_args.np); return 0; } args->nargs = of_args.args_count; args->fwnode = of_fwnode_handle(of_args.np); for (i = 0; i < NR_FWNODE_REFERENCE_ARGS; i++) args->args[i] = i < of_args.args_count ? of_args.args[i] : 0; return 0; } static struct fwnode_handle * of_fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { return of_fwnode_handle(of_graph_get_next_endpoint(to_of_node(fwnode), to_of_node(prev))); } static struct fwnode_handle * of_fwnode_graph_get_remote_endpoint(const struct fwnode_handle *fwnode) { return of_fwnode_handle( of_graph_get_remote_endpoint(to_of_node(fwnode))); } static struct fwnode_handle * of_fwnode_graph_get_port_parent(struct fwnode_handle *fwnode) { struct device_node *np; /* Get the parent of the port */ np = of_get_parent(to_of_node(fwnode)); if (!np) return NULL; /* Is this the "ports" node? If not, it's the port parent. */ if (!of_node_name_eq(np, "ports")) return of_fwnode_handle(np); return of_fwnode_handle(of_get_next_parent(np)); } static int of_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint) { const struct device_node *node = to_of_node(fwnode); struct device_node *port_node __free(device_node) = of_get_parent(node); endpoint->local_fwnode = fwnode; of_property_read_u32(port_node, "reg", &endpoint->port); of_property_read_u32(node, "reg", &endpoint->id); return 0; } static const void * of_fwnode_device_get_match_data(const struct fwnode_handle *fwnode, const struct device *dev) { return of_device_get_match_data(dev); } static void of_link_to_phandle(struct device_node *con_np, struct device_node *sup_np, u8 flags) { struct device_node *tmp_np __free(device_node) = of_node_get(sup_np); /* Check that sup_np and its ancestors are available. */ while (tmp_np) { if (of_fwnode_handle(tmp_np)->dev) break; if (!of_device_is_available(tmp_np)) return; tmp_np = of_get_next_parent(tmp_np); } fwnode_link_add(of_fwnode_handle(con_np), of_fwnode_handle(sup_np), flags); } /** * parse_prop_cells - Property parsing function for suppliers * * @np: Pointer to device tree node containing a list * @prop_name: Name of property to be parsed. Expected to hold phandle values * @index: For properties holding a list of phandles, this is the index * into the list. * @list_name: Property name that is known to contain list of phandle(s) to * supplier(s) * @cells_name: property name that specifies phandles' arguments count * * This is a helper function to parse properties that have a known fixed name * and are a list of phandles and phandle arguments. * * Returns: * - phandle node pointer with refcount incremented. Caller must of_node_put() * on it when done. * - NULL if no phandle found at index */ static struct device_node *parse_prop_cells(struct device_node *np, const char *prop_name, int index, const char *list_name, const char *cells_name) { struct of_phandle_args sup_args; if (strcmp(prop_name, list_name)) return NULL; if (__of_parse_phandle_with_args(np, list_name, cells_name, 0, index, &sup_args)) return NULL; return sup_args.np; } #define DEFINE_SIMPLE_PROP(fname, name, cells) \ static struct device_node *parse_##fname(struct device_node *np, \ const char *prop_name, int index) \ { \ return parse_prop_cells(np, prop_name, index, name, cells); \ } static int strcmp_suffix(const char *str, const char *suffix) { unsigned int len, suffix_len; len = strlen(str); suffix_len = strlen(suffix); if (len <= suffix_len) return -1; return strcmp(str + len - suffix_len, suffix); } /** * parse_suffix_prop_cells - Suffix property parsing function for suppliers * * @np: Pointer to device tree node containing a list * @prop_name: Name of property to be parsed. Expected to hold phandle values * @index: For properties holding a list of phandles, this is the index * into the list. * @suffix: Property suffix that is known to contain list of phandle(s) to * supplier(s) * @cells_name: property name that specifies phandles' arguments count * * This is a helper function to parse properties that have a known fixed suffix * and are a list of phandles and phandle arguments. * * Returns: * - phandle node pointer with refcount incremented. Caller must of_node_put() * on it when done. * - NULL if no phandle found at index */ static struct device_node *parse_suffix_prop_cells(struct device_node *np, const char *prop_name, int index, const char *suffix, const char *cells_name) { struct of_phandle_args sup_args; if (strcmp_suffix(prop_name, suffix)) return NULL; if (of_parse_phandle_with_args(np, prop_name, cells_name, index, &sup_args)) return NULL; return sup_args.np; } #define DEFINE_SUFFIX_PROP(fname, suffix, cells) \ static struct device_node *parse_##fname(struct device_node *np, \ const char *prop_name, int index) \ { \ return parse_suffix_prop_cells(np, prop_name, index, suffix, cells); \ } /** * struct supplier_bindings - Property parsing functions for suppliers * * @parse_prop: function name * parse_prop() finds the node corresponding to a supplier phandle * parse_prop.np: Pointer to device node holding supplier phandle property * parse_prop.prop_name: Name of property holding a phandle value * parse_prop.index: For properties holding a list of phandles, this is the * index into the list * @get_con_dev: If the consumer node containing the property is never converted * to a struct device, implement this ops so fw_devlink can use it * to find the true consumer. * @optional: Describes whether a supplier is mandatory or not * @fwlink_flags: Optional fwnode link flags to use when creating a fwnode link * for this property. * * Returns: * parse_prop() return values are * - phandle node pointer with refcount incremented. Caller must of_node_put() * on it when done. * - NULL if no phandle found at index */ struct supplier_bindings { struct device_node *(*parse_prop)(struct device_node *np, const char *prop_name, int index); struct device_node *(*get_con_dev)(struct device_node *np); bool optional; u8 fwlink_flags; }; DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells") DEFINE_SIMPLE_PROP(interconnects, "interconnects", "#interconnect-cells") DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells") DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells") DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells") DEFINE_SIMPLE_PROP(io_backends, "io-backends", "#io-backend-cells") DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells") DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells") DEFINE_SIMPLE_PROP(hwlocks, "hwlocks", "#hwlock-cells") DEFINE_SIMPLE_PROP(extcon, "extcon", NULL) DEFINE_SIMPLE_PROP(nvmem_cells, "nvmem-cells", "#nvmem-cell-cells") DEFINE_SIMPLE_PROP(phys, "phys", "#phy-cells") DEFINE_SIMPLE_PROP(wakeup_parent, "wakeup-parent", NULL) DEFINE_SIMPLE_PROP(pinctrl0, "pinctrl-0", NULL) DEFINE_SIMPLE_PROP(pinctrl1, "pinctrl-1", NULL) DEFINE_SIMPLE_PROP(pinctrl2, "pinctrl-2", NULL) DEFINE_SIMPLE_PROP(pinctrl3, "pinctrl-3", NULL) DEFINE_SIMPLE_PROP(pinctrl4, "pinctrl-4", NULL) DEFINE_SIMPLE_PROP(pinctrl5, "pinctrl-5", NULL) DEFINE_SIMPLE_PROP(pinctrl6, "pinctrl-6", NULL) DEFINE_SIMPLE_PROP(pinctrl7, "pinctrl-7", NULL) DEFINE_SIMPLE_PROP(pinctrl8, "pinctrl-8", NULL) DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells") DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) DEFINE_SIMPLE_PROP(backlight, "backlight", NULL) DEFINE_SIMPLE_PROP(panel, "panel", NULL) DEFINE_SIMPLE_PROP(msi_parent, "msi-parent", "#msi-cells") DEFINE_SIMPLE_PROP(post_init_providers, "post-init-providers", NULL) DEFINE_SIMPLE_PROP(access_controllers, "access-controllers", "#access-controller-cells") DEFINE_SIMPLE_PROP(pses, "pses", "#pse-cells") DEFINE_SIMPLE_PROP(power_supplies, "power-supplies", NULL) DEFINE_SUFFIX_PROP(regulators, "-supply", NULL) DEFINE_SUFFIX_PROP(gpio, "-gpio", "#gpio-cells") static struct device_node *parse_gpios(struct device_node *np, const char *prop_name, int index) { if (!strcmp_suffix(prop_name, ",nr-gpios")) return NULL; return parse_suffix_prop_cells(np, prop_name, index, "-gpios", "#gpio-cells"); } static struct device_node *parse_iommu_maps(struct device_node *np, const char *prop_name, int index) { if (strcmp(prop_name, "iommu-map")) return NULL; return of_parse_phandle(np, prop_name, (index * 4) + 1); } static struct device_node *parse_gpio_compat(struct device_node *np, const char *prop_name, int index) { struct of_phandle_args sup_args; if (strcmp(prop_name, "gpio") && strcmp(prop_name, "gpios")) return NULL; /* * Ignore node with gpio-hog property since its gpios are all provided * by its parent. */ if (of_property_read_bool(np, "gpio-hog")) return NULL; if (of_parse_phandle_with_args(np, prop_name, "#gpio-cells", index, &sup_args)) return NULL; return sup_args.np; } static struct device_node *parse_interrupts(struct device_node *np, const char *prop_name, int index) { struct of_phandle_args sup_args; if (!IS_ENABLED(CONFIG_OF_IRQ) || IS_ENABLED(CONFIG_PPC)) return NULL; if (strcmp(prop_name, "interrupts") && strcmp(prop_name, "interrupts-extended")) return NULL; return of_irq_parse_one(np, index, &sup_args) ? NULL : sup_args.np; } static struct device_node *parse_interrupt_map(struct device_node *np, const char *prop_name, int index) { const __be32 *imap, *imap_end; struct of_phandle_args sup_args; u32 addrcells, intcells; int imaplen; if (!IS_ENABLED(CONFIG_OF_IRQ)) return NULL; if (strcmp(prop_name, "interrupt-map")) return NULL; if (of_property_read_u32(np, "#interrupt-cells", &intcells)) return NULL; addrcells = of_bus_n_addr_cells(np); imap = of_get_property(np, "interrupt-map", &imaplen); if (!imap) return NULL; imaplen /= sizeof(*imap); imap_end = imap + imaplen; for (int i = 0; imap + addrcells + intcells + 1 < imap_end; i++) { imap += addrcells + intcells; imap = of_irq_parse_imap_parent(imap, imap_end - imap, &sup_args); if (!imap) return NULL; if (i == index) return sup_args.np; of_node_put(sup_args.np); } return NULL; } static struct device_node *parse_remote_endpoint(struct device_node *np, const char *prop_name, int index) { /* Return NULL for index > 0 to signify end of remote-endpoints. */ if (index > 0 || strcmp(prop_name, "remote-endpoint")) return NULL; return of_graph_get_remote_port_parent(np); } static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_clocks, }, { .parse_prop = parse_interconnects, }, { .parse_prop = parse_iommus, .optional = true, }, { .parse_prop = parse_iommu_maps, .optional = true, }, { .parse_prop = parse_mboxes, }, { .parse_prop = parse_io_channels, }, { .parse_prop = parse_io_backends, }, { .parse_prop = parse_dmas, .optional = true, }, { .parse_prop = parse_power_domains, }, { .parse_prop = parse_hwlocks, }, { .parse_prop = parse_extcon, }, { .parse_prop = parse_nvmem_cells, }, { .parse_prop = parse_phys, }, { .parse_prop = parse_wakeup_parent, }, { .parse_prop = parse_pinctrl0, }, { .parse_prop = parse_pinctrl1, }, { .parse_prop = parse_pinctrl2, }, { .parse_prop = parse_pinctrl3, }, { .parse_prop = parse_pinctrl4, }, { .parse_prop = parse_pinctrl5, }, { .parse_prop = parse_pinctrl6, }, { .parse_prop = parse_pinctrl7, }, { .parse_prop = parse_pinctrl8, }, { .parse_prop = parse_remote_endpoint, .get_con_dev = of_graph_get_port_parent, }, { .parse_prop = parse_pwms, }, { .parse_prop = parse_resets, }, { .parse_prop = parse_leds, }, { .parse_prop = parse_backlight, }, { .parse_prop = parse_panel, }, { .parse_prop = parse_msi_parent, }, { .parse_prop = parse_pses, }, { .parse_prop = parse_power_supplies, }, { .parse_prop = parse_gpio_compat, }, { .parse_prop = parse_interrupts, }, { .parse_prop = parse_interrupt_map, }, { .parse_prop = parse_access_controllers, }, { .parse_prop = parse_regulators, }, { .parse_prop = parse_gpio, }, { .parse_prop = parse_gpios, }, { .parse_prop = parse_post_init_providers, .fwlink_flags = FWLINK_FLAG_IGNORE, }, {} }; /** * of_link_property - Create device links to suppliers listed in a property * @con_np: The consumer device tree node which contains the property * @prop_name: Name of property to be parsed * * This function checks if the property @prop_name that is present in the * @con_np device tree node is one of the known common device tree bindings * that list phandles to suppliers. If @prop_name isn't one, this function * doesn't do anything. * * If @prop_name is one, this function attempts to create fwnode links from the * consumer device tree node @con_np to all the suppliers device tree nodes * listed in @prop_name. * * Any failed attempt to create a fwnode link will NOT result in an immediate * return. of_link_property() must create links to all the available supplier * device tree nodes even when attempts to create a link to one or more * suppliers fail. */ static int of_link_property(struct device_node *con_np, const char *prop_name) { struct device_node *phandle; const struct supplier_bindings *s = of_supplier_bindings; unsigned int i = 0; bool matched = false; /* Do not stop at first failed link, link all available suppliers. */ while (!matched && s->parse_prop) { if (s->optional && !fw_devlink_is_strict()) { s++; continue; } while ((phandle = s->parse_prop(con_np, prop_name, i))) { struct device_node *con_dev_np __free(device_node) = s->get_con_dev ? s->get_con_dev(con_np) : of_node_get(con_np); matched = true; i++; of_link_to_phandle(con_dev_np, phandle, s->fwlink_flags); of_node_put(phandle); } s++; } return 0; } static void __iomem *of_fwnode_iomap(struct fwnode_handle *fwnode, int index) { #ifdef CONFIG_OF_ADDRESS return of_iomap(to_of_node(fwnode), index); #else return NULL; #endif } static int of_fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index) { return of_irq_get(to_of_node(fwnode), index); } static int of_fwnode_add_links(struct fwnode_handle *fwnode) { const struct property *p; struct device_node *con_np = to_of_node(fwnode); if (IS_ENABLED(CONFIG_X86)) return 0; if (!con_np) return -EINVAL; for_each_property_of_node(con_np, p) of_link_property(con_np, p->name); return 0; } const struct fwnode_operations of_fwnode_ops = { .get = of_fwnode_get, .put = of_fwnode_put, .device_is_available = of_fwnode_device_is_available, .device_get_match_data = of_fwnode_device_get_match_data, .device_dma_supported = of_fwnode_device_dma_supported, .device_get_dma_attr = of_fwnode_device_get_dma_attr, .property_present = of_fwnode_property_present, .property_read_bool = of_fwnode_property_read_bool, .property_read_int_array = of_fwnode_property_read_int_array, .property_read_string_array = of_fwnode_property_read_string_array, .get_name = of_fwnode_get_name, .get_name_prefix = of_fwnode_get_name_prefix, .get_parent = of_fwnode_get_parent, .get_next_child_node = of_fwnode_get_next_child_node, .get_named_child_node = of_fwnode_get_named_child_node, .get_reference_args = of_fwnode_get_reference_args, .graph_get_next_endpoint = of_fwnode_graph_get_next_endpoint, .graph_get_remote_endpoint = of_fwnode_graph_get_remote_endpoint, .graph_get_port_parent = of_fwnode_graph_get_port_parent, .graph_parse_endpoint = of_fwnode_graph_parse_endpoint, .iomap = of_fwnode_iomap, .irq_get = of_fwnode_irq_get, .add_links = of_fwnode_add_links, }; EXPORT_SYMBOL_GPL(of_fwnode_ops); |
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * ebtables * * Authors: * Bart De Schuymer <bdschuym@pandora.be> * * ebtables.c,v 2.0, April, 2002 * * This code is strongly inspired by the iptables code which is * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling */ #ifndef _UAPI__LINUX_BRIDGE_EFF_H #define _UAPI__LINUX_BRIDGE_EFF_H #include <linux/types.h> #include <linux/if.h> #include <linux/netfilter_bridge.h> #define EBT_TABLE_MAXNAMELEN 32 #define EBT_CHAIN_MAXNAMELEN EBT_TABLE_MAXNAMELEN #define EBT_FUNCTION_MAXNAMELEN EBT_TABLE_MAXNAMELEN #define EBT_EXTENSION_MAXNAMELEN 31 /* verdicts >0 are "branches" */ #define EBT_ACCEPT -1 #define EBT_DROP -2 #define EBT_CONTINUE -3 #define EBT_RETURN -4 #define NUM_STANDARD_TARGETS 4 /* ebtables target modules store the verdict inside an int. We can * reclaim a part of this int for backwards compatible extensions. * The 4 lsb are more than enough to store the verdict. */ #define EBT_VERDICT_BITS 0x0000000F struct xt_match; struct xt_target; struct ebt_counter { __u64 pcnt; __u64 bcnt; }; struct ebt_replace { char name[EBT_TABLE_MAXNAMELEN]; unsigned int valid_hooks; /* nr of rules in the table */ unsigned int nentries; /* total size of the entries */ unsigned int entries_size; /* start of the chains */ struct ebt_entries __user *hook_entry[NF_BR_NUMHOOKS]; /* nr of counters userspace expects back */ unsigned int num_counters; /* where the kernel will put the old counters */ struct ebt_counter __user *counters; char __user *entries; }; struct ebt_replace_kernel { char name[EBT_TABLE_MAXNAMELEN]; unsigned int valid_hooks; /* nr of rules in the table */ unsigned int nentries; /* total size of the entries */ unsigned int entries_size; /* start of the chains */ struct ebt_entries *hook_entry[NF_BR_NUMHOOKS]; /* nr of counters userspace expects back */ unsigned int num_counters; /* where the kernel will put the old counters */ struct ebt_counter *counters; char *entries; }; struct ebt_entries { /* this field is always set to zero * See EBT_ENTRY_OR_ENTRIES. * Must be same size as ebt_entry.bitmask */ unsigned int distinguisher; /* the chain name */ char name[EBT_CHAIN_MAXNAMELEN]; /* counter offset for this chain */ unsigned int counter_offset; /* one standard (accept, drop, return) per hook */ int policy; /* nr. of entries */ unsigned int nentries; /* entry list */ char data[] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); }; /* used for the bitmask of struct ebt_entry */ /* This is a hack to make a difference between an ebt_entry struct and an * ebt_entries struct when traversing the entries from start to end. * Using this simplifies the code a lot, while still being able to use * ebt_entries. * Contrary, iptables doesn't use something like ebt_entries and therefore uses * different techniques for naming the policy and such. So, iptables doesn't * need a hack like this. */ #define EBT_ENTRY_OR_ENTRIES 0x01 /* these are the normal masks */ #define EBT_NOPROTO 0x02 #define EBT_802_3 0x04 #define EBT_SOURCEMAC 0x08 #define EBT_DESTMAC 0x10 #define EBT_F_MASK (EBT_NOPROTO | EBT_802_3 | EBT_SOURCEMAC | EBT_DESTMAC \ | EBT_ENTRY_OR_ENTRIES) #define EBT_IPROTO 0x01 #define EBT_IIN 0x02 #define EBT_IOUT 0x04 #define EBT_ISOURCE 0x8 #define EBT_IDEST 0x10 #define EBT_ILOGICALIN 0x20 #define EBT_ILOGICALOUT 0x40 #define EBT_INV_MASK (EBT_IPROTO | EBT_IIN | EBT_IOUT | EBT_ILOGICALIN \ | EBT_ILOGICALOUT | EBT_ISOURCE | EBT_IDEST) struct ebt_entry_match { union { struct { char name[EBT_EXTENSION_MAXNAMELEN]; __u8 revision; }; struct xt_match *match; } u; /* size of data */ unsigned int match_size; unsigned char data[] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); }; struct ebt_entry_watcher { union { struct { char name[EBT_EXTENSION_MAXNAMELEN]; __u8 revision; }; struct xt_target *watcher; } u; /* size of data */ unsigned int watcher_size; unsigned char data[] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); }; struct ebt_entry_target { union { struct { char name[EBT_EXTENSION_MAXNAMELEN]; __u8 revision; }; struct xt_target *target; } u; /* size of data */ unsigned int target_size; unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); }; #define EBT_STANDARD_TARGET "standard" struct ebt_standard_target { struct ebt_entry_target target; int verdict; }; /* one entry */ struct ebt_entry { /* this needs to be the first field */ unsigned int bitmask; unsigned int invflags; __be16 ethproto; /* the physical in-dev */ char in[IFNAMSIZ]; /* the logical in-dev */ char logical_in[IFNAMSIZ]; /* the physical out-dev */ char out[IFNAMSIZ]; /* the logical out-dev */ char logical_out[IFNAMSIZ]; unsigned char sourcemac[ETH_ALEN]; unsigned char sourcemsk[ETH_ALEN]; unsigned char destmac[ETH_ALEN]; unsigned char destmsk[ETH_ALEN]; __struct_group(/* no tag */, offsets, /* no attrs */, /* sizeof ebt_entry + matches */ unsigned int watchers_offset; /* sizeof ebt_entry + matches + watchers */ unsigned int target_offset; /* sizeof ebt_entry + matches + watchers + target */ unsigned int next_offset; ); unsigned char elems[] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); }; static __inline__ struct ebt_entry_target * ebt_get_target(struct ebt_entry *e) { return (struct ebt_entry_target *)((char *)e + e->target_offset); } /* {g,s}etsockopt numbers */ #define EBT_BASE_CTL 128 #define EBT_SO_SET_ENTRIES (EBT_BASE_CTL) #define EBT_SO_SET_COUNTERS (EBT_SO_SET_ENTRIES+1) #define EBT_SO_SET_MAX (EBT_SO_SET_COUNTERS+1) #define EBT_SO_GET_INFO (EBT_BASE_CTL) #define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO+1) #define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES+1) #define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO+1) #define EBT_SO_GET_MAX (EBT_SO_GET_INIT_ENTRIES+1) /* blatently stolen from ip_tables.h * fn returns 0 to continue iteration */ #define EBT_MATCH_ITERATE(e, fn, args...) \ ({ \ unsigned int __i; \ int __ret = 0; \ struct ebt_entry_match *__match; \ \ for (__i = sizeof(struct ebt_entry); \ __i < (e)->watchers_offset; \ __i += __match->match_size + \ sizeof(struct ebt_entry_match)) { \ __match = (void *)(e) + __i; \ \ __ret = fn(__match , ## args); \ if (__ret != 0) \ break; \ } \ if (__ret == 0) { \ if (__i != (e)->watchers_offset) \ __ret = -EINVAL; \ } \ __ret; \ }) #define EBT_WATCHER_ITERATE(e, fn, args...) \ ({ \ unsigned int __i; \ int __ret = 0; \ struct ebt_entry_watcher *__watcher; \ \ for (__i = e->watchers_offset; \ __i < (e)->target_offset; \ __i += __watcher->watcher_size + \ sizeof(struct ebt_entry_watcher)) { \ __watcher = (void *)(e) + __i; \ \ __ret = fn(__watcher , ## args); \ if (__ret != 0) \ break; \ } \ if (__ret == 0) { \ if (__i != (e)->target_offset) \ __ret = -EINVAL; \ } \ __ret; \ }) #define EBT_ENTRY_ITERATE(entries, size, fn, args...) \ ({ \ unsigned int __i; \ int __ret = 0; \ struct ebt_entry *__entry; \ \ for (__i = 0; __i < (size);) { \ __entry = (void *)(entries) + __i; \ __ret = fn(__entry , ## args); \ if (__ret != 0) \ break; \ if (__entry->bitmask != 0) \ __i += __entry->next_offset; \ else \ __i += sizeof(struct ebt_entries); \ } \ if (__ret == 0) { \ if (__i != (size)) \ __ret = -EINVAL; \ } \ __ret; \ }) #endif /* _UAPI__LINUX_BRIDGE_EFF_H */ |
| 9506 20 16 178 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PTRACE_H #define _ASM_X86_PTRACE_H #include <asm/segment.h> #include <asm/page_types.h> #include <uapi/asm/ptrace.h> #ifndef __ASSEMBLER__ #ifdef __i386__ struct pt_regs { /* * NB: 32-bit x86 CPUs are inconsistent as what happens in the * following cases (where %seg represents a segment register): * * - pushl %seg: some do a 16-bit write and leave the high * bits alone * - movl %seg, [mem]: some do a 16-bit write despite the movl * - IDT entry: some (e.g. 486) will leave the high bits of CS * and (if applicable) SS undefined. * * Fortunately, x86-32 doesn't read the high bits on POP or IRET, * so we can just treat all of the segment registers as 16-bit * values. */ unsigned long bx; unsigned long cx; unsigned long dx; unsigned long si; unsigned long di; unsigned long bp; unsigned long ax; unsigned short ds; unsigned short __dsh; unsigned short es; unsigned short __esh; unsigned short fs; unsigned short __fsh; /* * On interrupt, gs and __gsh store the vector number. They never * store gs any more. */ unsigned short gs; unsigned short __gsh; /* On interrupt, this is the error code. */ unsigned long orig_ax; unsigned long ip; unsigned short cs; unsigned short __csh; unsigned long flags; unsigned long sp; unsigned short ss; unsigned short __ssh; }; #else /* __i386__ */ struct fred_cs { /* CS selector */ u64 cs : 16, /* Stack level at event time */ sl : 2, /* IBT in WAIT_FOR_ENDBRANCH state */ wfe : 1, : 45; }; struct fred_ss { /* SS selector */ u64 ss : 16, /* STI state */ sti : 1, /* Set if syscall, sysenter or INT n */ swevent : 1, /* Event is NMI type */ nmi : 1, : 13, /* Event vector */ vector : 8, : 8, /* Event type */ type : 4, : 4, /* Event was incident to enclave execution */ enclave : 1, /* CPU was in 64-bit mode */ l : 1, /* * Nested exception during FRED delivery, not set * for #DF. */ nested : 1, : 1, /* * The length of the instruction causing the event. * Only set for INTO, INT1, INT3, INT n, SYSCALL * and SYSENTER. 0 otherwise. */ insnlen : 4; }; struct pt_regs { /* * C ABI says these regs are callee-preserved. They aren't saved on * kernel entry unless syscall needs a complete, fully filled * "struct pt_regs". */ unsigned long r15; unsigned long r14; unsigned long r13; unsigned long r12; unsigned long bp; unsigned long bx; /* These regs are callee-clobbered. Always saved on kernel entry. */ unsigned long r11; unsigned long r10; unsigned long r9; unsigned long r8; unsigned long ax; unsigned long cx; unsigned long dx; unsigned long si; unsigned long di; /* * orig_ax is used on entry for: * - the syscall number (syscall, sysenter, int80) * - error_code stored by the CPU on traps and exceptions * - the interrupt number for device interrupts * * A FRED stack frame starts here: * 1) It _always_ includes an error code; * * 2) The return frame for ERET[US] starts here, but * the content of orig_ax is ignored. */ unsigned long orig_ax; /* The IRETQ return frame starts here */ unsigned long ip; union { /* CS selector */ u16 cs; /* The extended 64-bit data slot containing CS */ u64 csx; /* The FRED CS extension */ struct fred_cs fred_cs; }; unsigned long flags; unsigned long sp; union { /* SS selector */ u16 ss; /* The extended 64-bit data slot containing SS */ u64 ssx; /* The FRED SS extension */ struct fred_ss fred_ss; }; /* * Top of stack on IDT systems, while FRED systems have extra fields * defined above for storing exception related information, e.g. CR2 or * DR6. */ }; #endif /* !__i386__ */ #ifdef CONFIG_PARAVIRT #include <asm/paravirt_types.h> #endif #include <asm/proto.h> struct cpuinfo_x86; struct task_struct; extern unsigned long profile_pc(struct pt_regs *regs); extern unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code); static __always_inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->ax; } static __always_inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) { regs->ax = rc; } /* * user_mode(regs) determines whether a register set came from user * mode. On x86_32, this is true if V8086 mode was enabled OR if the * register set was from protected mode with RPL-3 CS value. This * tricky test checks that with one comparison. * * On x86_64, vm86 mode is mercifully nonexistent, and we don't need * the extra check. */ static __always_inline int user_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL; #else return !!(regs->cs & 3); #endif } static __always_inline int v8086_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return (regs->flags & X86_VM_MASK); #else return 0; /* No V86 mode support in long mode */ #endif } static inline bool user_64bit_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_64 #ifndef CONFIG_PARAVIRT_XXL /* * On non-paravirt systems, this is the only long mode CPL 3 * selector. We do not allow long mode selectors in the LDT. */ return regs->cs == __USER_CS; #else /* Headers are too twisted for this to go in paravirt.h. */ return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; #endif #else /* !CONFIG_X86_64 */ return false; #endif } /* * Determine whether the register set came from any context that is running in * 64-bit mode. */ static inline bool any_64bit_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_64 return !user_mode(regs) || user_64bit_mode(regs); #else return false; #endif } #ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs) { bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 && regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack); ret = ret || (regs->ip >= (unsigned long)entry_SYSRETQ_unsafe_stack && regs->ip < (unsigned long)entry_SYSRETQ_end); #ifdef CONFIG_IA32_EMULATION ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat && regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack); ret = ret || (regs->ip >= (unsigned long)entry_SYSRETL_compat_unsafe_stack && regs->ip < (unsigned long)entry_SYSRETL_compat_end); #endif return ret; } #endif static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { return regs->sp; } static __always_inline unsigned long instruction_pointer(struct pt_regs *regs) { return regs->ip; } static __always_inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { regs->ip = val; } static __always_inline unsigned long frame_pointer(struct pt_regs *regs) { return regs->bp; } static __always_inline unsigned long user_stack_pointer(struct pt_regs *regs) { return regs->sp; } static __always_inline void user_stack_pointer_set(struct pt_regs *regs, unsigned long val) { regs->sp = val; } static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) { return !(regs->flags & X86_EFLAGS_IF); } /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); extern const char *regs_query_register_name(unsigned int offset); #define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) /** * regs_get_register() - get register value from its offset * @regs: pt_regs from which register value is gotten. * @offset: offset number of the register. * * regs_get_register returns the value of a register. The @offset is the * offset of the register in struct pt_regs address which specified by @regs. * If @offset is bigger than MAX_REG_OFFSET, this returns 0. */ static inline unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) { if (unlikely(offset > MAX_REG_OFFSET)) return 0; #ifdef CONFIG_X86_32 /* The selector fields are 16-bit. */ if (offset == offsetof(struct pt_regs, cs) || offset == offsetof(struct pt_regs, ss) || offset == offsetof(struct pt_regs, ds) || offset == offsetof(struct pt_regs, es) || offset == offsetof(struct pt_regs, fs) || offset == offsetof(struct pt_regs, gs)) { return *(u16 *)((unsigned long)regs + offset); } #endif return *(unsigned long *)((unsigned long)regs + offset); } /** * regs_within_kernel_stack() - check the address in the stack * @regs: pt_regs which contains kernel stack pointer. * @addr: address which is checked. * * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). * If @addr is within the kernel stack, it returns true. If not, returns false. */ static inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1))); } /** * regs_get_kernel_stack_nth_addr() - get the address of the Nth entry on stack * @regs: pt_regs which contains kernel stack pointer. * @n: stack entry number. * * regs_get_kernel_stack_nth() returns the address of the @n th entry of the * kernel stack which is specified by @regs. If the @n th entry is NOT in * the kernel stack, this returns NULL. */ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) { unsigned long *addr = (unsigned long *)regs->sp; addr += n; if (regs_within_kernel_stack(regs, (unsigned long)addr)) return addr; else return NULL; } /* To avoid include hell, we can't include uaccess.h */ extern long copy_from_kernel_nofault(void *dst, const void *src, size_t size); /** * regs_get_kernel_stack_nth() - get Nth entry of the stack * @regs: pt_regs which contains kernel stack pointer. * @n: stack entry number. * * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which * is specified by @regs. If the @n th entry is NOT in the kernel stack * this returns 0. */ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) { unsigned long *addr; unsigned long val; long ret; addr = regs_get_kernel_stack_nth_addr(regs, n); if (addr) { ret = copy_from_kernel_nofault(&val, addr, sizeof(val)); if (!ret) return val; } return 0; } /** * regs_get_kernel_argument() - get Nth function argument in kernel * @regs: pt_regs of that context * @n: function argument number (start from 0) * * regs_get_argument() returns @n th argument of the function call. * Note that this chooses most probably assignment, in some case * it can be incorrect. * This is expected to be called from kprobes or ftrace with regs * where the top of stack is the return address. */ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, unsigned int n) { static const unsigned int argument_offs[] = { #ifdef __i386__ offsetof(struct pt_regs, ax), offsetof(struct pt_regs, dx), offsetof(struct pt_regs, cx), #define NR_REG_ARGUMENTS 3 #else offsetof(struct pt_regs, di), offsetof(struct pt_regs, si), offsetof(struct pt_regs, dx), offsetof(struct pt_regs, cx), offsetof(struct pt_regs, r8), offsetof(struct pt_regs, r9), #define NR_REG_ARGUMENTS 6 #endif }; if (n >= NR_REG_ARGUMENTS) { n -= NR_REG_ARGUMENTS - 1; return regs_get_kernel_stack_nth(regs, n); } else return regs_get_register(regs, argument_offs[n]); } #define arch_has_single_step() (1) #ifdef CONFIG_X86_DEBUGCTLMSR #define arch_has_block_step() (1) #else #define arch_has_block_step() (boot_cpu_data.x86 >= 6) #endif #define ARCH_HAS_USER_SINGLE_STEP_REPORT struct user_desc; extern int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *info); extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); #ifdef CONFIG_X86_64 # define do_set_thread_area_64(p, s, t) do_arch_prctl_64(p, s, t) #else # define do_set_thread_area_64(p, s, t) (0) #endif #endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PTRACE_H */ |
| 61 62 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 | /* * Copyright (c) 2006-2008 Intel Corporation * Copyright (c) 2007 Dave Airlie <airlied@linux.ie> * * DRM core CRTC related functions * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * * Authors: * Keith Packard * Eric Anholt <eric@anholt.net> * Dave Airlie <airlied@linux.ie> * Jesse Barnes <jesse.barnes@intel.com> */ #include <linux/export.h> #include <linux/kernel.h> #include <linux/moduleparam.h> #include <linux/dynamic_debug.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_atomic_uapi.h> #include <drm/drm_bridge.h> #include <drm/drm_crtc.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> #include <drm/drm_encoder.h> #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> #include <drm/drm_print.h> #include <drm/drm_vblank.h> #include "drm_crtc_helper_internal.h" DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", "DRM_UT_DRIVER", "DRM_UT_KMS", "DRM_UT_PRIME", "DRM_UT_ATOMIC", "DRM_UT_VBL", "DRM_UT_STATE", "DRM_UT_LEASE", "DRM_UT_DP", "DRM_UT_DRMRES"); /** * DOC: overview * * The CRTC modeset helper library provides a default set_config implementation * in drm_crtc_helper_set_config(). Plus a few other convenience functions using * the same callbacks which drivers can use to e.g. restore the modeset * configuration on resume with drm_helper_resume_force_mode(). * * Note that this helper library doesn't track the current power state of CRTCs * and encoders. It can call callbacks like &drm_encoder_helper_funcs.dpms even * though the hardware is already in the desired state. This deficiency has been * fixed in the atomic helpers. * * The driver callbacks are mostly compatible with the atomic modeset helpers, * except for the handling of the primary plane: Atomic helpers require that the * primary plane is implemented as a real standalone plane and not directly tied * to the CRTC state. For easier transition this library provides functions to * implement the old semantics required by the CRTC helpers using the new plane * and atomic helper callbacks. * * Drivers are strongly urged to convert to the atomic helpers (by way of first * converting to the plane helpers). New drivers must not use these functions * but need to implement the atomic interface instead, potentially using the * atomic helpers for that. * * These legacy modeset helpers use the same function table structures as * all other modesetting helpers. See the documentation for struct * &drm_crtc_helper_funcs, &struct drm_encoder_helper_funcs and struct * &drm_connector_helper_funcs. */ /** * drm_helper_encoder_in_use - check if a given encoder is in use * @encoder: encoder to check * * Checks whether @encoder is with the current mode setting output configuration * in use by any connector. This doesn't mean that it is actually enabled since * the DPMS state is tracked separately. * * Returns: * True if @encoder is used, false otherwise. */ bool drm_helper_encoder_in_use(struct drm_encoder *encoder) { struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct drm_device *dev = encoder->dev; drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); /* * We can expect this mutex to be locked if we are not panicking. * Locking is currently fubar in the panic handler. */ if (!oops_in_progress) { drm_WARN_ON(dev, !mutex_is_locked(&dev->mode_config.mutex)); drm_WARN_ON(dev, !drm_modeset_is_locked(&dev->mode_config.connection_mutex)); } drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { if (connector->encoder == encoder) { drm_connector_list_iter_end(&conn_iter); return true; } } drm_connector_list_iter_end(&conn_iter); return false; } EXPORT_SYMBOL(drm_helper_encoder_in_use); /** * drm_helper_crtc_in_use - check if a given CRTC is in a mode_config * @crtc: CRTC to check * * Checks whether @crtc is with the current mode setting output configuration * in use by any connector. This doesn't mean that it is actually enabled since * the DPMS state is tracked separately. * * Returns: * True if @crtc is used, false otherwise. */ bool drm_helper_crtc_in_use(struct drm_crtc *crtc) { struct drm_encoder *encoder; struct drm_device *dev = crtc->dev; drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); /* * We can expect this mutex to be locked if we are not panicking. * Locking is currently fubar in the panic handler. */ if (!oops_in_progress) drm_WARN_ON(dev, !mutex_is_locked(&dev->mode_config.mutex)); drm_for_each_encoder(encoder, dev) if (encoder->crtc == crtc && drm_helper_encoder_in_use(encoder)) return true; return false; } EXPORT_SYMBOL(drm_helper_crtc_in_use); static void drm_encoder_disable(struct drm_encoder *encoder) { const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private; if (!encoder_funcs) return; if (encoder_funcs->disable) (*encoder_funcs->disable)(encoder); else if (encoder_funcs->dpms) (*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF); } static void __drm_helper_disable_unused_functions(struct drm_device *dev) { struct drm_encoder *encoder; struct drm_crtc *crtc; drm_warn_on_modeset_not_all_locked(dev); drm_for_each_encoder(encoder, dev) { if (!drm_helper_encoder_in_use(encoder)) { drm_encoder_disable(encoder); /* disconnect encoder from any connector */ encoder->crtc = NULL; } } drm_for_each_crtc(crtc, dev) { const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; crtc->enabled = drm_helper_crtc_in_use(crtc); if (!crtc->enabled) { if (crtc_funcs->disable) (*crtc_funcs->disable)(crtc); else (*crtc_funcs->dpms)(crtc, DRM_MODE_DPMS_OFF); crtc->primary->fb = NULL; } } } /** * drm_helper_disable_unused_functions - disable unused objects * @dev: DRM device * * This function walks through the entire mode setting configuration of @dev. It * will remove any CRTC links of unused encoders and encoder links of * disconnected connectors. Then it will disable all unused encoders and CRTCs * either by calling their disable callback if available or by calling their * dpms callback with DRM_MODE_DPMS_OFF. * * NOTE: * * This function is part of the legacy modeset helper library and will cause * major confusion with atomic drivers. This is because atomic helpers guarantee * to never call ->disable() hooks on a disabled function, or ->enable() hooks * on an enabled functions. drm_helper_disable_unused_functions() on the other * hand throws such guarantees into the wind and calls disable hooks * unconditionally on unused functions. */ void drm_helper_disable_unused_functions(struct drm_device *dev) { drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); drm_modeset_lock_all(dev); __drm_helper_disable_unused_functions(dev); drm_modeset_unlock_all(dev); } EXPORT_SYMBOL(drm_helper_disable_unused_functions); /* * Check the CRTC we're going to map each output to vs. its current * CRTC. If they don't match, we have to disable the output and the CRTC * since the driver will have to re-route things. */ static void drm_crtc_prepare_encoders(struct drm_device *dev) { const struct drm_encoder_helper_funcs *encoder_funcs; struct drm_encoder *encoder; drm_for_each_encoder(encoder, dev) { encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; /* Disable unused encoders */ if (encoder->crtc == NULL) drm_encoder_disable(encoder); } } /** * drm_crtc_helper_set_mode - internal helper to set a mode * @crtc: CRTC to program * @mode: mode to use * @x: horizontal offset into the surface * @y: vertical offset into the surface * @old_fb: old framebuffer, for cleanup * * Try to set @mode on @crtc. Give @crtc and its associated connectors a chance * to fixup or reject the mode prior to trying to set it. This is an internal * helper that drivers could e.g. use to update properties that require the * entire output pipe to be disabled and re-enabled in a new configuration. For * example for changing whether audio is enabled on a hdmi link or for changing * panel fitter or dither attributes. It is also called by the * drm_crtc_helper_set_config() helper function to drive the mode setting * sequence. * * Returns: * True if the mode was set successfully, false otherwise. */ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, struct drm_display_mode *mode, int x, int y, struct drm_framebuffer *old_fb) { struct drm_device *dev = crtc->dev; struct drm_display_mode *adjusted_mode, saved_mode, saved_hwmode; const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; const struct drm_encoder_helper_funcs *encoder_funcs; int saved_x, saved_y; bool saved_enabled; struct drm_encoder *encoder; bool ret = true; drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); drm_warn_on_modeset_not_all_locked(dev); saved_enabled = crtc->enabled; crtc->enabled = drm_helper_crtc_in_use(crtc); if (!crtc->enabled) return true; adjusted_mode = drm_mode_duplicate(dev, mode); if (!adjusted_mode) { crtc->enabled = saved_enabled; return false; } drm_mode_init(&saved_mode, &crtc->mode); drm_mode_init(&saved_hwmode, &crtc->hwmode); saved_x = crtc->x; saved_y = crtc->y; /* Update crtc values up front so the driver can rely on them for mode * setting. */ drm_mode_copy(&crtc->mode, mode); crtc->x = x; crtc->y = y; /* Pass our mode to the connectors and the CRTC to give them a chance to * adjust it according to limitations or connector properties, and also * a chance to reject the mode entirely. */ drm_for_each_encoder(encoder, dev) { if (encoder->crtc != crtc) continue; encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; if (encoder_funcs->mode_fixup) { if (!(ret = encoder_funcs->mode_fixup(encoder, mode, adjusted_mode))) { drm_dbg_kms(dev, "[ENCODER:%d:%s] mode fixup failed\n", encoder->base.id, encoder->name); goto done; } } } if (crtc_funcs->mode_fixup) { if (!(ret = crtc_funcs->mode_fixup(crtc, mode, adjusted_mode))) { drm_dbg_kms(dev, "[CRTC:%d:%s] mode fixup failed\n", crtc->base.id, crtc->name); goto done; } } drm_dbg_kms(dev, "[CRTC:%d:%s]\n", crtc->base.id, crtc->name); drm_mode_copy(&crtc->hwmode, adjusted_mode); /* Prepare the encoders and CRTCs before setting the mode. */ drm_for_each_encoder(encoder, dev) { if (encoder->crtc != crtc) continue; encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; /* Disable the encoders as the first thing we do. */ if (encoder_funcs->prepare) encoder_funcs->prepare(encoder); } drm_crtc_prepare_encoders(dev); crtc_funcs->prepare(crtc); /* Set up the DPLL and any encoders state that needs to adjust or depend * on the DPLL. */ ret = !crtc_funcs->mode_set(crtc, mode, adjusted_mode, x, y, old_fb); if (!ret) goto done; drm_for_each_encoder(encoder, dev) { if (encoder->crtc != crtc) continue; encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; drm_dbg_kms(dev, "[ENCODER:%d:%s] set [MODE:%s]\n", encoder->base.id, encoder->name, mode->name); if (encoder_funcs->mode_set) encoder_funcs->mode_set(encoder, mode, adjusted_mode); } /* Now enable the clocks, plane, pipe, and connectors that we set up. */ crtc_funcs->commit(crtc); drm_for_each_encoder(encoder, dev) { if (encoder->crtc != crtc) continue; encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; if (encoder_funcs->commit) encoder_funcs->commit(encoder); } /* Calculate and store various constants which * are later needed by vblank and swap-completion * timestamping. They are derived from true hwmode. */ drm_calc_timestamping_constants(crtc, &crtc->hwmode); /* FIXME: add subpixel order */ done: drm_mode_destroy(dev, adjusted_mode); if (!ret) { crtc->enabled = saved_enabled; drm_mode_copy(&crtc->mode, &saved_mode); drm_mode_copy(&crtc->hwmode, &saved_hwmode); crtc->x = saved_x; crtc->y = saved_y; } return ret; } EXPORT_SYMBOL(drm_crtc_helper_set_mode); /** * drm_crtc_helper_atomic_check() - Helper to check CRTC atomic-state * @crtc: CRTC to check * @state: atomic state object * * Provides a default CRTC-state check handler for CRTCs that only have * one primary plane attached to it. This is often the case for the CRTC * of simple framebuffers. * * RETURNS: * Zero on success, or an errno code otherwise. */ int drm_crtc_helper_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_crtc_state *new_crtc_state = drm_atomic_get_new_crtc_state(state, crtc); if (!new_crtc_state->enable) return 0; return drm_atomic_helper_check_crtc_primary_plane(new_crtc_state); } EXPORT_SYMBOL(drm_crtc_helper_atomic_check); static void drm_crtc_helper_disable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_connector *connector; struct drm_encoder *encoder; /* Decouple all encoders and their attached connectors from this crtc */ drm_for_each_encoder(encoder, dev) { struct drm_connector_list_iter conn_iter; if (encoder->crtc != crtc) continue; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { if (connector->encoder != encoder) continue; connector->encoder = NULL; /* * drm_helper_disable_unused_functions() ought to be * doing this, but since we've decoupled the encoder * from the connector above, the required connection * between them is henceforth no longer available. */ connector->dpms = DRM_MODE_DPMS_OFF; /* we keep a reference while the encoder is bound */ drm_connector_put(connector); } drm_connector_list_iter_end(&conn_iter); } __drm_helper_disable_unused_functions(dev); } /* * For connectors that support multiple encoders, either the * .atomic_best_encoder() or .best_encoder() operation must be implemented. */ struct drm_encoder * drm_connector_get_single_encoder(struct drm_connector *connector) { struct drm_encoder *encoder; drm_WARN_ON(connector->dev, hweight32(connector->possible_encoders) > 1); drm_connector_for_each_possible_encoder(connector, encoder) return encoder; return NULL; } /** * drm_crtc_helper_set_config - set a new config from userspace * @set: mode set configuration * @ctx: lock acquire context, not used here * * The drm_crtc_helper_set_config() helper function implements the of * &drm_crtc_funcs.set_config callback for drivers using the legacy CRTC * helpers. * * It first tries to locate the best encoder for each connector by calling the * connector @drm_connector_helper_funcs.best_encoder helper operation. * * After locating the appropriate encoders, the helper function will call the * mode_fixup encoder and CRTC helper operations to adjust the requested mode, * or reject it completely in which case an error will be returned to the * application. If the new configuration after mode adjustment is identical to * the current configuration the helper function will return without performing * any other operation. * * If the adjusted mode is identical to the current mode but changes to the * frame buffer need to be applied, the drm_crtc_helper_set_config() function * will call the CRTC &drm_crtc_helper_funcs.mode_set_base helper operation. * * If the adjusted mode differs from the current mode, or if the * ->mode_set_base() helper operation is not provided, the helper function * performs a full mode set sequence by calling the ->prepare(), ->mode_set() * and ->commit() CRTC and encoder helper operations, in that order. * Alternatively it can also use the dpms and disable helper operations. For * details see &struct drm_crtc_helper_funcs and struct * &drm_encoder_helper_funcs. * * This function is deprecated. New drivers must implement atomic modeset * support, for which this function is unsuitable. Instead drivers should use * drm_atomic_helper_set_config(). * * Returns: * Returns 0 on success, negative errno numbers on failure. */ int drm_crtc_helper_set_config(struct drm_mode_set *set, struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev; struct drm_crtc **save_encoder_crtcs, *new_crtc; struct drm_encoder **save_connector_encoders, *new_encoder, *encoder; bool mode_changed = false; /* if true do a full mode set */ bool fb_changed = false; /* if true and !mode_changed just do a flip */ struct drm_connector *connector; struct drm_connector_list_iter conn_iter; int count = 0, ro, fail = 0; const struct drm_crtc_helper_funcs *crtc_funcs; struct drm_mode_set save_set; int ret; int i; BUG_ON(!set); BUG_ON(!set->crtc); BUG_ON(!set->crtc->helper_private); /* Enforce sane interface api - has been abused by the fb helper. */ BUG_ON(!set->mode && set->fb); BUG_ON(set->fb && set->num_connectors == 0); crtc_funcs = set->crtc->helper_private; dev = set->crtc->dev; drm_dbg_kms(dev, "\n"); drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); if (!set->mode) set->fb = NULL; if (set->fb) { drm_dbg_kms(dev, "[CRTC:%d:%s] [FB:%d] #connectors=%d (x y) (%i %i)\n", set->crtc->base.id, set->crtc->name, set->fb->base.id, (int)set->num_connectors, set->x, set->y); } else { drm_dbg_kms(dev, "[CRTC:%d:%s] [NOFB]\n", set->crtc->base.id, set->crtc->name); drm_crtc_helper_disable(set->crtc); return 0; } drm_warn_on_modeset_not_all_locked(dev); /* * Allocate space for the backup of all (non-pointer) encoder and * connector data. */ save_encoder_crtcs = kcalloc(dev->mode_config.num_encoder, sizeof(struct drm_crtc *), GFP_KERNEL); if (!save_encoder_crtcs) return -ENOMEM; save_connector_encoders = kcalloc(dev->mode_config.num_connector, sizeof(struct drm_encoder *), GFP_KERNEL); if (!save_connector_encoders) { kfree(save_encoder_crtcs); return -ENOMEM; } /* * Copy data. Note that driver private data is not affected. * Should anything bad happen only the expected state is * restored, not the drivers personal bookkeeping. */ count = 0; drm_for_each_encoder(encoder, dev) { save_encoder_crtcs[count++] = encoder->crtc; } count = 0; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) save_connector_encoders[count++] = connector->encoder; drm_connector_list_iter_end(&conn_iter); save_set.crtc = set->crtc; save_set.mode = &set->crtc->mode; save_set.x = set->crtc->x; save_set.y = set->crtc->y; save_set.fb = set->crtc->primary->fb; /* We should be able to check here if the fb has the same properties * and then just flip_or_move it */ if (set->crtc->primary->fb != set->fb) { /* If we have no fb then treat it as a full mode set */ if (set->crtc->primary->fb == NULL) { drm_dbg_kms(dev, "[CRTC:%d:%s] no fb, full mode set\n", set->crtc->base.id, set->crtc->name); mode_changed = true; } else if (set->fb->format != set->crtc->primary->fb->format) { mode_changed = true; } else fb_changed = true; } if (set->x != set->crtc->x || set->y != set->crtc->y) fb_changed = true; if (!drm_mode_equal(set->mode, &set->crtc->mode)) { drm_dbg_kms(dev, "[CRTC:%d:%s] modes are different, full mode set:\n", set->crtc->base.id, set->crtc->name); drm_dbg_kms(dev, DRM_MODE_FMT "\n", DRM_MODE_ARG(&set->crtc->mode)); drm_dbg_kms(dev, DRM_MODE_FMT "\n", DRM_MODE_ARG(set->mode)); mode_changed = true; } /* take a reference on all unbound connectors in set, reuse the * already taken reference for bound connectors */ for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro]->encoder) continue; drm_connector_get(set->connectors[ro]); } /* a) traverse passed in connector list and get encoders for them */ count = 0; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; new_encoder = connector->encoder; for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro] == connector) { if (connector_funcs->best_encoder) new_encoder = connector_funcs->best_encoder(connector); else new_encoder = drm_connector_get_single_encoder(connector); /* if we can't get an encoder for a connector we are setting now - then fail */ if (new_encoder == NULL) /* don't break so fail path works correct */ fail = 1; if (connector->dpms != DRM_MODE_DPMS_ON) { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] DPMS not on, full mode switch\n", connector->base.id, connector->name); mode_changed = true; } break; } } if (new_encoder != connector->encoder) { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] encoder changed, full mode switch\n", connector->base.id, connector->name); mode_changed = true; /* If the encoder is reused for another connector, then * the appropriate crtc will be set later. */ if (connector->encoder) connector->encoder->crtc = NULL; connector->encoder = new_encoder; } } drm_connector_list_iter_end(&conn_iter); if (fail) { ret = -EINVAL; goto fail; } count = 0; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { if (!connector->encoder) continue; if (connector->encoder->crtc == set->crtc) new_crtc = NULL; else new_crtc = connector->encoder->crtc; for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro] == connector) new_crtc = set->crtc; } /* Make sure the new CRTC will work with the encoder */ if (new_crtc && !drm_encoder_crtc_ok(connector->encoder, new_crtc)) { ret = -EINVAL; drm_connector_list_iter_end(&conn_iter); goto fail; } if (new_crtc != connector->encoder->crtc) { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] CRTC changed, full mode switch\n", connector->base.id, connector->name); mode_changed = true; connector->encoder->crtc = new_crtc; } if (new_crtc) { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] to [CRTC:%d:%s]\n", connector->base.id, connector->name, new_crtc->base.id, new_crtc->name); } else { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] to [NOCRTC]\n", connector->base.id, connector->name); } } drm_connector_list_iter_end(&conn_iter); /* mode_set_base is not a required function */ if (fb_changed && !crtc_funcs->mode_set_base) mode_changed = true; if (mode_changed) { if (drm_helper_crtc_in_use(set->crtc)) { drm_dbg_kms(dev, "[CRTC:%d:%s] attempting to set mode from userspace: " DRM_MODE_FMT "\n", set->crtc->base.id, set->crtc->name, DRM_MODE_ARG(set->mode)); set->crtc->primary->fb = set->fb; if (!drm_crtc_helper_set_mode(set->crtc, set->mode, set->x, set->y, save_set.fb)) { drm_err(dev, "[CRTC:%d:%s] failed to set mode\n", set->crtc->base.id, set->crtc->name); set->crtc->primary->fb = save_set.fb; ret = -EINVAL; goto fail; } drm_dbg_kms(dev, "[CRTC:%d:%s] Setting connector DPMS state to on\n", set->crtc->base.id, set->crtc->name); for (i = 0; i < set->num_connectors; i++) { drm_dbg_kms(dev, "\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id, set->connectors[i]->name); set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON); } } __drm_helper_disable_unused_functions(dev); } else if (fb_changed) { set->crtc->x = set->x; set->crtc->y = set->y; set->crtc->primary->fb = set->fb; ret = crtc_funcs->mode_set_base(set->crtc, set->x, set->y, save_set.fb); if (ret != 0) { set->crtc->x = save_set.x; set->crtc->y = save_set.y; set->crtc->primary->fb = save_set.fb; goto fail; } } kfree(save_connector_encoders); kfree(save_encoder_crtcs); return 0; fail: /* Restore all previous data. */ count = 0; drm_for_each_encoder(encoder, dev) { encoder->crtc = save_encoder_crtcs[count++]; } count = 0; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) connector->encoder = save_connector_encoders[count++]; drm_connector_list_iter_end(&conn_iter); /* after fail drop reference on all unbound connectors in set, let * bound connectors keep their reference */ for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro]->encoder) continue; drm_connector_put(set->connectors[ro]); } /* Try to restore the config */ if (mode_changed && !drm_crtc_helper_set_mode(save_set.crtc, save_set.mode, save_set.x, save_set.y, save_set.fb)) drm_err(dev, "failed to restore config after modeset failure\n"); kfree(save_connector_encoders); kfree(save_encoder_crtcs); return ret; } EXPORT_SYMBOL(drm_crtc_helper_set_config); static int drm_helper_choose_encoder_dpms(struct drm_encoder *encoder) { int dpms = DRM_MODE_DPMS_OFF; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct drm_device *dev = encoder->dev; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) if (connector->encoder == encoder) if (connector->dpms < dpms) dpms = connector->dpms; drm_connector_list_iter_end(&conn_iter); return dpms; } /* Helper which handles bridge ordering around encoder dpms */ static void drm_helper_encoder_dpms(struct drm_encoder *encoder, int mode) { const struct drm_encoder_helper_funcs *encoder_funcs; encoder_funcs = encoder->helper_private; if (!encoder_funcs) return; if (encoder_funcs->dpms) encoder_funcs->dpms(encoder, mode); } static int drm_helper_choose_crtc_dpms(struct drm_crtc *crtc) { int dpms = DRM_MODE_DPMS_OFF; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct drm_device *dev = crtc->dev; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) if (connector->encoder && connector->encoder->crtc == crtc) if (connector->dpms < dpms) dpms = connector->dpms; drm_connector_list_iter_end(&conn_iter); return dpms; } /** * drm_helper_connector_dpms() - connector dpms helper implementation * @connector: affected connector * @mode: DPMS mode * * The drm_helper_connector_dpms() helper function implements the * &drm_connector_funcs.dpms callback for drivers using the legacy CRTC * helpers. * * This is the main helper function provided by the CRTC helper framework for * implementing the DPMS connector attribute. It computes the new desired DPMS * state for all encoders and CRTCs in the output mesh and calls the * &drm_crtc_helper_funcs.dpms and &drm_encoder_helper_funcs.dpms callbacks * provided by the driver. * * This function is deprecated. New drivers must implement atomic modeset * support, where DPMS is handled in the DRM core. * * Returns: * Always returns 0. */ int drm_helper_connector_dpms(struct drm_connector *connector, int mode) { struct drm_encoder *encoder = connector->encoder; struct drm_crtc *crtc = encoder ? encoder->crtc : NULL; int old_dpms, encoder_dpms = DRM_MODE_DPMS_OFF; drm_WARN_ON(connector->dev, drm_drv_uses_atomic_modeset(connector->dev)); if (mode == connector->dpms) return 0; old_dpms = connector->dpms; connector->dpms = mode; if (encoder) encoder_dpms = drm_helper_choose_encoder_dpms(encoder); /* from off to on, do crtc then encoder */ if (mode < old_dpms) { if (crtc) { const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; if (crtc_funcs->dpms) (*crtc_funcs->dpms) (crtc, drm_helper_choose_crtc_dpms(crtc)); } if (encoder) drm_helper_encoder_dpms(encoder, encoder_dpms); } /* from on to off, do encoder then crtc */ if (mode > old_dpms) { if (encoder) drm_helper_encoder_dpms(encoder, encoder_dpms); if (crtc) { const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; if (crtc_funcs->dpms) (*crtc_funcs->dpms) (crtc, drm_helper_choose_crtc_dpms(crtc)); } } return 0; } EXPORT_SYMBOL(drm_helper_connector_dpms); /** * drm_helper_resume_force_mode - force-restore mode setting configuration * @dev: drm_device which should be restored * * Drivers which use the mode setting helpers can use this function to * force-restore the mode setting configuration e.g. on resume or when something * else might have trampled over the hw state (like some overzealous old BIOSen * tended to do). * * This helper doesn't provide a error return value since restoring the old * config should never fail due to resource allocation issues since the driver * has successfully set the restored configuration already. Hence this should * boil down to the equivalent of a few dpms on calls, which also don't provide * an error code. * * Drivers where simply restoring an old configuration again might fail (e.g. * due to slight differences in allocating shared resources when the * configuration is restored in a different order than when userspace set it up) * need to use their own restore logic. * * This function is deprecated. New drivers should implement atomic mode- * setting and use the atomic suspend/resume helpers. * * See also: * drm_atomic_helper_suspend(), drm_atomic_helper_resume() */ void drm_helper_resume_force_mode(struct drm_device *dev) { struct drm_crtc *crtc; struct drm_encoder *encoder; const struct drm_crtc_helper_funcs *crtc_funcs; int encoder_dpms; bool ret; drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); drm_modeset_lock_all(dev); drm_for_each_crtc(crtc, dev) { if (!crtc->enabled) continue; ret = drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x, crtc->y, crtc->primary->fb); /* Restoring the old config should never fail! */ if (ret == false) drm_err(dev, "failed to set mode on crtc %p\n", crtc); /* Turn off outputs that were already powered off */ if (drm_helper_choose_crtc_dpms(crtc)) { drm_for_each_encoder(encoder, dev) { if(encoder->crtc != crtc) continue; encoder_dpms = drm_helper_choose_encoder_dpms( encoder); drm_helper_encoder_dpms(encoder, encoder_dpms); } crtc_funcs = crtc->helper_private; if (crtc_funcs->dpms) (*crtc_funcs->dpms) (crtc, drm_helper_choose_crtc_dpms(crtc)); } } /* disable the unused connectors while restoring the modesetting */ __drm_helper_disable_unused_functions(dev); drm_modeset_unlock_all(dev); } EXPORT_SYMBOL(drm_helper_resume_force_mode); /** * drm_helper_force_disable_all - Forcibly turn off all enabled CRTCs * @dev: DRM device whose CRTCs to turn off * * Drivers may want to call this on unload to ensure that all displays are * unlit and the GPU is in a consistent, low power state. Takes modeset locks. * * Note: This should only be used by non-atomic legacy drivers. For an atomic * version look at drm_atomic_helper_shutdown(). * * Returns: * Zero on success, error code on failure. */ int drm_helper_force_disable_all(struct drm_device *dev) { struct drm_crtc *crtc; int ret = 0; drm_modeset_lock_all(dev); drm_for_each_crtc(crtc, dev) if (crtc->enabled) { struct drm_mode_set set = { .crtc = crtc, }; ret = drm_mode_set_config_internal(&set); if (ret) goto out; } out: drm_modeset_unlock_all(dev); return ret; } EXPORT_SYMBOL(drm_helper_force_disable_all); |
| 13 4 7 7 7 9 4 8 9 3 2 2 3 2 1 2 5 4 4 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Glue Code for 3-way parallel assembler optimized version of Twofish * * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> */ #include <asm/cpu_device_id.h> #include <crypto/algapi.h> #include <crypto/twofish.h> #include <linux/crypto.h> #include <linux/export.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> #include "twofish.h" #include "ecb_cbc_helpers.h" EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { return twofish_setkey(&tfm->base, key, keylen); } static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) { __twofish_enc_blk_3way(ctx, dst, src, false); } void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src) { u8 buf[2][TF_BLOCK_SIZE]; const u8 *s = src; if (dst == src) s = memcpy(buf, src, sizeof(buf)); twofish_dec_blk_3way(ctx, dst, src); crypto_xor(dst + TF_BLOCK_SIZE, s, sizeof(buf)); } EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); static int ecb_encrypt(struct skcipher_request *req) { ECB_WALK_START(req, TF_BLOCK_SIZE, -1); ECB_BLOCK(3, twofish_enc_blk_3way); ECB_BLOCK(1, twofish_enc_blk); ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { ECB_WALK_START(req, TF_BLOCK_SIZE, -1); ECB_BLOCK(3, twofish_dec_blk_3way); ECB_BLOCK(1, twofish_dec_blk); ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { CBC_WALK_START(req, TF_BLOCK_SIZE, -1); CBC_ENC_BLOCK(twofish_enc_blk); CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { CBC_WALK_START(req, TF_BLOCK_SIZE, -1); CBC_DEC_BLOCK(3, twofish_dec_blk_cbc_3way); CBC_DEC_BLOCK(1, twofish_dec_blk); CBC_WALK_END(); } static struct skcipher_alg tf_skciphers[] = { { .base.cra_name = "ecb(twofish)", .base.cra_driver_name = "ecb-twofish-3way", .base.cra_priority = 300, .base.cra_blocksize = TF_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct twofish_ctx), .base.cra_module = THIS_MODULE, .min_keysize = TF_MIN_KEY_SIZE, .max_keysize = TF_MAX_KEY_SIZE, .setkey = twofish_setkey_skcipher, .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, }, { .base.cra_name = "cbc(twofish)", .base.cra_driver_name = "cbc-twofish-3way", .base.cra_priority = 300, .base.cra_blocksize = TF_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct twofish_ctx), .base.cra_module = THIS_MODULE, .min_keysize = TF_MIN_KEY_SIZE, .max_keysize = TF_MAX_KEY_SIZE, .ivsize = TF_BLOCK_SIZE, .setkey = twofish_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, }, }; static bool is_blacklisted_cpu(void) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return false; switch (boot_cpu_data.x86_vfm) { case INTEL_ATOM_BONNELL: case INTEL_ATOM_BONNELL_MID: case INTEL_ATOM_SALTWELL: /* * On Atom, twofish-3way is slower than original assembler * implementation. Twofish-3way trades off some performance in * storing blocks in 64bit registers to allow three blocks to * be processed parallel. Parallel operation then allows gaining * more performance than was trade off, on out-of-order CPUs. * However Atom does not benefit from this parallelism and * should be blacklisted. */ return true; } if (boot_cpu_data.x86 == 0x0f) { /* * On Pentium 4, twofish-3way is slower than original assembler * implementation because excessive uses of 64bit rotate and * left-shifts (which are really slow on P4) needed to store and * handle 128bit block in two 64bit registers. */ return true; } return false; } static int force; module_param(force, int, 0); MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init twofish_3way_init(void) { if (!force && is_blacklisted_cpu()) { printk(KERN_INFO "twofish-x86_64-3way: performance on this CPU " "would be suboptimal: disabling " "twofish-x86_64-3way.\n"); return -ENODEV; } return crypto_register_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); } static void __exit twofish_3way_fini(void) { crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); } module_init(twofish_3way_init); module_exit(twofish_3way_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); MODULE_ALIAS_CRYPTO("twofish"); MODULE_ALIAS_CRYPTO("twofish-asm"); |
| 111 61 42 48 21 18 38 39 43 26 35 39 39 39 38 2 35 39 38 2 60 61 61 60 59 39 59 59 39 58 39 58 35 36 35 1 35 36 36 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 | /* * INETPEER - A storage for permanent information about peers * * This source is covered by the GNU GPL, the same as all kernel sources. * * Authors: Andrey V. Savochkin <saw@msu.ru> */ #include <linux/cache.h> #include <linux/module.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/random.h> #include <linux/timer.h> #include <linux/time.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/net.h> #include <linux/workqueue.h> #include <net/ip.h> #include <net/inetpeer.h> #include <net/secure_seq.h> /* * Theory of operations. * We keep one entry for each peer IP address. The nodes contains long-living * information about the peer which doesn't depend on routes. * * Nodes are removed only when reference counter goes to 0. * When it's happened the node may be removed when a sufficient amount of * time has been passed since its last use. The less-recently-used entry can * also be removed if the pool is overloaded i.e. if the total amount of * entries is greater-or-equal than the threshold. * * Node pool is organised as an RB tree. * Such an implementation has been chosen not just for fun. It's a way to * prevent easy and efficient DoS attacks by creating hash collisions. A huge * amount of long living nodes in a single hash slot would significantly delay * lookups performed with disabled BHs. * * Serialisation issues. * 1. Nodes may appear in the tree only with the pool lock held. * 2. Nodes may disappear from the tree only with the pool lock held * AND reference count being 0. * 3. Global variable peer_total is modified under the pool lock. * 4. struct inet_peer fields modification: * rb_node: pool lock * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * daddr: unchangeable */ static struct kmem_cache *peer_cachep __ro_after_init; void inet_peer_base_init(struct inet_peer_base *bp) { bp->rb_root = RB_ROOT; seqlock_init(&bp->lock); bp->total = 0; } EXPORT_IPV6_MOD_GPL(inet_peer_base_init); #define PEER_MAX_GC 32 /* Exported for sysctl_net_ipv4. */ int inet_peer_threshold __read_mostly; /* start to throw entries more * aggressively at this stage */ int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { u64 nr_entries; /* 1% of physical memory */ nr_entries = div64_ul((u64)totalram_pages() << PAGE_SHIFT, 100 * L1_CACHE_ALIGN(sizeof(struct inet_peer))); inet_peer_threshold = clamp_val(nr_entries, 4096, 65536 + 128); peer_cachep = KMEM_CACHE(inet_peer, SLAB_HWCACHE_ALIGN | SLAB_PANIC); } /* Called with rcu_read_lock() or base->lock held */ static struct inet_peer *lookup(const struct inetpeer_addr *daddr, struct inet_peer_base *base, unsigned int seq, struct inet_peer *gc_stack[], unsigned int *gc_cnt, struct rb_node **parent_p, struct rb_node ***pp_p) { struct rb_node **pp, *parent, *next; struct inet_peer *p; u32 now; pp = &base->rb_root.rb_node; parent = NULL; while (1) { int cmp; next = rcu_dereference_raw(*pp); if (!next) break; parent = next; p = rb_entry(parent, struct inet_peer, rb_node); cmp = inetpeer_addr_cmp(daddr, &p->daddr); if (cmp == 0) { now = jiffies; if (READ_ONCE(p->dtime) != now) WRITE_ONCE(p->dtime, now); return p; } if (gc_stack) { if (*gc_cnt < PEER_MAX_GC) gc_stack[(*gc_cnt)++] = p; } else if (unlikely(read_seqretry(&base->lock, seq))) { break; } if (cmp == -1) pp = &next->rb_left; else pp = &next->rb_right; } *parent_p = parent; *pp_p = pp; return NULL; } /* perform garbage collect on all items stacked during a lookup */ static void inet_peer_gc(struct inet_peer_base *base, struct inet_peer *gc_stack[], unsigned int gc_cnt) { int peer_threshold, peer_maxttl, peer_minttl; struct inet_peer *p; __u32 delta, ttl; int i; peer_threshold = READ_ONCE(inet_peer_threshold); peer_maxttl = READ_ONCE(inet_peer_maxttl); peer_minttl = READ_ONCE(inet_peer_minttl); if (base->total >= peer_threshold) ttl = 0; /* be aggressive */ else ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ * base->total / peer_threshold * HZ; for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; delta = (__u32)jiffies - READ_ONCE(p->dtime); if (delta < ttl || !refcount_dec_if_one(&p->refcnt)) gc_stack[i] = NULL; } for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; if (p) { rb_erase(&p->rb_node, &base->rb_root); base->total--; kfree_rcu(p, rcu); } } } /* Must be called under RCU : No refcount change is done here. */ struct inet_peer *inet_getpeer(struct inet_peer_base *base, const struct inetpeer_addr *daddr) { struct inet_peer *p, *gc_stack[PEER_MAX_GC]; struct rb_node **pp, *parent; unsigned int gc_cnt, seq; /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ seq = read_seqbegin(&base->lock); p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp); if (p) return p; /* retry an exact lookup, taking the lock before. * At least, nodes should be hot in our cache. */ parent = NULL; write_seqlock_bh(&base->lock); gc_cnt = 0; p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp); if (!p) { p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC); if (p) { p->daddr = *daddr; p->dtime = (__u32)jiffies; refcount_set(&p->refcnt, 1); atomic_set(&p->rid, 0); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; p->n_redirects = 0; /* 60*HZ is arbitrary, but chosen enough high so that the first * calculation of tokens is at its maximum. */ p->rate_last = jiffies - 60*HZ; rb_link_node(&p->rb_node, parent, pp); rb_insert_color(&p->rb_node, &base->rb_root); base->total++; } } if (gc_cnt) inet_peer_gc(base, gc_stack, gc_cnt); write_sequnlock_bh(&base->lock); return p; } EXPORT_IPV6_MOD_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { if (refcount_dec_and_test(&p->refcnt)) kfree_rcu(p, rcu); } /* * Check transmit rate limitation for given message. * The rate information is held in the inet_peer entries now. * This function is generic and could be used for other purposes * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. * * Note that the same inet_peer fields are modified by functions in * route.c too, but these work for packet destinations while xrlim_allow * works for icmp destinations. This means the rate limiting information * for one "ip object" is shared - and these ICMPs are twice limited: * by source and by destination. * * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate * SHOULD allow setting of rate limits * * Shared between ICMPv4 and ICMPv6. */ #define XRLIM_BURST_FACTOR 6 bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) { unsigned long now, token, otoken, delta; bool rc = false; if (!peer) return true; token = otoken = READ_ONCE(peer->rate_tokens); now = jiffies; delta = now - READ_ONCE(peer->rate_last); if (delta) { WRITE_ONCE(peer->rate_last, now); token += delta; if (token > XRLIM_BURST_FACTOR * timeout) token = XRLIM_BURST_FACTOR * timeout; } if (token >= timeout) { token -= timeout; rc = true; } if (token != otoken) WRITE_ONCE(peer->rate_tokens, token); return rc; } EXPORT_IPV6_MOD(inet_peer_xrlim_allow); void inetpeer_invalidate_tree(struct inet_peer_base *base) { struct rb_node *p = rb_first(&base->rb_root); while (p) { struct inet_peer *peer = rb_entry(p, struct inet_peer, rb_node); p = rb_next(p); rb_erase(&peer->rb_node, &base->rb_root); inet_putpeer(peer); cond_resched(); } base->total = 0; } EXPORT_IPV6_MOD(inetpeer_invalidate_tree); |
| 12 8 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 | #undef TRACE_SYSTEM #define TRACE_SYSTEM irq_matrix #if !defined(_TRACE_IRQ_MATRIX_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_IRQ_MATRIX_H #include <linux/tracepoint.h> struct irq_matrix; struct cpumap; DECLARE_EVENT_CLASS(irq_matrix_global, TP_PROTO(struct irq_matrix *matrix), TP_ARGS(matrix), TP_STRUCT__entry( __field( unsigned int, online_maps ) __field( unsigned int, global_available ) __field( unsigned int, global_reserved ) __field( unsigned int, total_allocated ) ), TP_fast_assign( __entry->online_maps = matrix->online_maps; __entry->global_available = matrix->global_available; __entry->global_reserved = matrix->global_reserved; __entry->total_allocated = matrix->total_allocated; ), TP_printk("online_maps=%d global_avl=%u, global_rsvd=%u, total_alloc=%u", __entry->online_maps, __entry->global_available, __entry->global_reserved, __entry->total_allocated) ); DECLARE_EVENT_CLASS(irq_matrix_global_update, TP_PROTO(int bit, struct irq_matrix *matrix), TP_ARGS(bit, matrix), TP_STRUCT__entry( __field( int, bit ) __field( unsigned int, online_maps ) __field( unsigned int, global_available ) __field( unsigned int, global_reserved ) __field( unsigned int, total_allocated ) ), TP_fast_assign( __entry->bit = bit; __entry->online_maps = matrix->online_maps; __entry->global_available = matrix->global_available; __entry->global_reserved = matrix->global_reserved; __entry->total_allocated = matrix->total_allocated; ), TP_printk("bit=%d online_maps=%d global_avl=%u, global_rsvd=%u, total_alloc=%u", __entry->bit, __entry->online_maps, __entry->global_available, __entry->global_reserved, __entry->total_allocated) ); DECLARE_EVENT_CLASS(irq_matrix_cpu, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap), TP_STRUCT__entry( __field( int, bit ) __field( unsigned int, cpu ) __field( bool, online ) __field( unsigned int, available ) __field( unsigned int, allocated ) __field( unsigned int, managed ) __field( unsigned int, online_maps ) __field( unsigned int, global_available ) __field( unsigned int, global_reserved ) __field( unsigned int, total_allocated ) ), TP_fast_assign( __entry->bit = bit; __entry->cpu = cpu; __entry->online = cmap->online; __entry->available = cmap->available; __entry->allocated = cmap->allocated; __entry->managed = cmap->managed; __entry->online_maps = matrix->online_maps; __entry->global_available = matrix->global_available; __entry->global_reserved = matrix->global_reserved; __entry->total_allocated = matrix->total_allocated; ), TP_printk("bit=%d cpu=%u online=%d avl=%u alloc=%u managed=%u online_maps=%u global_avl=%u, global_rsvd=%u, total_alloc=%u", __entry->bit, __entry->cpu, __entry->online, __entry->available, __entry->allocated, __entry->managed, __entry->online_maps, __entry->global_available, __entry->global_reserved, __entry->total_allocated) ); DEFINE_EVENT(irq_matrix_global, irq_matrix_online, TP_PROTO(struct irq_matrix *matrix), TP_ARGS(matrix) ); DEFINE_EVENT(irq_matrix_global, irq_matrix_offline, TP_PROTO(struct irq_matrix *matrix), TP_ARGS(matrix) ); DEFINE_EVENT(irq_matrix_global, irq_matrix_reserve, TP_PROTO(struct irq_matrix *matrix), TP_ARGS(matrix) ); DEFINE_EVENT(irq_matrix_global, irq_matrix_remove_reserved, TP_PROTO(struct irq_matrix *matrix), TP_ARGS(matrix) ); DEFINE_EVENT(irq_matrix_global_update, irq_matrix_assign_system, TP_PROTO(int bit, struct irq_matrix *matrix), TP_ARGS(bit, matrix) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_reserve_managed, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_remove_managed, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_alloc_managed, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_assign, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_alloc, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_free, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); #endif /* _TRACE_IRQ_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
| 226 276 276 242 226 276 242 179 181 242 29 37 1 1 246 246 7 156 159 159 159 155 146 48 21 21 21 21 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 | // SPDX-License-Identifier: GPL-2.0-only #ifndef KVM_X86_MMU_SPTE_H #define KVM_X86_MMU_SPTE_H #include <asm/vmx.h> #include "mmu.h" #include "mmu_internal.h" /* * A MMU present SPTE is backed by actual memory and may or may not be present * in hardware. E.g. MMIO SPTEs are not considered present. Use bit 11, as it * is ignored by all flavors of SPTEs and checking a low bit often generates * better code than for a high bit, e.g. 56+. MMU present checks are pervasive * enough that the improved code generation is noticeable in KVM's footprint. */ #define SPTE_MMU_PRESENT_MASK BIT_ULL(11) /* * TDP SPTES (more specifically, EPT SPTEs) may not have A/D bits, and may also * be restricted to using write-protection (for L2 when CPU dirty logging, i.e. * PML, is enabled). Use bits 52 and 53 to hold the type of A/D tracking that * is must be employed for a given TDP SPTE. * * Note, the "enabled" mask must be '0', as bits 62:52 are _reserved_ for PAE * paging, including NPT PAE. This scheme works because legacy shadow paging * is guaranteed to have A/D bits and write-protection is forced only for * TDP with CPU dirty logging (PML). If NPT ever gains PML-like support, it * must be restricted to 64-bit KVM. */ #define SPTE_TDP_AD_SHIFT 52 #define SPTE_TDP_AD_MASK (3ULL << SPTE_TDP_AD_SHIFT) #define SPTE_TDP_AD_ENABLED (0ULL << SPTE_TDP_AD_SHIFT) #define SPTE_TDP_AD_DISABLED (1ULL << SPTE_TDP_AD_SHIFT) #define SPTE_TDP_AD_WRPROT_ONLY (2ULL << SPTE_TDP_AD_SHIFT) static_assert(SPTE_TDP_AD_ENABLED == 0); #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK #define SPTE_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1)) #else #define SPTE_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) #endif #define SPTE_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \ | shadow_x_mask | shadow_nx_mask | shadow_me_mask) #define ACC_EXEC_MASK 1 #define ACC_WRITE_MASK PT_WRITABLE_MASK #define ACC_USER_MASK PT_USER_MASK #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) /* The mask for the R/X bits in EPT PTEs */ #define SPTE_EPT_READABLE_MASK 0x1ull #define SPTE_EPT_EXECUTABLE_MASK 0x4ull #define SPTE_LEVEL_BITS 9 #define SPTE_LEVEL_SHIFT(level) __PT_LEVEL_SHIFT(level, SPTE_LEVEL_BITS) #define SPTE_INDEX(address, level) __PT_INDEX(address, level, SPTE_LEVEL_BITS) #define SPTE_ENT_PER_PAGE __PT_ENT_PER_PAGE(SPTE_LEVEL_BITS) /* * The mask/shift to use for saving the original R/X bits when marking the PTE * as not-present for access tracking purposes. We do not save the W bit as the * PTEs being access tracked also need to be dirty tracked, so the W bit will be * restored only when a write is attempted to the page. This mask obviously * must not overlap the A/D type mask. */ #define SHADOW_ACC_TRACK_SAVED_BITS_MASK (SPTE_EPT_READABLE_MASK | \ SPTE_EPT_EXECUTABLE_MASK) #define SHADOW_ACC_TRACK_SAVED_BITS_SHIFT 54 #define SHADOW_ACC_TRACK_SAVED_MASK (SHADOW_ACC_TRACK_SAVED_BITS_MASK << \ SHADOW_ACC_TRACK_SAVED_BITS_SHIFT) static_assert(!(SPTE_TDP_AD_MASK & SHADOW_ACC_TRACK_SAVED_MASK)); /* * {DEFAULT,EPT}_SPTE_{HOST,MMU}_WRITABLE are used to keep track of why a given * SPTE is write-protected. See is_writable_pte() for details. */ /* Bits 9 and 10 are ignored by all non-EPT PTEs. */ #define DEFAULT_SPTE_HOST_WRITABLE BIT_ULL(9) #define DEFAULT_SPTE_MMU_WRITABLE BIT_ULL(10) /* * Low ignored bits are at a premium for EPT, use high ignored bits, taking care * to not overlap the A/D type mask or the saved access bits of access-tracked * SPTEs when A/D bits are disabled. */ #define EPT_SPTE_HOST_WRITABLE BIT_ULL(57) #define EPT_SPTE_MMU_WRITABLE BIT_ULL(58) static_assert(!(EPT_SPTE_HOST_WRITABLE & SPTE_TDP_AD_MASK)); static_assert(!(EPT_SPTE_MMU_WRITABLE & SPTE_TDP_AD_MASK)); static_assert(!(EPT_SPTE_HOST_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK)); static_assert(!(EPT_SPTE_MMU_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK)); /* Defined only to keep the above static asserts readable. */ #undef SHADOW_ACC_TRACK_SAVED_MASK /* * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of * the memslots generation and is derived as follows: * * Bits 0-7 of the MMIO generation are propagated to spte bits 3-10 * Bits 8-18 of the MMIO generation are propagated to spte bits 52-62 * * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in * the MMIO generation number, as doing so would require stealing a bit from * the "real" generation number and thus effectively halve the maximum number * of MMIO generations that can be handled before encountering a wrap (which * requires a full MMU zap). The flag is instead explicitly queried when * checking for MMIO spte cache hits. */ #define MMIO_SPTE_GEN_LOW_START 3 #define MMIO_SPTE_GEN_LOW_END 10 #define MMIO_SPTE_GEN_HIGH_START 52 #define MMIO_SPTE_GEN_HIGH_END 62 #define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ MMIO_SPTE_GEN_LOW_START) #define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ MMIO_SPTE_GEN_HIGH_START) static_assert(!(SPTE_MMU_PRESENT_MASK & (MMIO_SPTE_GEN_LOW_MASK | MMIO_SPTE_GEN_HIGH_MASK))); /* * The SPTE MMIO mask must NOT overlap the MMIO generation bits or the * MMU-present bit. The generation obviously co-exists with the magic MMIO * mask/value, and MMIO SPTEs are considered !MMU-present. * * The SPTE MMIO mask is allowed to use hardware "present" bits (i.e. all EPT * RWX bits), all physical address bits (legal PA bits are used for "fast" MMIO * and so they're off-limits for generation; additional checks ensure the mask * doesn't overlap legal PA bits), and bit 63 (carved out for future usage). */ #define SPTE_MMIO_ALLOWED_MASK (BIT_ULL(63) | GENMASK_ULL(51, 12) | GENMASK_ULL(2, 0)) static_assert(!(SPTE_MMIO_ALLOWED_MASK & (SPTE_MMU_PRESENT_MASK | MMIO_SPTE_GEN_LOW_MASK | MMIO_SPTE_GEN_HIGH_MASK))); #define MMIO_SPTE_GEN_LOW_BITS (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1) #define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1) /* remember to adjust the comment above as well if you change these */ static_assert(MMIO_SPTE_GEN_LOW_BITS == 8 && MMIO_SPTE_GEN_HIGH_BITS == 11); #define MMIO_SPTE_GEN_LOW_SHIFT (MMIO_SPTE_GEN_LOW_START - 0) #define MMIO_SPTE_GEN_HIGH_SHIFT (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS) #define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0) /* * Non-present SPTE value needs to set bit 63 for TDX, in order to suppress * #VE and get EPT violations on non-present PTEs. We can use the * same value also without TDX for both VMX and SVM: * * For SVM NPT, for non-present spte (bit 0 = 0), other bits are ignored. * For VMX EPT, bit 63 is ignored if #VE is disabled. (EPT_VIOLATION_VE=0) * bit 63 is #VE suppress if #VE is enabled. (EPT_VIOLATION_VE=1) */ #ifdef CONFIG_X86_64 #define SHADOW_NONPRESENT_VALUE BIT_ULL(63) static_assert(!(SHADOW_NONPRESENT_VALUE & SPTE_MMU_PRESENT_MASK)); #else #define SHADOW_NONPRESENT_VALUE 0ULL #endif /* * True if A/D bits are supported in hardware and are enabled by KVM. When * enabled, KVM uses A/D bits for all non-nested MMUs. Because L1 can disable * A/D bits in EPTP12, SP and SPTE variants are needed to handle the scenario * where KVM is using A/D bits for L1, but not L2. */ extern bool __read_mostly kvm_ad_enabled; extern u64 __read_mostly shadow_host_writable_mask; extern u64 __read_mostly shadow_mmu_writable_mask; extern u64 __read_mostly shadow_nx_mask; extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ extern u64 __read_mostly shadow_user_mask; extern u64 __read_mostly shadow_accessed_mask; extern u64 __read_mostly shadow_dirty_mask; extern u64 __read_mostly shadow_mmio_value; extern u64 __read_mostly shadow_mmio_mask; extern u64 __read_mostly shadow_mmio_access_mask; extern u64 __read_mostly shadow_present_mask; extern u64 __read_mostly shadow_me_value; extern u64 __read_mostly shadow_me_mask; /* * SPTEs in MMUs without A/D bits are marked with SPTE_TDP_AD_DISABLED; * shadow_acc_track_mask is the set of bits to be cleared in non-accessed * pages. */ extern u64 __read_mostly shadow_acc_track_mask; /* * This mask must be set on all non-zero Non-Present or Reserved SPTEs in order * to guard against L1TF attacks. */ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; /* * The number of high-order 1 bits to use in the mask above. */ #define SHADOW_NONPRESENT_OR_RSVD_MASK_LEN 5 /* * If a thread running without exclusive control of the MMU lock must perform a * multi-part operation on an SPTE, it can set the SPTE to FROZEN_SPTE as a * non-present intermediate value. Other threads which encounter this value * should not modify the SPTE. * * Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on * both AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF * vulnerability. * * Only used by the TDP MMU. */ #define FROZEN_SPTE (SHADOW_NONPRESENT_VALUE | 0x5a0ULL) /* Frozen SPTEs must not be misconstrued as shadow present PTEs. */ static_assert(!(FROZEN_SPTE & SPTE_MMU_PRESENT_MASK)); static inline bool is_frozen_spte(u64 spte) { return spte == FROZEN_SPTE; } /* Get an SPTE's index into its parent's page table (and the spt array). */ static inline int spte_index(u64 *sptep) { return ((unsigned long)sptep / sizeof(*sptep)) & (SPTE_ENT_PER_PAGE - 1); } /* * In some cases, we need to preserve the GFN of a non-present or reserved * SPTE when we usurp the upper five bits of the physical address space to * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask * left into the reserved bits, i.e. the GFN in the SPTE will be split into * high and low parts. This mask covers the lower bits of the GFN. */ extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; static inline hpa_t kvm_mmu_get_dummy_root(void) { return my_zero_pfn(0) << PAGE_SHIFT; } static inline bool kvm_mmu_is_dummy_root(hpa_t shadow_page) { return is_zero_pfn(shadow_page >> PAGE_SHIFT); } static inline struct kvm_mmu_page *to_shadow_page(hpa_t shadow_page) { struct page *page = pfn_to_page((shadow_page) >> PAGE_SHIFT); return (struct kvm_mmu_page *)page_private(page); } static inline struct kvm_mmu_page *spte_to_child_sp(u64 spte) { return to_shadow_page(spte & SPTE_BASE_ADDR_MASK); } static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep) { return to_shadow_page(__pa(sptep)); } static inline struct kvm_mmu_page *root_to_sp(hpa_t root) { if (kvm_mmu_is_dummy_root(root)) return NULL; /* * The "root" may be a special root, e.g. a PAE entry, treat it as a * SPTE to ensure any non-PA bits are dropped. */ return spte_to_child_sp(root); } static inline bool is_mirror_sptep(tdp_ptep_t sptep) { return is_mirror_sp(sptep_to_sp(rcu_dereference(sptep))); } static inline bool kvm_vcpu_can_access_host_mmio(struct kvm_vcpu *vcpu) { struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa); if (root) return READ_ONCE(root->has_mapped_host_mmio); return READ_ONCE(vcpu->kvm->arch.has_mapped_host_mmio); } static inline bool is_mmio_spte(struct kvm *kvm, u64 spte) { return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value && likely(enable_mmio_caching); } static inline bool is_shadow_present_pte(u64 pte) { return !!(pte & SPTE_MMU_PRESENT_MASK); } static inline bool is_ept_ve_possible(u64 spte) { return (shadow_present_mask & VMX_EPT_SUPPRESS_VE_BIT) && !(spte & VMX_EPT_SUPPRESS_VE_BIT) && (spte & VMX_EPT_RWX_MASK) != VMX_EPT_MISCONFIG_WX_VALUE; } static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) { return sp->role.ad_disabled; } static inline bool spte_ad_enabled(u64 spte) { KVM_MMU_WARN_ON(!is_shadow_present_pte(spte)); return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED; } static inline bool spte_ad_need_write_protect(u64 spte) { KVM_MMU_WARN_ON(!is_shadow_present_pte(spte)); /* * This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED is '0', * and non-TDP SPTEs will never set these bits. Optimize for 64-bit * TDP and do the A/D type check unconditionally. */ return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_ENABLED; } static inline bool is_access_track_spte(u64 spte) { return !spte_ad_enabled(spte) && (spte & shadow_acc_track_mask) == 0; } static inline bool is_large_pte(u64 pte) { return pte & PT_PAGE_SIZE_MASK; } static inline bool is_last_spte(u64 pte, int level) { return (level == PG_LEVEL_4K) || is_large_pte(pte); } static inline bool is_executable_pte(u64 spte) { return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask; } static inline kvm_pfn_t spte_to_pfn(u64 pte) { return (pte & SPTE_BASE_ADDR_MASK) >> PAGE_SHIFT; } static inline bool is_accessed_spte(u64 spte) { return spte & shadow_accessed_mask; } static inline u64 get_rsvd_bits(struct rsvd_bits_validate *rsvd_check, u64 pte, int level) { int bit7 = (pte >> 7) & 1; return rsvd_check->rsvd_bits_mask[bit7][level-1]; } static inline bool __is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level) { return pte & get_rsvd_bits(rsvd_check, pte, level); } static inline bool __is_bad_mt_xwr(struct rsvd_bits_validate *rsvd_check, u64 pte) { return rsvd_check->bad_mt_xwr & BIT_ULL(pte & 0x3f); } static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, u64 spte, int level) { return __is_bad_mt_xwr(rsvd_check, spte) || __is_rsvd_bits_set(rsvd_check, spte, level); } /* * A shadow-present leaf SPTE may be non-writable for 4 possible reasons: * * 1. To intercept writes for dirty logging. KVM write-protects huge pages * so that they can be split down into the dirty logging * granularity (4KiB) whenever the guest writes to them. KVM also * write-protects 4KiB pages so that writes can be recorded in the dirty log * (e.g. if not using PML). SPTEs are write-protected for dirty logging * during the VM-iotcls that enable dirty logging. * * 2. To intercept writes to guest page tables that KVM is shadowing. When a * guest writes to its page table the corresponding shadow page table will * be marked "unsync". That way KVM knows which shadow page tables need to * be updated on the next TLB flush, INVLPG, etc. and which do not. * * 3. To prevent guest writes to read-only memory, such as for memory in a * read-only memslot or guest memory backed by a read-only VMA. Writes to * such pages are disallowed entirely. * * 4. To emulate the Accessed bit for SPTEs without A/D bits. Note, in this * case, the SPTE is access-protected, not just write-protected! * * For cases #1 and #4, KVM can safely make such SPTEs writable without taking * mmu_lock as capturing the Accessed/Dirty state doesn't require taking it. * To differentiate #1 and #4 from #2 and #3, KVM uses two software-only bits * in the SPTE: * * shadow_mmu_writable_mask, aka MMU-writable - * Cleared on SPTEs that KVM is currently write-protecting for shadow paging * purposes (case 2 above). * * shadow_host_writable_mask, aka Host-writable - * Cleared on SPTEs that are not host-writable (case 3 above) * * Note, not all possible combinations of PT_WRITABLE_MASK, * shadow_mmu_writable_mask, and shadow_host_writable_mask are valid. A given * SPTE can be in only one of the following states, which map to the * aforementioned 3 cases: * * shadow_host_writable_mask | shadow_mmu_writable_mask | PT_WRITABLE_MASK * ------------------------- | ------------------------ | ---------------- * 1 | 1 | 1 (writable) * 1 | 1 | 0 (case 1) * 1 | 0 | 0 (case 2) * 0 | 0 | 0 (case 3) * * The valid combinations of these bits are checked by * check_spte_writable_invariants() whenever an SPTE is modified. * * Clearing the MMU-writable bit is always done under the MMU lock and always * accompanied by a TLB flush before dropping the lock to avoid corrupting the * shadow page tables between vCPUs. Write-protecting an SPTE for dirty logging * (which does not clear the MMU-writable bit), does not flush TLBs before * dropping the lock, as it only needs to synchronize guest writes with the * dirty bitmap. Similarly, making the SPTE inaccessible (and non-writable) for * access-tracking via the clear_young() MMU notifier also does not flush TLBs. * * So, there is the problem: clearing the MMU-writable bit can encounter a * write-protected SPTE while CPUs still have writable mappings for that SPTE * cached in their TLB. To address this, KVM always flushes TLBs when * write-protecting SPTEs if the MMU-writable bit is set on the old SPTE. * * The Host-writable bit is not modified on present SPTEs, it is only set or * cleared when an SPTE is first faulted in from non-present and then remains * immutable. */ static inline bool is_writable_pte(unsigned long pte) { return pte & PT_WRITABLE_MASK; } /* Note: spte must be a shadow-present leaf SPTE. */ static inline void check_spte_writable_invariants(u64 spte) { if (spte & shadow_mmu_writable_mask) WARN_ONCE(!(spte & shadow_host_writable_mask), KBUILD_MODNAME ": MMU-writable SPTE is not Host-writable: %llx", spte); else WARN_ONCE(is_writable_pte(spte), KBUILD_MODNAME ": Writable SPTE is not MMU-writable: %llx", spte); } static inline bool is_mmu_writable_spte(u64 spte) { return spte & shadow_mmu_writable_mask; } /* * Returns true if the access indicated by @fault is allowed by the existing * SPTE protections. Note, the caller is responsible for checking that the * SPTE is a shadow-present, leaf SPTE (either before or after). */ static inline bool is_access_allowed(struct kvm_page_fault *fault, u64 spte) { if (fault->exec) return is_executable_pte(spte); if (fault->write) return is_writable_pte(spte); /* Fault was on Read access */ return spte & PT_PRESENT_MASK; } /* * If the MMU-writable flag is cleared, i.e. the SPTE is write-protected for * write-tracking, remote TLBs must be flushed, even if the SPTE was read-only, * as KVM allows stale Writable TLB entries to exist. When dirty logging, KVM * flushes TLBs based on whether or not dirty bitmap/ring entries were reaped, * not whether or not SPTEs were modified, i.e. only the write-tracking case * needs to flush at the time the SPTEs is modified, before dropping mmu_lock. * * Don't flush if the Accessed bit is cleared, as access tracking tolerates * false negatives, e.g. KVM x86 omits TLB flushes even when aging SPTEs for a * mmu_notifier.clear_flush_young() event. * * Lastly, don't flush if the Dirty bit is cleared, as KVM unconditionally * flushes when enabling dirty logging (see kvm_mmu_slot_apply_flags()), and * when clearing dirty logs, KVM flushes based on whether or not dirty entries * were reaped from the bitmap/ring, not whether or not dirty SPTEs were found. * * Note, this logic only applies to shadow-present leaf SPTEs. The caller is * responsible for checking that the old SPTE is shadow-present, and is also * responsible for determining whether or not a TLB flush is required when * modifying a shadow-present non-leaf SPTE. */ static inline bool leaf_spte_change_needs_tlb_flush(u64 old_spte, u64 new_spte) { return is_mmu_writable_spte(old_spte) && !is_mmu_writable_spte(new_spte); } static inline u64 get_mmio_spte_generation(u64 spte) { u64 gen; gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_SHIFT; gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_SHIFT; return gen; } bool spte_needs_atomic_update(u64 spte); bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, const struct kvm_memory_slot *slot, unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn, u64 old_spte, bool prefetch, bool synchronizing, bool host_writable, u64 *new_spte); u64 make_small_spte(struct kvm *kvm, u64 huge_spte, union kvm_mmu_page_role role, int index); u64 make_huge_spte(struct kvm *kvm, u64 small_spte, int level); u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled); u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access); u64 mark_spte_for_access_track(u64 spte); /* Restore an acc-track PTE back to a regular PTE */ static inline u64 restore_acc_track_spte(u64 spte) { u64 saved_bits = (spte >> SHADOW_ACC_TRACK_SAVED_BITS_SHIFT) & SHADOW_ACC_TRACK_SAVED_BITS_MASK; spte &= ~shadow_acc_track_mask; spte &= ~(SHADOW_ACC_TRACK_SAVED_BITS_MASK << SHADOW_ACC_TRACK_SAVED_BITS_SHIFT); spte |= saved_bits; return spte; } void __init kvm_mmu_spte_module_init(void); void kvm_mmu_reset_all_pte_masks(void); #endif |
| 4 4 4 4 4 4 4 4 4 6 6 6 6 6 6 6 4 4 4 4 4 4 4 4 4 4 4 4 6 6 6 6 8 8 8 8 8 8 8 8 8 8 8 8 8 8 6 6 4 4 8 8 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) International Business Machines Corp., 2006 * Copyright (c) Nokia Corporation, 2006, 2007 * * Author: Artem Bityutskiy (Битюцкий Артём) */ /* * This file includes volume table manipulation code. The volume table is an * on-flash table containing volume meta-data like name, number of reserved * physical eraseblocks, type, etc. The volume table is stored in the so-called * "layout volume". * * The layout volume is an internal volume which is organized as follows. It * consists of two logical eraseblocks - LEB 0 and LEB 1. Each logical * eraseblock stores one volume table copy, i.e. LEB 0 and LEB 1 duplicate each * other. This redundancy guarantees robustness to unclean reboots. The volume * table is basically an array of volume table records. Each record contains * full information about the volume and protected by a CRC checksum. Note, * nowadays we use the atomic LEB change operation when updating the volume * table, so we do not really need 2 LEBs anymore, but we preserve the older * design for the backward compatibility reasons. * * When the volume table is changed, it is first changed in RAM. Then LEB 0 is * erased, and the updated volume table is written back to LEB 0. Then same for * LEB 1. This scheme guarantees recoverability from unclean reboots. * * In this UBI implementation the on-flash volume table does not contain any * information about how much data static volumes contain. * * But it would still be beneficial to store this information in the volume * table. For example, suppose we have a static volume X, and all its physical * eraseblocks became bad for some reasons. Suppose we are attaching the * corresponding MTD device, for some reason we find no logical eraseblocks * corresponding to the volume X. According to the volume table volume X does * exist. So we don't know whether it is just empty or all its physical * eraseblocks went bad. So we cannot alarm the user properly. * * The volume table also stores so-called "update marker", which is used for * volume updates. Before updating the volume, the update marker is set, and * after the update operation is finished, the update marker is cleared. So if * the update operation was interrupted (e.g. by an unclean reboot) - the * update marker is still there and we know that the volume's contents is * damaged. */ #include <linux/crc32.h> #include <linux/err.h> #include <linux/slab.h> #include <asm/div64.h> #include "ubi.h" static void self_vtbl_check(const struct ubi_device *ubi); /* Empty volume table record */ static struct ubi_vtbl_record empty_vtbl_record; /** * ubi_update_layout_vol - helper for updatting layout volumes on flash * @ubi: UBI device description object */ static int ubi_update_layout_vol(struct ubi_device *ubi) { struct ubi_volume *layout_vol; int i, err; layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)]; for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { err = ubi_eba_atomic_leb_change(ubi, layout_vol, i, ubi->vtbl, ubi->vtbl_size); if (err) return err; } return 0; } /** * ubi_change_vtbl_record - change volume table record. * @ubi: UBI device description object * @idx: table index to change * @vtbl_rec: new volume table record * * This function changes volume table record @idx. If @vtbl_rec is %NULL, empty * volume table record is written. The caller does not have to calculate CRC of * the record as it is done by this function. Returns zero in case of success * and a negative error code in case of failure. */ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, struct ubi_vtbl_record *vtbl_rec) { int err; uint32_t crc; ubi_assert(idx >= 0 && idx < ubi->vtbl_slots); if (!vtbl_rec) vtbl_rec = &empty_vtbl_record; else { crc = crc32(UBI_CRC32_INIT, vtbl_rec, UBI_VTBL_RECORD_SIZE_CRC); vtbl_rec->crc = cpu_to_be32(crc); } memcpy(&ubi->vtbl[idx], vtbl_rec, sizeof(struct ubi_vtbl_record)); err = ubi_update_layout_vol(ubi); self_vtbl_check(ubi); return err ? err : 0; } /** * ubi_vtbl_rename_volumes - rename UBI volumes in the volume table. * @ubi: UBI device description object * @rename_list: list of &struct ubi_rename_entry objects * * This function re-names multiple volumes specified in @req in the volume * table. Returns zero in case of success and a negative error code in case of * failure. */ int ubi_vtbl_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list) { struct ubi_rename_entry *re; list_for_each_entry(re, rename_list, list) { uint32_t crc; struct ubi_volume *vol = re->desc->vol; struct ubi_vtbl_record *vtbl_rec = &ubi->vtbl[vol->vol_id]; if (re->remove) { memcpy(vtbl_rec, &empty_vtbl_record, sizeof(struct ubi_vtbl_record)); continue; } vtbl_rec->name_len = cpu_to_be16(re->new_name_len); memcpy(vtbl_rec->name, re->new_name, re->new_name_len); memset(vtbl_rec->name + re->new_name_len, 0, UBI_VOL_NAME_MAX + 1 - re->new_name_len); crc = crc32(UBI_CRC32_INIT, vtbl_rec, UBI_VTBL_RECORD_SIZE_CRC); vtbl_rec->crc = cpu_to_be32(crc); } return ubi_update_layout_vol(ubi); } /** * vtbl_check - check if volume table is not corrupted and sensible. * @ubi: UBI device description object * @vtbl: volume table * * This function returns zero if @vtbl is all right, %1 if CRC is incorrect, * and %-EINVAL if it contains inconsistent data. */ static int vtbl_check(const struct ubi_device *ubi, const struct ubi_vtbl_record *vtbl) { int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len; int upd_marker, err; uint32_t crc; const char *name; for (i = 0; i < ubi->vtbl_slots; i++) { cond_resched(); reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs); alignment = be32_to_cpu(vtbl[i].alignment); data_pad = be32_to_cpu(vtbl[i].data_pad); upd_marker = vtbl[i].upd_marker; vol_type = vtbl[i].vol_type; name_len = be16_to_cpu(vtbl[i].name_len); name = &vtbl[i].name[0]; crc = crc32(UBI_CRC32_INIT, &vtbl[i], UBI_VTBL_RECORD_SIZE_CRC); if (be32_to_cpu(vtbl[i].crc) != crc) { ubi_err(ubi, "bad CRC at record %u: %#08x, not %#08x", i, crc, be32_to_cpu(vtbl[i].crc)); ubi_dump_vtbl_record(&vtbl[i], i); return 1; } if (reserved_pebs == 0) { if (memcmp(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE)) { err = 2; goto bad; } continue; } if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 || name_len < 0) { err = 3; goto bad; } if (alignment > ubi->leb_size || alignment == 0) { err = 4; goto bad; } n = alignment & (ubi->min_io_size - 1); if (alignment != 1 && n) { err = 5; goto bad; } n = ubi->leb_size % alignment; if (data_pad != n) { ubi_err(ubi, "bad data_pad, has to be %d", n); err = 6; goto bad; } if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { err = 7; goto bad; } if (upd_marker != 0 && upd_marker != 1) { err = 8; goto bad; } if (reserved_pebs > ubi->good_peb_count) { ubi_err(ubi, "too large reserved_pebs %d, good PEBs %d", reserved_pebs, ubi->good_peb_count); err = 9; goto bad; } if (name_len > UBI_VOL_NAME_MAX) { err = 10; goto bad; } if (name[0] == '\0') { err = 11; goto bad; } if (name_len != strnlen(name, name_len + 1)) { err = 12; goto bad; } } /* Checks that all names are unique */ for (i = 0; i < ubi->vtbl_slots - 1; i++) { for (n = i + 1; n < ubi->vtbl_slots; n++) { int len1 = be16_to_cpu(vtbl[i].name_len); int len2 = be16_to_cpu(vtbl[n].name_len); if (len1 > 0 && len1 == len2 && !strncmp(vtbl[i].name, vtbl[n].name, len1)) { ubi_err(ubi, "volumes %d and %d have the same name \"%s\"", i, n, vtbl[i].name); ubi_dump_vtbl_record(&vtbl[i], i); ubi_dump_vtbl_record(&vtbl[n], n); return -EINVAL; } } } return 0; bad: ubi_err(ubi, "volume table check failed: record %d, error %d", i, err); ubi_dump_vtbl_record(&vtbl[i], i); return -EINVAL; } /** * create_vtbl - create a copy of volume table. * @ubi: UBI device description object * @ai: attaching information * @copy: number of the volume table copy * @vtbl: contents of the volume table * * This function returns zero in case of success and a negative error code in * case of failure. */ static int create_vtbl(struct ubi_device *ubi, struct ubi_attach_info *ai, int copy, void *vtbl) { int err, tries = 0; struct ubi_vid_io_buf *vidb; struct ubi_vid_hdr *vid_hdr; struct ubi_ainf_peb *new_aeb; dbg_gen("create volume table (copy #%d)", copy + 1); vidb = ubi_alloc_vid_buf(ubi, GFP_KERNEL); if (!vidb) return -ENOMEM; vid_hdr = ubi_get_vid_hdr(vidb); retry: new_aeb = ubi_early_get_peb(ubi, ai); if (IS_ERR(new_aeb)) { err = PTR_ERR(new_aeb); goto out_free; } vid_hdr->vol_type = UBI_LAYOUT_VOLUME_TYPE; vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOLUME_ID); vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT; vid_hdr->data_size = vid_hdr->used_ebs = vid_hdr->data_pad = cpu_to_be32(0); vid_hdr->lnum = cpu_to_be32(copy); vid_hdr->sqnum = cpu_to_be64(++ai->max_sqnum); /* The EC header is already there, write the VID header */ err = ubi_io_write_vid_hdr(ubi, new_aeb->pnum, vidb); if (err) goto write_error; /* Write the layout volume contents */ err = ubi_io_write_data(ubi, vtbl, new_aeb->pnum, 0, ubi->vtbl_size); if (err) goto write_error; /* * And add it to the attaching information. Don't delete the old version * of this LEB as it will be deleted and freed in 'ubi_add_to_av()'. */ err = ubi_add_to_av(ubi, ai, new_aeb->pnum, new_aeb->ec, vid_hdr, 0); ubi_free_aeb(ai, new_aeb); ubi_free_vid_buf(vidb); return err; write_error: if (err == -EIO && ++tries <= 5) { /* * Probably this physical eraseblock went bad, try to pick * another one. */ list_add(&new_aeb->u.list, &ai->erase); goto retry; } ubi_free_aeb(ai, new_aeb); out_free: ubi_free_vid_buf(vidb); return err; } /** * process_lvol - process the layout volume. * @ubi: UBI device description object * @ai: attaching information * @av: layout volume attaching information * * This function is responsible for reading the layout volume, ensuring it is * not corrupted, and recovering from corruptions if needed. Returns volume * table in case of success and a negative error code in case of failure. */ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, struct ubi_attach_info *ai, struct ubi_ainf_volume *av) { int err; struct rb_node *rb; struct ubi_ainf_peb *aeb; struct ubi_vtbl_record *leb[UBI_LAYOUT_VOLUME_EBS] = { NULL, NULL }; int leb_corrupted[UBI_LAYOUT_VOLUME_EBS] = {1, 1}; /* * UBI goes through the following steps when it changes the layout * volume: * a. erase LEB 0; * b. write new data to LEB 0; * c. erase LEB 1; * d. write new data to LEB 1. * * Before the change, both LEBs contain the same data. * * Due to unclean reboots, the contents of LEB 0 may be lost, but there * should LEB 1. So it is OK if LEB 0 is corrupted while LEB 1 is not. * Similarly, LEB 1 may be lost, but there should be LEB 0. And * finally, unclean reboots may result in a situation when neither LEB * 0 nor LEB 1 are corrupted, but they are different. In this case, LEB * 0 contains more recent information. * * So the plan is to first check LEB 0. Then * a. if LEB 0 is OK, it must be containing the most recent data; then * we compare it with LEB 1, and if they are different, we copy LEB * 0 to LEB 1; * b. if LEB 0 is corrupted, but LEB 1 has to be OK, and we copy LEB 1 * to LEB 0. */ dbg_gen("check layout volume"); /* Read both LEB 0 and LEB 1 into memory */ ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) { leb[aeb->lnum] = vzalloc(ubi->vtbl_size); if (!leb[aeb->lnum]) { err = -ENOMEM; goto out_free; } err = ubi_io_read_data(ubi, leb[aeb->lnum], aeb->pnum, 0, ubi->vtbl_size); if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) /* * Scrub the PEB later. Note, -EBADMSG indicates an * uncorrectable ECC error, but we have our own CRC and * the data will be checked later. If the data is OK, * the PEB will be scrubbed (because we set * aeb->scrub). If the data is not OK, the contents of * the PEB will be recovered from the second copy, and * aeb->scrub will be cleared in * 'ubi_add_to_av()'. */ aeb->scrub = 1; else if (err) goto out_free; } err = -EINVAL; if (leb[0]) { leb_corrupted[0] = vtbl_check(ubi, leb[0]); if (leb_corrupted[0] < 0) goto out_free; } if (!leb_corrupted[0]) { /* LEB 0 is OK */ if (leb[1]) leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size); if (leb_corrupted[1]) { ubi_warn(ubi, "volume table copy #2 is corrupted"); err = create_vtbl(ubi, ai, 1, leb[0]); if (err) goto out_free; ubi_msg(ubi, "volume table was restored"); } /* Both LEB 1 and LEB 2 are OK and consistent */ vfree(leb[1]); return leb[0]; } else { /* LEB 0 is corrupted or does not exist */ if (leb[1]) { leb_corrupted[1] = vtbl_check(ubi, leb[1]); if (leb_corrupted[1] < 0) goto out_free; } if (leb_corrupted[1]) { /* Both LEB 0 and LEB 1 are corrupted */ ubi_err(ubi, "both volume tables are corrupted"); goto out_free; } ubi_warn(ubi, "volume table copy #1 is corrupted"); err = create_vtbl(ubi, ai, 0, leb[1]); if (err) goto out_free; ubi_msg(ubi, "volume table was restored"); vfree(leb[0]); return leb[1]; } out_free: vfree(leb[0]); vfree(leb[1]); return ERR_PTR(err); } /** * create_empty_lvol - create empty layout volume. * @ubi: UBI device description object * @ai: attaching information * * This function returns volume table contents in case of success and a * negative error code in case of failure. */ static struct ubi_vtbl_record *create_empty_lvol(struct ubi_device *ubi, struct ubi_attach_info *ai) { int i; struct ubi_vtbl_record *vtbl; vtbl = vzalloc(ubi->vtbl_size); if (!vtbl) return ERR_PTR(-ENOMEM); for (i = 0; i < ubi->vtbl_slots; i++) memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE); for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { int err; err = create_vtbl(ubi, ai, i, vtbl); if (err) { vfree(vtbl); return ERR_PTR(err); } } return vtbl; } /** * init_volumes - initialize volume information for existing volumes. * @ubi: UBI device description object * @ai: scanning information * @vtbl: volume table * * This function allocates volume description objects for existing volumes. * Returns zero in case of success and a negative error code in case of * failure. */ static int init_volumes(struct ubi_device *ubi, const struct ubi_attach_info *ai, const struct ubi_vtbl_record *vtbl) { int i, err, reserved_pebs = 0; struct ubi_ainf_volume *av; struct ubi_volume *vol; for (i = 0; i < ubi->vtbl_slots; i++) { cond_resched(); if (be32_to_cpu(vtbl[i].reserved_pebs) == 0) continue; /* Empty record */ vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); if (!vol) return -ENOMEM; vol->reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs); vol->alignment = be32_to_cpu(vtbl[i].alignment); vol->data_pad = be32_to_cpu(vtbl[i].data_pad); vol->upd_marker = vtbl[i].upd_marker; vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ? UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; vol->name_len = be16_to_cpu(vtbl[i].name_len); vol->usable_leb_size = ubi->leb_size - vol->data_pad; memcpy(vol->name, vtbl[i].name, vol->name_len); vol->name[vol->name_len] = '\0'; vol->vol_id = i; if (vtbl[i].flags & UBI_VTBL_SKIP_CRC_CHECK_FLG) vol->skip_check = 1; if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) { /* Auto re-size flag may be set only for one volume */ if (ubi->autoresize_vol_id != -1) { ubi_err(ubi, "more than one auto-resize volume (%d and %d)", ubi->autoresize_vol_id, i); kfree(vol); return -EINVAL; } ubi->autoresize_vol_id = i; } ubi_assert(!ubi->volumes[i]); ubi->volumes[i] = vol; ubi->vol_count += 1; vol->ubi = ubi; reserved_pebs += vol->reserved_pebs; /* * We use ubi->peb_count and not vol->reserved_pebs because * we want to keep the code simple. Otherwise we'd have to * resize/check the bitmap upon volume resize too. * Allocating a few bytes more does not hurt. */ err = ubi_fastmap_init_checkmap(vol, ubi->peb_count); if (err) return err; /* * In case of dynamic volume UBI knows nothing about how many * data is stored there. So assume the whole volume is used. */ if (vol->vol_type == UBI_DYNAMIC_VOLUME) { vol->used_ebs = vol->reserved_pebs; vol->last_eb_bytes = vol->usable_leb_size; vol->used_bytes = (long long)vol->used_ebs * vol->usable_leb_size; continue; } /* Static volumes only */ av = ubi_find_av(ai, i); if (!av || !av->leb_count) { /* * No eraseblocks belonging to this volume found. We * don't actually know whether this static volume is * completely corrupted or just contains no data. And * we cannot know this as long as data size is not * stored on flash. So we just assume the volume is * empty. FIXME: this should be handled. */ continue; } if (av->leb_count != av->used_ebs) { /* * We found a static volume which misses several * eraseblocks. Treat it as corrupted. */ ubi_warn(ubi, "static volume %d misses %d LEBs - corrupted", av->vol_id, av->used_ebs - av->leb_count); vol->corrupted = 1; continue; } vol->used_ebs = av->used_ebs; vol->used_bytes = (long long)(vol->used_ebs - 1) * vol->usable_leb_size; vol->used_bytes += av->last_data_size; vol->last_eb_bytes = av->last_data_size; } /* And add the layout volume */ vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); if (!vol) return -ENOMEM; vol->reserved_pebs = UBI_LAYOUT_VOLUME_EBS; vol->alignment = UBI_LAYOUT_VOLUME_ALIGN; vol->vol_type = UBI_DYNAMIC_VOLUME; vol->name_len = sizeof(UBI_LAYOUT_VOLUME_NAME) - 1; memcpy(vol->name, UBI_LAYOUT_VOLUME_NAME, vol->name_len + 1); vol->usable_leb_size = ubi->leb_size; vol->used_ebs = vol->reserved_pebs; vol->last_eb_bytes = vol->reserved_pebs; vol->used_bytes = (long long)vol->used_ebs * (ubi->leb_size - vol->data_pad); vol->vol_id = UBI_LAYOUT_VOLUME_ID; vol->ref_count = 1; ubi_assert(!ubi->volumes[i]); ubi->volumes[vol_id2idx(ubi, vol->vol_id)] = vol; reserved_pebs += vol->reserved_pebs; ubi->vol_count += 1; vol->ubi = ubi; err = ubi_fastmap_init_checkmap(vol, UBI_LAYOUT_VOLUME_EBS); if (err) return err; if (reserved_pebs > ubi->avail_pebs) { ubi_err(ubi, "not enough PEBs, required %d, available %d", reserved_pebs, ubi->avail_pebs); if (ubi->corr_peb_count) ubi_err(ubi, "%d PEBs are corrupted and not used", ubi->corr_peb_count); return -ENOSPC; } ubi->rsvd_pebs += reserved_pebs; ubi->avail_pebs -= reserved_pebs; return 0; } /** * check_av - check volume attaching information. * @vol: UBI volume description object * @av: volume attaching information * * This function returns zero if the volume attaching information is consistent * to the data read from the volume tabla, and %-EINVAL if not. */ static int check_av(const struct ubi_volume *vol, const struct ubi_ainf_volume *av) { int err; if (av->highest_lnum >= vol->reserved_pebs) { err = 1; goto bad; } if (av->leb_count > vol->reserved_pebs) { err = 2; goto bad; } if (av->vol_type != vol->vol_type) { err = 3; goto bad; } if (av->used_ebs > vol->reserved_pebs) { err = 4; goto bad; } if (av->data_pad != vol->data_pad) { err = 5; goto bad; } return 0; bad: ubi_err(vol->ubi, "bad attaching information, error %d", err); ubi_dump_av(av); ubi_dump_vol_info(vol); return -EINVAL; } /** * check_attaching_info - check that attaching information. * @ubi: UBI device description object * @ai: attaching information * * Even though we protect on-flash data by CRC checksums, we still don't trust * the media. This function ensures that attaching information is consistent to * the information read from the volume table. Returns zero if the attaching * information is OK and %-EINVAL if it is not. */ static int check_attaching_info(const struct ubi_device *ubi, struct ubi_attach_info *ai) { int err, i; struct ubi_ainf_volume *av; struct ubi_volume *vol; if (ai->vols_found > UBI_INT_VOL_COUNT + ubi->vtbl_slots) { ubi_err(ubi, "found %d volumes while attaching, maximum is %d + %d", ai->vols_found, UBI_INT_VOL_COUNT, ubi->vtbl_slots); return -EINVAL; } if (ai->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT && ai->highest_vol_id < UBI_INTERNAL_VOL_START) { ubi_err(ubi, "too large volume ID %d found", ai->highest_vol_id); return -EINVAL; } for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { cond_resched(); av = ubi_find_av(ai, i); vol = ubi->volumes[i]; if (!vol) { if (av) ubi_remove_av(ai, av); continue; } if (vol->reserved_pebs == 0) { ubi_assert(i < ubi->vtbl_slots); if (!av) continue; /* * During attaching we found a volume which does not * exist according to the information in the volume * table. This must have happened due to an unclean * reboot while the volume was being removed. Discard * these eraseblocks. */ ubi_msg(ubi, "finish volume %d removal", av->vol_id); ubi_remove_av(ai, av); } else if (av) { err = check_av(vol, av); if (err) return err; } } return 0; } /** * ubi_read_volume_table - read the volume table. * @ubi: UBI device description object * @ai: attaching information * * This function reads volume table, checks it, recover from errors if needed, * or creates it if needed. Returns zero in case of success and a negative * error code in case of failure. */ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_attach_info *ai) { int err; struct ubi_ainf_volume *av; empty_vtbl_record.crc = cpu_to_be32(0xf116c36b); /* * The number of supported volumes is limited by the eraseblock size * and by the UBI_MAX_VOLUMES constant. */ if (ubi->leb_size < UBI_VTBL_RECORD_SIZE) { ubi_err(ubi, "LEB size too small for a volume record"); return -EINVAL; } ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE; if (ubi->vtbl_slots > UBI_MAX_VOLUMES) ubi->vtbl_slots = UBI_MAX_VOLUMES; ubi->vtbl_size = ubi->vtbl_slots * UBI_VTBL_RECORD_SIZE; ubi->vtbl_size = ALIGN(ubi->vtbl_size, ubi->min_io_size); av = ubi_find_av(ai, UBI_LAYOUT_VOLUME_ID); if (!av) { /* * No logical eraseblocks belonging to the layout volume were * found. This could mean that the flash is just empty. In * this case we create empty layout volume. * * But if flash is not empty this must be a corruption or the * MTD device just contains garbage. */ if (ai->is_empty) { ubi->vtbl = create_empty_lvol(ubi, ai); if (IS_ERR(ubi->vtbl)) return PTR_ERR(ubi->vtbl); } else { ubi_err(ubi, "the layout volume was not found"); return -EINVAL; } } else { if (av->leb_count > UBI_LAYOUT_VOLUME_EBS) { /* This must not happen with proper UBI images */ ubi_err(ubi, "too many LEBs (%d) in layout volume", av->leb_count); return -EINVAL; } ubi->vtbl = process_lvol(ubi, ai, av); if (IS_ERR(ubi->vtbl)) return PTR_ERR(ubi->vtbl); } ubi->avail_pebs = ubi->good_peb_count - ubi->corr_peb_count; /* * The layout volume is OK, initialize the corresponding in-RAM data * structures. */ err = init_volumes(ubi, ai, ubi->vtbl); if (err) goto out_free; /* * Make sure that the attaching information is consistent to the * information stored in the volume table. */ err = check_attaching_info(ubi, ai); if (err) goto out_free; return 0; out_free: vfree(ubi->vtbl); ubi_free_all_volumes(ubi); return err; } /** * self_vtbl_check - check volume table. * @ubi: UBI device description object */ static void self_vtbl_check(const struct ubi_device *ubi) { if (!ubi_dbg_chk_gen(ubi)) return; if (vtbl_check(ubi, ubi->vtbl)) { ubi_err(ubi, "self-check failed"); BUG(); } } |
| 9 9 3 3 9 9 9 2 9 2 10 17 10 1 1 1 1 1 5 5 5 5 2 5 5 5 2 6 5 2 6 5 6 2 2 6 6 6 3 2 3 10 10 9 10 20 20 19 17 4 4 10 8 9 8 13 3 13 11 5 10 10 10 10 3 9 1 9 8 8 20 8 6 6 6 15 15 15 15 15 15 15 9 9 3 3 3 9 9 2 9 9 9 26 26 26 24 26 25 4 4 4 4 4 6 5 3 1 3 10 9 9 12 12 11 11 9 1 9 7 1 6 9 8 7 6 9 20 19 3 4 16 5 5 20 11 5 5 11 24 22 22 3 2 2 1 19 18 19 8 2 11 3 1 17 1 17 17 16 4 17 17 24 6 1 5 1 4 3 4 4 3 4 4 6 8 7 6 6 8 4 3 2 1 1 2 4 7 1 2 1 1 2 2 1 7 8 7 7 7 3 7 8 4 3 3 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2015, Sony Mobile Communications Inc. * Copyright (c) 2013, The Linux Foundation. All rights reserved. */ #include <linux/module.h> #include <linux/netlink.h> #include <linux/qrtr.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/spinlock.h> #include <linux/wait.h> #include <net/sock.h> #include "qrtr.h" #define QRTR_PROTO_VER_1 1 #define QRTR_PROTO_VER_2 3 /* auto-bind range */ #define QRTR_MIN_EPH_SOCKET 0x4000 #define QRTR_MAX_EPH_SOCKET 0x7fff #define QRTR_EPH_PORT_RANGE \ XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET) #define QRTR_PORT_CTRL_LEGACY 0xffff /** * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1 * @version: protocol version * @type: packet type; one of QRTR_TYPE_* * @src_node_id: source node * @src_port_id: source port * @confirm_rx: boolean; whether a resume-tx packet should be send in reply * @size: length of packet, excluding this header * @dst_node_id: destination node * @dst_port_id: destination port */ struct qrtr_hdr_v1 { __le32 version; __le32 type; __le32 src_node_id; __le32 src_port_id; __le32 confirm_rx; __le32 size; __le32 dst_node_id; __le32 dst_port_id; } __packed; /** * struct qrtr_hdr_v2 - (I|R)PCrouter packet header later versions * @version: protocol version * @type: packet type; one of QRTR_TYPE_* * @flags: bitmask of QRTR_FLAGS_* * @optlen: length of optional header data * @size: length of packet, excluding this header and optlen * @src_node_id: source node * @src_port_id: source port * @dst_node_id: destination node * @dst_port_id: destination port */ struct qrtr_hdr_v2 { u8 version; u8 type; u8 flags; u8 optlen; __le32 size; __le16 src_node_id; __le16 src_port_id; __le16 dst_node_id; __le16 dst_port_id; }; #define QRTR_FLAGS_CONFIRM_RX BIT(0) struct qrtr_cb { u32 src_node; u32 src_port; u32 dst_node; u32 dst_port; u8 type; u8 confirm_rx; }; #define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \ sizeof(struct qrtr_hdr_v2)) struct qrtr_sock { /* WARNING: sk must be the first member */ struct sock sk; struct sockaddr_qrtr us; struct sockaddr_qrtr peer; }; static inline struct qrtr_sock *qrtr_sk(struct sock *sk) { BUILD_BUG_ON(offsetof(struct qrtr_sock, sk) != 0); return container_of(sk, struct qrtr_sock, sk); } static unsigned int qrtr_local_nid = 1; /* for node ids */ static RADIX_TREE(qrtr_nodes, GFP_ATOMIC); static DEFINE_SPINLOCK(qrtr_nodes_lock); /* broadcast list */ static LIST_HEAD(qrtr_all_nodes); /* lock for qrtr_all_nodes and node reference */ static DEFINE_MUTEX(qrtr_node_lock); /* local port allocation management */ static DEFINE_XARRAY_ALLOC(qrtr_ports); /** * struct qrtr_node - endpoint node * @ep_lock: lock for endpoint management and callbacks * @ep: endpoint * @ref: reference count for node * @nid: node id * @qrtr_tx_flow: tree of qrtr_tx_flow, keyed by node << 32 | port * @qrtr_tx_lock: lock for qrtr_tx_flow inserts * @rx_queue: receive queue * @item: list item for broadcast list */ struct qrtr_node { struct mutex ep_lock; struct qrtr_endpoint *ep; struct kref ref; unsigned int nid; struct radix_tree_root qrtr_tx_flow; struct mutex qrtr_tx_lock; /* for qrtr_tx_flow */ struct sk_buff_head rx_queue; struct list_head item; }; /** * struct qrtr_tx_flow - tx flow control * @resume_tx: waiters for a resume tx from the remote * @pending: number of waiting senders * @tx_failed: indicates that a message with confirm_rx flag was lost */ struct qrtr_tx_flow { struct wait_queue_head resume_tx; int pending; int tx_failed; }; #define QRTR_TX_FLOW_HIGH 10 #define QRTR_TX_FLOW_LOW 5 static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, int type, struct sockaddr_qrtr *from, struct sockaddr_qrtr *to); static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb, int type, struct sockaddr_qrtr *from, struct sockaddr_qrtr *to); static struct qrtr_sock *qrtr_port_lookup(int port); static void qrtr_port_put(struct qrtr_sock *ipc); /* Release node resources and free the node. * * Do not call directly, use qrtr_node_release. To be used with * kref_put_mutex. As such, the node mutex is expected to be locked on call. */ static void __qrtr_node_release(struct kref *kref) { struct qrtr_node *node = container_of(kref, struct qrtr_node, ref); struct radix_tree_iter iter; struct qrtr_tx_flow *flow; unsigned long flags; void __rcu **slot; spin_lock_irqsave(&qrtr_nodes_lock, flags); /* If the node is a bridge for other nodes, there are possibly * multiple entries pointing to our released node, delete them all. */ radix_tree_for_each_slot(slot, &qrtr_nodes, &iter, 0) { if (*slot == node) radix_tree_iter_delete(&qrtr_nodes, &iter, slot); } spin_unlock_irqrestore(&qrtr_nodes_lock, flags); list_del(&node->item); mutex_unlock(&qrtr_node_lock); skb_queue_purge(&node->rx_queue); /* Free tx flow counters */ radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) { flow = *slot; radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot); kfree(flow); } kfree(node); } /* Increment reference to node. */ static struct qrtr_node *qrtr_node_acquire(struct qrtr_node *node) { if (node) kref_get(&node->ref); return node; } /* Decrement reference to node and release as necessary. */ static void qrtr_node_release(struct qrtr_node *node) { if (!node) return; kref_put_mutex(&node->ref, __qrtr_node_release, &qrtr_node_lock); } /** * qrtr_tx_resume() - reset flow control counter * @node: qrtr_node that the QRTR_TYPE_RESUME_TX packet arrived on * @skb: resume_tx packet */ static void qrtr_tx_resume(struct qrtr_node *node, struct sk_buff *skb) { struct qrtr_ctrl_pkt *pkt = (struct qrtr_ctrl_pkt *)skb->data; u64 remote_node = le32_to_cpu(pkt->client.node); u32 remote_port = le32_to_cpu(pkt->client.port); struct qrtr_tx_flow *flow; unsigned long key; key = remote_node << 32 | remote_port; rcu_read_lock(); flow = radix_tree_lookup(&node->qrtr_tx_flow, key); rcu_read_unlock(); if (flow) { spin_lock(&flow->resume_tx.lock); flow->pending = 0; spin_unlock(&flow->resume_tx.lock); wake_up_interruptible_all(&flow->resume_tx); } consume_skb(skb); } /** * qrtr_tx_wait() - flow control for outgoing packets * @node: qrtr_node that the packet is to be send to * @dest_node: node id of the destination * @dest_port: port number of the destination * @type: type of message * * The flow control scheme is based around the low and high "watermarks". When * the low watermark is passed the confirm_rx flag is set on the outgoing * message, which will trigger the remote to send a control message of the type * QRTR_TYPE_RESUME_TX to reset the counter. If the high watermark is hit * further transmision should be paused. * * Return: 1 if confirm_rx should be set, 0 otherwise or errno failure */ static int qrtr_tx_wait(struct qrtr_node *node, int dest_node, int dest_port, int type) { unsigned long key = (u64)dest_node << 32 | dest_port; struct qrtr_tx_flow *flow; int confirm_rx = 0; int ret; /* Never set confirm_rx on non-data packets */ if (type != QRTR_TYPE_DATA) return 0; mutex_lock(&node->qrtr_tx_lock); flow = radix_tree_lookup(&node->qrtr_tx_flow, key); if (!flow) { flow = kzalloc(sizeof(*flow), GFP_KERNEL); if (flow) { init_waitqueue_head(&flow->resume_tx); if (radix_tree_insert(&node->qrtr_tx_flow, key, flow)) { kfree(flow); flow = NULL; } } } mutex_unlock(&node->qrtr_tx_lock); /* Set confirm_rx if we where unable to find and allocate a flow */ if (!flow) return 1; spin_lock_irq(&flow->resume_tx.lock); ret = wait_event_interruptible_locked_irq(flow->resume_tx, flow->pending < QRTR_TX_FLOW_HIGH || flow->tx_failed || !node->ep); if (ret < 0) { confirm_rx = ret; } else if (!node->ep) { confirm_rx = -EPIPE; } else if (flow->tx_failed) { flow->tx_failed = 0; confirm_rx = 1; } else { flow->pending++; confirm_rx = flow->pending == QRTR_TX_FLOW_LOW; } spin_unlock_irq(&flow->resume_tx.lock); return confirm_rx; } /** * qrtr_tx_flow_failed() - flag that tx of confirm_rx flagged messages failed * @node: qrtr_node that the packet is to be send to * @dest_node: node id of the destination * @dest_port: port number of the destination * * Signal that the transmission of a message with confirm_rx flag failed. The * flow's "pending" counter will keep incrementing towards QRTR_TX_FLOW_HIGH, * at which point transmission would stall forever waiting for the resume TX * message associated with the dropped confirm_rx message. * Work around this by marking the flow as having a failed transmission and * cause the next transmission attempt to be sent with the confirm_rx. */ static void qrtr_tx_flow_failed(struct qrtr_node *node, int dest_node, int dest_port) { unsigned long key = (u64)dest_node << 32 | dest_port; struct qrtr_tx_flow *flow; rcu_read_lock(); flow = radix_tree_lookup(&node->qrtr_tx_flow, key); rcu_read_unlock(); if (flow) { spin_lock_irq(&flow->resume_tx.lock); flow->tx_failed = 1; spin_unlock_irq(&flow->resume_tx.lock); } } /* Pass an outgoing packet socket buffer to the endpoint driver. */ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, int type, struct sockaddr_qrtr *from, struct sockaddr_qrtr *to) { struct qrtr_hdr_v1 *hdr; size_t len = skb->len; int rc, confirm_rx; confirm_rx = qrtr_tx_wait(node, to->sq_node, to->sq_port, type); if (confirm_rx < 0) { kfree_skb(skb); return confirm_rx; } hdr = skb_push(skb, sizeof(*hdr)); hdr->version = cpu_to_le32(QRTR_PROTO_VER_1); hdr->type = cpu_to_le32(type); hdr->src_node_id = cpu_to_le32(from->sq_node); hdr->src_port_id = cpu_to_le32(from->sq_port); if (to->sq_port == QRTR_PORT_CTRL) { hdr->dst_node_id = cpu_to_le32(node->nid); hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); } else { hdr->dst_node_id = cpu_to_le32(to->sq_node); hdr->dst_port_id = cpu_to_le32(to->sq_port); } hdr->size = cpu_to_le32(len); hdr->confirm_rx = !!confirm_rx; rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); if (!rc) { mutex_lock(&node->ep_lock); rc = -ENODEV; if (node->ep) rc = node->ep->xmit(node->ep, skb); else kfree_skb(skb); mutex_unlock(&node->ep_lock); } /* Need to ensure that a subsequent message carries the otherwise lost * confirm_rx flag if we dropped this one */ if (rc && confirm_rx) qrtr_tx_flow_failed(node, to->sq_node, to->sq_port); return rc; } /* Lookup node by id. * * callers must release with qrtr_node_release() */ static struct qrtr_node *qrtr_node_lookup(unsigned int nid) { struct qrtr_node *node; unsigned long flags; mutex_lock(&qrtr_node_lock); spin_lock_irqsave(&qrtr_nodes_lock, flags); node = radix_tree_lookup(&qrtr_nodes, nid); node = qrtr_node_acquire(node); spin_unlock_irqrestore(&qrtr_nodes_lock, flags); mutex_unlock(&qrtr_node_lock); return node; } /* Assign node id to node. * * This is mostly useful for automatic node id assignment, based on * the source id in the incoming packet. */ static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid) { unsigned long flags; if (nid == QRTR_EP_NID_AUTO) return; spin_lock_irqsave(&qrtr_nodes_lock, flags); radix_tree_insert(&qrtr_nodes, nid, node); if (node->nid == QRTR_EP_NID_AUTO) node->nid = nid; spin_unlock_irqrestore(&qrtr_nodes_lock, flags); } /** * qrtr_endpoint_post() - post incoming data * @ep: endpoint handle * @data: data pointer * @len: size of data in bytes * * Return: 0 on success; negative error code on failure */ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) { struct qrtr_node *node = ep->node; const struct qrtr_hdr_v1 *v1; const struct qrtr_hdr_v2 *v2; struct qrtr_sock *ipc; struct sk_buff *skb; struct qrtr_cb *cb; size_t size; unsigned int ver; size_t hdrlen; if (len == 0 || len & 3) return -EINVAL; skb = __netdev_alloc_skb(NULL, len, GFP_ATOMIC | __GFP_NOWARN); if (!skb) return -ENOMEM; cb = (struct qrtr_cb *)skb->cb; /* Version field in v1 is little endian, so this works for both cases */ ver = *(u8*)data; switch (ver) { case QRTR_PROTO_VER_1: if (len < sizeof(*v1)) goto err; v1 = data; hdrlen = sizeof(*v1); cb->type = le32_to_cpu(v1->type); cb->src_node = le32_to_cpu(v1->src_node_id); cb->src_port = le32_to_cpu(v1->src_port_id); cb->confirm_rx = !!v1->confirm_rx; cb->dst_node = le32_to_cpu(v1->dst_node_id); cb->dst_port = le32_to_cpu(v1->dst_port_id); size = le32_to_cpu(v1->size); break; case QRTR_PROTO_VER_2: if (len < sizeof(*v2)) goto err; v2 = data; hdrlen = sizeof(*v2) + v2->optlen; cb->type = v2->type; cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX); cb->src_node = le16_to_cpu(v2->src_node_id); cb->src_port = le16_to_cpu(v2->src_port_id); cb->dst_node = le16_to_cpu(v2->dst_node_id); cb->dst_port = le16_to_cpu(v2->dst_port_id); if (cb->src_port == (u16)QRTR_PORT_CTRL) cb->src_port = QRTR_PORT_CTRL; if (cb->dst_port == (u16)QRTR_PORT_CTRL) cb->dst_port = QRTR_PORT_CTRL; size = le32_to_cpu(v2->size); break; default: pr_err("qrtr: Invalid version %d\n", ver); goto err; } if (cb->dst_port == QRTR_PORT_CTRL_LEGACY) cb->dst_port = QRTR_PORT_CTRL; if (!size || len != ALIGN(size, 4) + hdrlen) goto err; if ((cb->type == QRTR_TYPE_NEW_SERVER || cb->type == QRTR_TYPE_RESUME_TX) && size < sizeof(struct qrtr_ctrl_pkt)) goto err; if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA && cb->type != QRTR_TYPE_RESUME_TX) goto err; skb_put_data(skb, data + hdrlen, size); qrtr_node_assign(node, cb->src_node); if (cb->type == QRTR_TYPE_NEW_SERVER) { /* Remote node endpoint can bridge other distant nodes */ const struct qrtr_ctrl_pkt *pkt; pkt = data + hdrlen; qrtr_node_assign(node, le32_to_cpu(pkt->server.node)); } if (cb->type == QRTR_TYPE_RESUME_TX) { qrtr_tx_resume(node, skb); } else { ipc = qrtr_port_lookup(cb->dst_port); if (!ipc) goto err; if (sock_queue_rcv_skb(&ipc->sk, skb)) { qrtr_port_put(ipc); goto err; } qrtr_port_put(ipc); } return 0; err: kfree_skb(skb); return -EINVAL; } EXPORT_SYMBOL_GPL(qrtr_endpoint_post); /** * qrtr_alloc_ctrl_packet() - allocate control packet skb * @pkt: reference to qrtr_ctrl_pkt pointer * @flags: the type of memory to allocate * * Returns newly allocated sk_buff, or NULL on failure * * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and * on success returns a reference to the control packet in @pkt. */ static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt, gfp_t flags) { const int pkt_len = sizeof(struct qrtr_ctrl_pkt); struct sk_buff *skb; skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, flags); if (!skb) return NULL; skb_reserve(skb, QRTR_HDR_MAX_SIZE); *pkt = skb_put_zero(skb, pkt_len); return skb; } /** * qrtr_endpoint_register() - register a new endpoint * @ep: endpoint to register * @nid: desired node id; may be QRTR_EP_NID_AUTO for auto-assignment * Return: 0 on success; negative error code on failure * * The specified endpoint must have the xmit function pointer set on call. */ int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid) { struct qrtr_node *node; if (!ep || !ep->xmit) return -EINVAL; node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; kref_init(&node->ref); mutex_init(&node->ep_lock); skb_queue_head_init(&node->rx_queue); node->nid = QRTR_EP_NID_AUTO; node->ep = ep; INIT_RADIX_TREE(&node->qrtr_tx_flow, GFP_KERNEL); mutex_init(&node->qrtr_tx_lock); qrtr_node_assign(node, nid); mutex_lock(&qrtr_node_lock); list_add(&node->item, &qrtr_all_nodes); mutex_unlock(&qrtr_node_lock); ep->node = node; return 0; } EXPORT_SYMBOL_GPL(qrtr_endpoint_register); /** * qrtr_endpoint_unregister - unregister endpoint * @ep: endpoint to unregister */ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) { struct qrtr_node *node = ep->node; struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL}; struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL}; struct radix_tree_iter iter; struct qrtr_ctrl_pkt *pkt; struct qrtr_tx_flow *flow; struct sk_buff *skb; unsigned long flags; void __rcu **slot; mutex_lock(&node->ep_lock); node->ep = NULL; mutex_unlock(&node->ep_lock); /* Notify the local controller about the event */ spin_lock_irqsave(&qrtr_nodes_lock, flags); radix_tree_for_each_slot(slot, &qrtr_nodes, &iter, 0) { if (*slot != node) continue; src.sq_node = iter.index; skb = qrtr_alloc_ctrl_packet(&pkt, GFP_ATOMIC); if (skb) { pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE); qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst); } } spin_unlock_irqrestore(&qrtr_nodes_lock, flags); /* Wake up any transmitters waiting for resume-tx from the node */ mutex_lock(&node->qrtr_tx_lock); radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) { flow = *slot; wake_up_interruptible_all(&flow->resume_tx); } mutex_unlock(&node->qrtr_tx_lock); qrtr_node_release(node); ep->node = NULL; } EXPORT_SYMBOL_GPL(qrtr_endpoint_unregister); /* Lookup socket by port. * * Callers must release with qrtr_port_put() */ static struct qrtr_sock *qrtr_port_lookup(int port) { struct qrtr_sock *ipc; if (port == QRTR_PORT_CTRL) port = 0; rcu_read_lock(); ipc = xa_load(&qrtr_ports, port); if (ipc) sock_hold(&ipc->sk); rcu_read_unlock(); return ipc; } /* Release acquired socket. */ static void qrtr_port_put(struct qrtr_sock *ipc) { sock_put(&ipc->sk); } /* Remove port assignment. */ static void qrtr_port_remove(struct qrtr_sock *ipc) { struct qrtr_ctrl_pkt *pkt; struct sk_buff *skb; int port = ipc->us.sq_port; struct sockaddr_qrtr to; to.sq_family = AF_QIPCRTR; to.sq_node = QRTR_NODE_BCAST; to.sq_port = QRTR_PORT_CTRL; skb = qrtr_alloc_ctrl_packet(&pkt, GFP_KERNEL); if (skb) { pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); pkt->client.node = cpu_to_le32(ipc->us.sq_node); pkt->client.port = cpu_to_le32(ipc->us.sq_port); skb_set_owner_w(skb, &ipc->sk); qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us, &to); } if (port == QRTR_PORT_CTRL) port = 0; __sock_put(&ipc->sk); xa_erase(&qrtr_ports, port); /* Ensure that if qrtr_port_lookup() did enter the RCU read section we * wait for it to up increment the refcount */ synchronize_rcu(); } /* Assign port number to socket. * * Specify port in the integer pointed to by port, and it will be adjusted * on return as necesssary. * * Port may be: * 0: Assign ephemeral port in [QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET] * <QRTR_MIN_EPH_SOCKET: Specified; requires CAP_NET_ADMIN * >QRTR_MIN_EPH_SOCKET: Specified; available to all */ static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) { int rc; if (!*port) { rc = xa_alloc(&qrtr_ports, port, ipc, QRTR_EPH_PORT_RANGE, GFP_KERNEL); } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) { rc = -EACCES; } else if (*port == QRTR_PORT_CTRL) { rc = xa_insert(&qrtr_ports, 0, ipc, GFP_KERNEL); } else { rc = xa_insert(&qrtr_ports, *port, ipc, GFP_KERNEL); } if (rc == -EBUSY) return -EADDRINUSE; else if (rc < 0) return rc; sock_hold(&ipc->sk); return 0; } /* Reset all non-control ports */ static void qrtr_reset_ports(void) { struct qrtr_sock *ipc; unsigned long index; rcu_read_lock(); xa_for_each_start(&qrtr_ports, index, ipc, 1) { sock_hold(&ipc->sk); ipc->sk.sk_err = ENETRESET; sk_error_report(&ipc->sk); sock_put(&ipc->sk); } rcu_read_unlock(); } /* Bind socket to address. * * Socket should be locked upon call. */ static int __qrtr_bind(struct socket *sock, const struct sockaddr_qrtr *addr, int zapped) { struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sock *sk = sock->sk; int port; int rc; /* rebinding ok */ if (!zapped && addr->sq_port == ipc->us.sq_port) return 0; port = addr->sq_port; rc = qrtr_port_assign(ipc, &port); if (rc) return rc; /* unbind previous, if any */ if (!zapped) qrtr_port_remove(ipc); ipc->us.sq_port = port; sock_reset_flag(sk, SOCK_ZAPPED); /* Notify all open ports about the new controller */ if (port == QRTR_PORT_CTRL) qrtr_reset_ports(); return 0; } /* Auto bind to an ephemeral port. */ static int qrtr_autobind(struct socket *sock) { struct sock *sk = sock->sk; struct sockaddr_qrtr addr; if (!sock_flag(sk, SOCK_ZAPPED)) return 0; addr.sq_family = AF_QIPCRTR; addr.sq_node = qrtr_local_nid; addr.sq_port = 0; return __qrtr_bind(sock, &addr, 1); } /* Bind socket to specified sockaddr. */ static int qrtr_bind(struct socket *sock, struct sockaddr_unsized *saddr, int len) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sock *sk = sock->sk; int rc; if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) return -EINVAL; if (addr->sq_node != ipc->us.sq_node) return -EINVAL; lock_sock(sk); rc = __qrtr_bind(sock, addr, sock_flag(sk, SOCK_ZAPPED)); release_sock(sk); return rc; } /* Queue packet to local peer socket. */ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, int type, struct sockaddr_qrtr *from, struct sockaddr_qrtr *to) { struct qrtr_sock *ipc; struct qrtr_cb *cb; ipc = qrtr_port_lookup(to->sq_port); if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ if (ipc) qrtr_port_put(ipc); kfree_skb(skb); return -ENODEV; } cb = (struct qrtr_cb *)skb->cb; cb->src_node = from->sq_node; cb->src_port = from->sq_port; if (sock_queue_rcv_skb(&ipc->sk, skb)) { qrtr_port_put(ipc); kfree_skb(skb); return -ENOSPC; } qrtr_port_put(ipc); return 0; } /* Queue packet for broadcast. */ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb, int type, struct sockaddr_qrtr *from, struct sockaddr_qrtr *to) { struct sk_buff *skbn; mutex_lock(&qrtr_node_lock); list_for_each_entry(node, &qrtr_all_nodes, item) { skbn = pskb_copy(skb, GFP_KERNEL); if (!skbn) break; skb_set_owner_w(skbn, skb->sk); qrtr_node_enqueue(node, skbn, type, from, to); } mutex_unlock(&qrtr_node_lock); qrtr_local_enqueue(NULL, skb, type, from, to); return 0; } static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int, struct sockaddr_qrtr *, struct sockaddr_qrtr *); __le32 qrtr_type = cpu_to_le32(QRTR_TYPE_DATA); struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sock *sk = sock->sk; struct qrtr_node *node; struct sk_buff *skb; size_t plen; u32 type; int rc; if (msg->msg_flags & ~(MSG_DONTWAIT)) return -EINVAL; if (len > 65535) return -EMSGSIZE; lock_sock(sk); if (addr) { if (msg->msg_namelen < sizeof(*addr)) { release_sock(sk); return -EINVAL; } if (addr->sq_family != AF_QIPCRTR) { release_sock(sk); return -EINVAL; } rc = qrtr_autobind(sock); if (rc) { release_sock(sk); return rc; } } else if (sk->sk_state == TCP_ESTABLISHED) { addr = &ipc->peer; } else { release_sock(sk); return -ENOTCONN; } node = NULL; if (addr->sq_node == QRTR_NODE_BCAST) { if (addr->sq_port != QRTR_PORT_CTRL && qrtr_local_nid != QRTR_NODE_BCAST) { release_sock(sk); return -ENOTCONN; } enqueue_fn = qrtr_bcast_enqueue; } else if (addr->sq_node == ipc->us.sq_node) { enqueue_fn = qrtr_local_enqueue; } else { node = qrtr_node_lookup(addr->sq_node); if (!node) { release_sock(sk); return -ECONNRESET; } enqueue_fn = qrtr_node_enqueue; } plen = (len + 3) & ~3; skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE, msg->msg_flags & MSG_DONTWAIT, &rc); if (!skb) { rc = -ENOMEM; goto out_node; } skb_reserve(skb, QRTR_HDR_MAX_SIZE); rc = memcpy_from_msg(skb_put(skb, len), msg, len); if (rc) { kfree_skb(skb); goto out_node; } if (ipc->us.sq_port == QRTR_PORT_CTRL) { if (len < 4) { rc = -EINVAL; kfree_skb(skb); goto out_node; } /* control messages already require the type as 'command' */ skb_copy_bits(skb, 0, &qrtr_type, 4); } type = le32_to_cpu(qrtr_type); rc = enqueue_fn(node, skb, type, &ipc->us, addr); if (rc >= 0) rc = len; out_node: qrtr_node_release(node); release_sock(sk); return rc; } static int qrtr_send_resume_tx(struct qrtr_cb *cb) { struct sockaddr_qrtr remote = { AF_QIPCRTR, cb->src_node, cb->src_port }; struct sockaddr_qrtr local = { AF_QIPCRTR, cb->dst_node, cb->dst_port }; struct qrtr_ctrl_pkt *pkt; struct qrtr_node *node; struct sk_buff *skb; int ret; node = qrtr_node_lookup(remote.sq_node); if (!node) return -EINVAL; skb = qrtr_alloc_ctrl_packet(&pkt, GFP_KERNEL); if (!skb) return -ENOMEM; pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX); pkt->client.node = cpu_to_le32(cb->dst_node); pkt->client.port = cpu_to_le32(cb->dst_port); ret = qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX, &local, &remote); qrtr_node_release(node); return ret; } static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); struct sock *sk = sock->sk; struct sk_buff *skb; struct qrtr_cb *cb; int copied, rc; lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) { release_sock(sk); return -EADDRNOTAVAIL; } skb = skb_recv_datagram(sk, flags, &rc); if (!skb) { release_sock(sk); return rc; } cb = (struct qrtr_cb *)skb->cb; copied = skb->len; if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; } rc = skb_copy_datagram_msg(skb, 0, msg, copied); if (rc < 0) goto out; rc = copied; if (addr) { /* There is an anonymous 2-byte hole after sq_family, * make sure to clear it. */ memset(addr, 0, sizeof(*addr)); addr->sq_family = AF_QIPCRTR; addr->sq_node = cb->src_node; addr->sq_port = cb->src_port; msg->msg_namelen = sizeof(*addr); } out: if (cb->confirm_rx) qrtr_send_resume_tx(cb); skb_free_datagram(sk, skb); release_sock(sk); return rc; } static int qrtr_connect(struct socket *sock, struct sockaddr_unsized *saddr, int len, int flags) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sock *sk = sock->sk; int rc; if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) return -EINVAL; lock_sock(sk); sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; rc = qrtr_autobind(sock); if (rc) { release_sock(sk); return rc; } ipc->peer = *addr; sock->state = SS_CONNECTED; sk->sk_state = TCP_ESTABLISHED; release_sock(sk); return 0; } static int qrtr_getname(struct socket *sock, struct sockaddr *saddr, int peer) { struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sockaddr_qrtr qaddr; struct sock *sk = sock->sk; lock_sock(sk); if (peer) { if (sk->sk_state != TCP_ESTABLISHED) { release_sock(sk); return -ENOTCONN; } qaddr = ipc->peer; } else { qaddr = ipc->us; } release_sock(sk); qaddr.sq_family = AF_QIPCRTR; memcpy(saddr, &qaddr, sizeof(qaddr)); return sizeof(qaddr); } static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sock *sk = sock->sk; struct sockaddr_qrtr *sq; struct sk_buff *skb; struct ifreq ifr; long len = 0; int rc = 0; lock_sock(sk); switch (cmd) { case TIOCOUTQ: len = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (len < 0) len = 0; rc = put_user(len, (int __user *)argp); break; case TIOCINQ: skb = skb_peek(&sk->sk_receive_queue); if (skb) len = skb->len; rc = put_user(len, (int __user *)argp); break; case SIOCGIFADDR: if (get_user_ifreq(&ifr, NULL, argp)) { rc = -EFAULT; break; } sq = (struct sockaddr_qrtr *)&ifr.ifr_addr; *sq = ipc->us; if (put_user_ifreq(&ifr, argp)) { rc = -EFAULT; break; } break; case SIOCADDRT: case SIOCDELRT: case SIOCSIFADDR: case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCGIFBRDADDR: case SIOCSIFBRDADDR: case SIOCGIFNETMASK: case SIOCSIFNETMASK: rc = -EINVAL; break; default: rc = -ENOIOCTLCMD; break; } release_sock(sk); return rc; } static int qrtr_release(struct socket *sock) { struct sock *sk = sock->sk; struct qrtr_sock *ipc; if (!sk) return 0; lock_sock(sk); ipc = qrtr_sk(sk); sk->sk_shutdown = SHUTDOWN_MASK; if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); sock_orphan(sk); sock->sk = NULL; if (!sock_flag(sk, SOCK_ZAPPED)) qrtr_port_remove(ipc); skb_queue_purge(&sk->sk_receive_queue); release_sock(sk); sock_put(sk); return 0; } static const struct proto_ops qrtr_proto_ops = { .owner = THIS_MODULE, .family = AF_QIPCRTR, .bind = qrtr_bind, .connect = qrtr_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .listen = sock_no_listen, .sendmsg = qrtr_sendmsg, .recvmsg = qrtr_recvmsg, .getname = qrtr_getname, .ioctl = qrtr_ioctl, .gettstamp = sock_gettstamp, .poll = datagram_poll, .shutdown = sock_no_shutdown, .release = qrtr_release, .mmap = sock_no_mmap, }; static struct proto qrtr_proto = { .name = "QIPCRTR", .owner = THIS_MODULE, .obj_size = sizeof(struct qrtr_sock), }; static int qrtr_create(struct net *net, struct socket *sock, int protocol, int kern) { struct qrtr_sock *ipc; struct sock *sk; if (sock->type != SOCK_DGRAM) return -EPROTOTYPE; sk = sk_alloc(net, AF_QIPCRTR, GFP_KERNEL, &qrtr_proto, kern); if (!sk) return -ENOMEM; sock_set_flag(sk, SOCK_ZAPPED); sock_init_data(sock, sk); sock->ops = &qrtr_proto_ops; ipc = qrtr_sk(sk); ipc->us.sq_family = AF_QIPCRTR; ipc->us.sq_node = qrtr_local_nid; ipc->us.sq_port = 0; return 0; } static const struct net_proto_family qrtr_family = { .owner = THIS_MODULE, .family = AF_QIPCRTR, .create = qrtr_create, }; static int __init qrtr_proto_init(void) { int rc; rc = proto_register(&qrtr_proto, 1); if (rc) return rc; rc = sock_register(&qrtr_family); if (rc) goto err_proto; rc = qrtr_ns_init(); if (rc) goto err_sock; return 0; err_sock: sock_unregister(qrtr_family.family); err_proto: proto_unregister(&qrtr_proto); return rc; } postcore_initcall(qrtr_proto_init); static void __exit qrtr_proto_fini(void) { qrtr_ns_remove(); sock_unregister(qrtr_family.family); proto_unregister(&qrtr_proto); } module_exit(qrtr_proto_fini); MODULE_DESCRIPTION("Qualcomm IPC-router driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_NETPROTO(PF_QIPCRTR); |
| 5 5 4 2 2 1 1 5 1 6 5 4 3 3 6 3 5 4 3 2 1 5 1 6 6 5 2 4 1 3 6 5 4 3 1 2 1 1 5 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 | // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/namei.h> #include <linux/io_uring.h> #include <uapi/linux/io_uring.h> #include "../fs/internal.h" #include "io_uring.h" #include "fs.h" struct io_rename { struct file *file; int old_dfd; int new_dfd; struct filename *oldpath; struct filename *newpath; int flags; }; struct io_unlink { struct file *file; int dfd; int flags; struct filename *filename; }; struct io_mkdir { struct file *file; int dfd; umode_t mode; struct filename *filename; }; struct io_link { struct file *file; int old_dfd; int new_dfd; struct filename *oldpath; struct filename *newpath; int flags; }; int io_renameat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename); const char __user *oldf, *newf; if (sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; ren->old_dfd = READ_ONCE(sqe->fd); oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); ren->new_dfd = READ_ONCE(sqe->len); ren->flags = READ_ONCE(sqe->rename_flags); ren->oldpath = getname(oldf); if (IS_ERR(ren->oldpath)) return PTR_ERR(ren->oldpath); ren->newpath = getname(newf); if (IS_ERR(ren->newpath)) { putname(ren->oldpath); return PTR_ERR(ren->newpath); } req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_renameat(struct io_kiocb *req, unsigned int issue_flags) { struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd, ren->newpath, ren->flags); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_COMPLETE; } void io_renameat_cleanup(struct io_kiocb *req) { struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename); putname(ren->oldpath); putname(ren->newpath); } int io_unlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_unlink *un = io_kiocb_to_cmd(req, struct io_unlink); const char __user *fname; if (sqe->off || sqe->len || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; un->dfd = READ_ONCE(sqe->fd); un->flags = READ_ONCE(sqe->unlink_flags); if (un->flags & ~AT_REMOVEDIR) return -EINVAL; fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); un->filename = getname(fname); if (IS_ERR(un->filename)) return PTR_ERR(un->filename); req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags) { struct io_unlink *un = io_kiocb_to_cmd(req, struct io_unlink); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); if (un->flags & AT_REMOVEDIR) ret = do_rmdir(un->dfd, un->filename); else ret = do_unlinkat(un->dfd, un->filename); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_COMPLETE; } void io_unlinkat_cleanup(struct io_kiocb *req) { struct io_unlink *ul = io_kiocb_to_cmd(req, struct io_unlink); putname(ul->filename); } int io_mkdirat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_mkdir *mkd = io_kiocb_to_cmd(req, struct io_mkdir); const char __user *fname; if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; mkd->dfd = READ_ONCE(sqe->fd); mkd->mode = READ_ONCE(sqe->len); fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); mkd->filename = getname(fname); if (IS_ERR(mkd->filename)) return PTR_ERR(mkd->filename); req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags) { struct io_mkdir *mkd = io_kiocb_to_cmd(req, struct io_mkdir); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_COMPLETE; } void io_mkdirat_cleanup(struct io_kiocb *req) { struct io_mkdir *md = io_kiocb_to_cmd(req, struct io_mkdir); putname(md->filename); } int io_symlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_link *sl = io_kiocb_to_cmd(req, struct io_link); const char __user *oldpath, *newpath; if (sqe->len || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; sl->new_dfd = READ_ONCE(sqe->fd); oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr)); newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2)); sl->oldpath = getname(oldpath); if (IS_ERR(sl->oldpath)) return PTR_ERR(sl->oldpath); sl->newpath = getname(newpath); if (IS_ERR(sl->newpath)) { putname(sl->oldpath); return PTR_ERR(sl->newpath); } req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags) { struct io_link *sl = io_kiocb_to_cmd(req, struct io_link); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_COMPLETE; } int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link); const char __user *oldf, *newf; if (sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; lnk->old_dfd = READ_ONCE(sqe->fd); lnk->new_dfd = READ_ONCE(sqe->len); oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); lnk->flags = READ_ONCE(sqe->hardlink_flags); lnk->oldpath = getname_uflags(oldf, lnk->flags); if (IS_ERR(lnk->oldpath)) return PTR_ERR(lnk->oldpath); lnk->newpath = getname(newf); if (IS_ERR(lnk->newpath)) { putname(lnk->oldpath); return PTR_ERR(lnk->newpath); } req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_linkat(struct io_kiocb *req, unsigned int issue_flags) { struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd, lnk->newpath, lnk->flags); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_COMPLETE; } void io_link_cleanup(struct io_kiocb *req) { struct io_link *sl = io_kiocb_to_cmd(req, struct io_link); putname(sl->oldpath); putname(sl->newpath); } |
| 22 22 22 22 22 22 6 22 1 22 24 25 7 2 25 1 24 2 1 2 1 2 1 1 1 1 1 1 29 1 29 28 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | #include <linux/kernel.h> #include <linux/errno.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/net.h> #include <linux/io_uring.h> #include "io_uring.h" #include "notif.h" #include "rsrc.h" static const struct ubuf_info_ops io_ubuf_ops; static void io_notif_tw_complete(struct io_tw_req tw_req, io_tw_token_t tw) { struct io_kiocb *notif = tw_req.req; struct io_notif_data *nd = io_notif_to_data(notif); struct io_ring_ctx *ctx = notif->ctx; lockdep_assert_held(&ctx->uring_lock); do { notif = cmd_to_io_kiocb(nd); if (WARN_ON_ONCE(ctx != notif->ctx)) return; lockdep_assert(refcount_read(&nd->uarg.refcnt) == 0); if (unlikely(nd->zc_report) && (nd->zc_copied || !nd->zc_used)) notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED; if (nd->account_pages && notif->ctx->user) { __io_unaccount_mem(notif->ctx->user, nd->account_pages); nd->account_pages = 0; } nd = nd->next; io_req_task_complete((struct io_tw_req){notif}, tw); } while (nd); } void io_tx_ubuf_complete(struct sk_buff *skb, struct ubuf_info *uarg, bool success) { struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg); struct io_kiocb *notif = cmd_to_io_kiocb(nd); unsigned tw_flags; if (nd->zc_report) { if (success && !nd->zc_used && skb) WRITE_ONCE(nd->zc_used, true); else if (!success && !nd->zc_copied) WRITE_ONCE(nd->zc_copied, true); } if (!refcount_dec_and_test(&uarg->refcnt)) return; if (nd->head != nd) { io_tx_ubuf_complete(skb, &nd->head->uarg, success); return; } tw_flags = nd->next ? 0 : IOU_F_TWQ_LAZY_WAKE; notif->io_task_work.func = io_notif_tw_complete; __io_req_task_work_add(notif, tw_flags); } static int io_link_skb(struct sk_buff *skb, struct ubuf_info *uarg) { struct io_notif_data *nd, *prev_nd; struct io_kiocb *prev_notif, *notif; struct ubuf_info *prev_uarg = skb_zcopy(skb); nd = container_of(uarg, struct io_notif_data, uarg); notif = cmd_to_io_kiocb(nd); if (!prev_uarg) { net_zcopy_get(&nd->uarg); skb_zcopy_init(skb, &nd->uarg); return 0; } /* handle it separately as we can't link a notif to itself */ if (unlikely(prev_uarg == &nd->uarg)) return 0; /* we can't join two links together, just request a fresh skb */ if (unlikely(nd->head != nd || nd->next)) return -EEXIST; /* don't mix zc providers */ if (unlikely(prev_uarg->ops != &io_ubuf_ops)) return -EEXIST; prev_nd = container_of(prev_uarg, struct io_notif_data, uarg); prev_notif = cmd_to_io_kiocb(prev_nd); /* make sure all notifications can be finished in the same task_work */ if (unlikely(notif->ctx != prev_notif->ctx || notif->tctx != prev_notif->tctx)) return -EEXIST; nd->head = prev_nd->head; nd->next = prev_nd->next; prev_nd->next = nd; net_zcopy_get(&nd->head->uarg); return 0; } static const struct ubuf_info_ops io_ubuf_ops = { .complete = io_tx_ubuf_complete, .link_skb = io_link_skb, }; struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx) __must_hold(&ctx->uring_lock) { struct io_kiocb *notif; struct io_notif_data *nd; if (unlikely(!io_alloc_req(ctx, ¬if))) return NULL; notif->ctx = ctx; notif->opcode = IORING_OP_NOP; notif->flags = 0; notif->file = NULL; notif->tctx = current->io_uring; io_get_task_refs(1); notif->file_node = NULL; notif->buf_node = NULL; nd = io_notif_to_data(notif); nd->zc_report = false; nd->account_pages = 0; nd->next = NULL; nd->head = nd; nd->uarg.flags = IO_NOTIF_UBUF_FLAGS; nd->uarg.ops = &io_ubuf_ops; refcount_set(&nd->uarg.refcnt, 1); return notif; } |
| 2 1 1 2 8 3 6 2 8 3 2 3 3 16 15 14 8 13 15 8 8 10 1 1 10 8 2 10 10 10 1 9 12 9 8 7 6 5 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 | // SPDX-License-Identifier: GPL-2.0 /* * This contains the io-permission bitmap code - written by obz, with changes * by Linus. 32/64 bits code unification by Miguel Botón. */ #include <linux/capability.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/bitmap.h> #include <linux/ioport.h> #include <linux/sched.h> #include <linux/slab.h> #include <asm/io_bitmap.h> #include <asm/desc.h> #include <asm/syscalls.h> #ifdef CONFIG_X86_IOPL_IOPERM static atomic64_t io_bitmap_sequence; void io_bitmap_share(struct task_struct *tsk) { /* Can be NULL when current->thread.iopl_emul == 3 */ if (current->thread.io_bitmap) { /* * Take a refcount on current's bitmap. It can be used by * both tasks as long as none of them changes the bitmap. */ refcount_inc(¤t->thread.io_bitmap->refcnt); tsk->thread.io_bitmap = current->thread.io_bitmap; } set_tsk_thread_flag(tsk, TIF_IO_BITMAP); } static void task_update_io_bitmap(void) { struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; if (t->iopl_emul == 3 || t->io_bitmap) { /* TSS update is handled on exit to user space */ set_tsk_thread_flag(tsk, TIF_IO_BITMAP); } else { clear_tsk_thread_flag(tsk, TIF_IO_BITMAP); /* Invalidate TSS */ preempt_disable(); tss_update_io_bitmap(); preempt_enable(); } } void io_bitmap_exit(struct task_struct *tsk) { struct io_bitmap *iobm = tsk->thread.io_bitmap; tsk->thread.io_bitmap = NULL; /* * Don't touch the TSS when invoked on a failed fork(). TSS * reflects the state of @current and not the state of @tsk. */ if (tsk == current) task_update_io_bitmap(); if (iobm && refcount_dec_and_test(&iobm->refcnt)) kfree(iobm); } /* * This changes the io permissions bitmap in the current task. */ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) { struct thread_struct *t = ¤t->thread; unsigned int i, max_long; struct io_bitmap *iobm; if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; if (turn_on && (!capable(CAP_SYS_RAWIO) || security_locked_down(LOCKDOWN_IOPORT))) return -EPERM; /* * If it's the first ioperm() call in this thread's lifetime, set the * IO bitmap up. ioperm() is much less timing critical than clone(), * this is why we delay this operation until now: */ iobm = t->io_bitmap; if (!iobm) { /* No point to allocate a bitmap just to clear permissions */ if (!turn_on) return 0; iobm = kmalloc(sizeof(*iobm), GFP_KERNEL); if (!iobm) return -ENOMEM; memset(iobm->bitmap, 0xff, sizeof(iobm->bitmap)); refcount_set(&iobm->refcnt, 1); } /* * If the bitmap is not shared, then nothing can take a refcount as * current can obviously not fork at the same time. If it's shared * duplicate it and drop the refcount on the original one. */ if (refcount_read(&iobm->refcnt) > 1) { iobm = kmemdup(iobm, sizeof(*iobm), GFP_KERNEL); if (!iobm) return -ENOMEM; refcount_set(&iobm->refcnt, 1); io_bitmap_exit(current); } /* * Store the bitmap pointer (might be the same if the task already * head one). Must be done here so freeing the bitmap when all * permissions are dropped has the pointer set up. */ t->io_bitmap = iobm; /* Mark it active for context switching and exit to user mode */ set_thread_flag(TIF_IO_BITMAP); /* * Update the tasks bitmap. The update of the TSS bitmap happens on * exit to user mode. So this needs no protection. */ if (turn_on) bitmap_clear(iobm->bitmap, from, num); else bitmap_set(iobm->bitmap, from, num); /* * Search for a (possibly new) maximum. This is simple and stupid, * to keep it obviously correct: */ max_long = UINT_MAX; for (i = 0; i < IO_BITMAP_LONGS; i++) { if (iobm->bitmap[i] != ~0UL) max_long = i; } /* All permissions dropped? */ if (max_long == UINT_MAX) { io_bitmap_exit(current); return 0; } iobm->max = (max_long + 1) * sizeof(unsigned long); /* * Update the sequence number to force a TSS update on return to * user mode. */ iobm->sequence = atomic64_inc_return(&io_bitmap_sequence); return 0; } SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) { return ksys_ioperm(from, num, turn_on); } /* * The sys_iopl functionality depends on the level argument, which if * granted for the task is used to enable access to all 65536 I/O ports. * * This does not use the IOPL mechanism provided by the CPU as that would * also allow the user space task to use the CLI/STI instructions. * * Disabling interrupts in a user space task is dangerous as it might lock * up the machine and the semantics vs. syscalls and exceptions is * undefined. * * Setting IOPL to level 0-2 is disabling I/O permissions. Level 3 * 3 enables them. * * IOPL is strictly per thread and inherited on fork. */ SYSCALL_DEFINE1(iopl, unsigned int, level) { struct thread_struct *t = ¤t->thread; unsigned int old; if (level > 3) return -EINVAL; old = t->iopl_emul; /* No point in going further if nothing changes */ if (level == old) return 0; /* Trying to gain more privileges? */ if (level > old) { if (!capable(CAP_SYS_RAWIO) || security_locked_down(LOCKDOWN_IOPORT)) return -EPERM; } t->iopl_emul = level; task_update_io_bitmap(); return 0; } #else /* CONFIG_X86_IOPL_IOPERM */ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) { return -ENOSYS; } SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) { return -ENOSYS; } SYSCALL_DEFINE1(iopl, unsigned int, level) { return -ENOSYS; } #endif |
| 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _FAT_H #define _FAT_H #include <linux/buffer_head.h> #include <linux/nls.h> #include <linux/hash.h> #include <linux/ratelimit.h> #include <linux/msdos_fs.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> /* * vfat shortname flags */ #define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ #define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ #define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ #define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ #define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ #define FAT_ERRORS_CONT 1 /* ignore error and continue */ #define FAT_ERRORS_PANIC 2 /* panic on error */ #define FAT_ERRORS_RO 3 /* remount r/o on error */ #define FAT_NFS_STALE_RW 1 /* NFS RW support, can cause ESTALE */ #define FAT_NFS_NOSTALE_RO 2 /* NFS RO support, no ESTALE issue */ struct fat_mount_options { kuid_t fs_uid; kgid_t fs_gid; unsigned short fs_fmask; unsigned short fs_dmask; unsigned short codepage; /* Codepage for shortname conversions */ int time_offset; /* Offset of timestamps from UTC (in minutes) */ char *iocharset; /* Charset used for filename input/display */ unsigned short shortname; /* flags for shortname display/create rule */ unsigned char name_check; /* r = relaxed, n = normal, s = strict */ unsigned char errors; /* On error: continue, panic, remount-ro */ unsigned char nfs; /* NFS support: nostale_ro, stale_rw */ unsigned short allow_utime;/* permission for setting the [am]time */ unsigned quiet:1, /* set = fake successful chmods and chowns */ showexec:1, /* set = only set x bit for com/exe/bat */ sys_immutable:1, /* set = system files are immutable */ dotsOK:1, /* set = hidden and system files are named '.filename' */ isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ utf8:1, /* Use of UTF-8 character set (Default) */ unicode_xlate:1, /* create escape sequences for unhandled Unicode */ numtail:1, /* Does first alias have a numeric '~1' type tail? */ flush:1, /* write things quickly */ nocase:1, /* Does this need case conversion? 0=need case conversion*/ usefree:1, /* Use free_clusters for FAT32 */ tz_set:1, /* Filesystem timestamps' offset set */ rodir:1, /* allow ATTR_RO for directory */ discard:1, /* Issue discard requests on deletions */ dos1xfloppy:1, /* Assume default BPB for DOS 1.x floppies */ debug:1; /* Not currently used */ }; #define FAT_HASH_BITS 8 #define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) /* * MS-DOS file system in-core superblock data */ struct msdos_sb_info { unsigned short sec_per_clus; /* sectors/cluster */ unsigned short cluster_bits; /* log2(cluster_size) */ unsigned int cluster_size; /* cluster size */ unsigned char fats, fat_bits; /* number of FATs, FAT bits (12,16 or 32) */ unsigned short fat_start; unsigned long fat_length; /* FAT start & length (sec.) */ unsigned long dir_start; unsigned short dir_entries; /* root dir start & entries */ unsigned long data_start; /* first data sector */ unsigned long max_cluster; /* maximum cluster number */ unsigned long root_cluster; /* first cluster of the root directory */ unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ struct mutex fat_lock; struct mutex nfs_build_inode_lock; struct mutex s_lock; unsigned int prev_free; /* previously allocated cluster number */ unsigned int free_clusters; /* -1 if undefined */ unsigned int free_clus_valid; /* is free_clusters valid? */ struct fat_mount_options options; struct nls_table *nls_disk; /* Codepage used on disk */ struct nls_table *nls_io; /* Charset used for input and display */ const void *dir_ops; /* Opaque; default directory operations */ int dir_per_block; /* dir entries per block */ int dir_per_block_bits; /* log2(dir_per_block) */ unsigned int vol_id; /*volume ID*/ int fatent_shift; const struct fatent_operations *fatent_ops; struct inode *fat_inode; struct inode *fsinfo_inode; struct ratelimit_state ratelimit; spinlock_t inode_hash_lock; struct hlist_head inode_hashtable[FAT_HASH_SIZE]; spinlock_t dir_hash_lock; struct hlist_head dir_hashtable[FAT_HASH_SIZE]; unsigned int dirty; /* fs state before mount */ struct rcu_head rcu; }; #define FAT_CACHE_VALID 0 /* special case for valid cache */ /* * MS-DOS file system inode data in memory */ struct msdos_inode_info { spinlock_t cache_lru_lock; struct list_head cache_lru; int nr_caches; /* for avoiding the race between fat_free() and fat_get_cluster() */ unsigned int cache_valid_id; /* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */ loff_t mmu_private; /* physically allocated size */ int i_start; /* first cluster or 0 */ int i_logstart; /* logical first cluster */ int i_attrs; /* unused attribute bits */ loff_t i_pos; /* on-disk position of directory entry or 0 */ struct hlist_node i_fat_hash; /* hash by i_location */ struct hlist_node i_dir_hash; /* hash by i_logstart */ struct rw_semaphore truncate_lock; /* protect bmap against truncate */ struct timespec64 i_crtime; /* File creation (birth) time */ struct inode vfs_inode; }; struct fat_slot_info { loff_t i_pos; /* on-disk position of directory entry */ loff_t slot_off; /* offset for slot or de start */ int nr_slots; /* number of slots + 1(de) in filename */ struct msdos_dir_entry *de; struct buffer_head *bh; }; static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) { return sb->s_fs_info; } /* * Functions that determine the variant of the FAT file system (i.e., * whether this is FAT12, FAT16 or FAT32. */ static inline bool is_fat12(const struct msdos_sb_info *sbi) { return sbi->fat_bits == 12; } static inline bool is_fat16(const struct msdos_sb_info *sbi) { return sbi->fat_bits == 16; } static inline bool is_fat32(const struct msdos_sb_info *sbi) { return sbi->fat_bits == 32; } /* Maximum number of clusters */ static inline u32 max_fat(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); return is_fat32(sbi) ? MAX_FAT32 : is_fat16(sbi) ? MAX_FAT16 : MAX_FAT12; } static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) { return container_of(inode, struct msdos_inode_info, vfs_inode); } /* * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to * save ATTR_RO instead of ->i_mode. * * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only * bit, it's just used as flag for app. */ static inline int fat_mode_can_hold_ro(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); umode_t mask; if (S_ISDIR(inode->i_mode)) { if (!sbi->options.rodir) return 0; mask = ~sbi->options.fs_dmask; } else mask = ~sbi->options.fs_fmask; if (!(mask & S_IWUGO)) return 0; return 1; } /* Convert attribute bits and a mask to the UNIX mode. */ static inline umode_t fat_make_mode(struct msdos_sb_info *sbi, u8 attrs, umode_t mode) { if (attrs & ATTR_RO && !((attrs & ATTR_DIR) && !sbi->options.rodir)) mode &= ~S_IWUGO; if (attrs & ATTR_DIR) return (mode & ~sbi->options.fs_dmask) | S_IFDIR; else return (mode & ~sbi->options.fs_fmask) | S_IFREG; } /* Return the FAT attribute byte for this inode */ static inline u8 fat_make_attrs(struct inode *inode) { u8 attrs = MSDOS_I(inode)->i_attrs; if (S_ISDIR(inode->i_mode)) attrs |= ATTR_DIR; if (fat_mode_can_hold_ro(inode) && !(inode->i_mode & S_IWUGO)) attrs |= ATTR_RO; return attrs; } static inline void fat_save_attrs(struct inode *inode, u8 attrs) { if (fat_mode_can_hold_ro(inode)) MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED; else MSDOS_I(inode)->i_attrs = attrs & (ATTR_UNUSED | ATTR_RO); } static inline unsigned char fat_checksum(const __u8 *name) { unsigned char s = name[0]; s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2]; s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4]; s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6]; s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8]; s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10]; return s; } static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus) { return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus + sbi->data_start; } static inline void fat_get_blknr_offset(struct msdos_sb_info *sbi, loff_t i_pos, sector_t *blknr, int *offset) { *blknr = i_pos >> sbi->dir_per_block_bits; *offset = i_pos & (sbi->dir_per_block - 1); } static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, struct inode *inode) { loff_t i_pos; #if BITS_PER_LONG == 32 spin_lock(&sbi->inode_hash_lock); #endif i_pos = MSDOS_I(inode)->i_pos; #if BITS_PER_LONG == 32 spin_unlock(&sbi->inode_hash_lock); #endif return i_pos; } static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) { #ifdef __BIG_ENDIAN while (len--) { *dst++ = src[0] | (src[1] << 8); src += 2; } #else memcpy(dst, src, len * 2); #endif } static inline int fat_get_start(const struct msdos_sb_info *sbi, const struct msdos_dir_entry *de) { int cluster = le16_to_cpu(de->start); if (is_fat32(sbi)) cluster |= (le16_to_cpu(de->starthi) << 16); return cluster; } static inline void fat_set_start(struct msdos_dir_entry *de, int cluster) { de->start = cpu_to_le16(cluster); de->starthi = cpu_to_le16(cluster >> 16); } static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) { #ifdef __BIG_ENDIAN while (len--) { dst[0] = *src & 0x00FF; dst[1] = (*src & 0xFF00) >> 8; dst += 2; src++; } #else memcpy(dst, src, len * 2); #endif } /* fat/cache.c */ extern void fat_cache_inval_inode(struct inode *inode); extern int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus); extern int fat_get_mapped_cluster(struct inode *inode, sector_t sector, sector_t last_block, unsigned long *mapped_blocks, sector_t *bmap); extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, unsigned long *mapped_blocks, int create, bool from_bmap); /* fat/dir.c */ extern const struct file_operations fat_dir_operations; extern int fat_search_long(struct inode *inode, const unsigned char *name, int name_len, struct fat_slot_info *sinfo); extern int fat_dir_empty(struct inode *dir); extern int fat_subdirs(struct inode *dir); extern int fat_scan(struct inode *dir, const unsigned char *name, struct fat_slot_info *sinfo); extern int fat_scan_logstart(struct inode *dir, int i_logstart, struct fat_slot_info *sinfo); extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, struct msdos_dir_entry **de); extern int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts); extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, struct fat_slot_info *sinfo); extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo); /* fat/fatent.c */ struct fat_entry { int entry; union { u8 *ent12_p[2]; __le16 *ent16_p; __le32 *ent32_p; } u; int nr_bhs; struct buffer_head *bhs[2]; struct inode *fat_inode; }; static inline void fatent_init(struct fat_entry *fatent) { fatent->nr_bhs = 0; fatent->entry = 0; fatent->u.ent32_p = NULL; fatent->bhs[0] = fatent->bhs[1] = NULL; fatent->fat_inode = NULL; } static inline void fatent_set_entry(struct fat_entry *fatent, int entry) { fatent->entry = entry; fatent->u.ent32_p = NULL; } static inline void fatent_brelse(struct fat_entry *fatent) { int i; fatent->u.ent32_p = NULL; for (i = 0; i < fatent->nr_bhs; i++) brelse(fatent->bhs[i]); fatent->nr_bhs = 0; fatent->bhs[0] = fatent->bhs[1] = NULL; fatent->fat_inode = NULL; } static inline bool fat_valid_entry(struct msdos_sb_info *sbi, int entry) { return FAT_START_ENT <= entry && entry < sbi->max_cluster; } extern void fat_ent_access_init(struct super_block *sb); extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry); extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent, int new, int wait); extern int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster); extern int fat_free_clusters(struct inode *inode, int cluster); extern int fat_count_free_clusters(struct super_block *sb); extern int fat_trim_fs(struct inode *inode, struct fstrim_range *range); /* fat/file.c */ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); extern const struct file_operations fat_file_operations; extern const struct inode_operations fat_file_inode_operations; extern int fat_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); extern void fat_truncate_blocks(struct inode *inode, loff_t offset); extern int fat_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); extern int fat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); /* fat/inode.c */ extern int fat_block_truncate_page(struct inode *inode, loff_t from); extern void fat_attach(struct inode *inode, loff_t i_pos); extern void fat_detach(struct inode *inode); extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos); extern struct inode *fat_build_inode(struct super_block *sb, struct msdos_dir_entry *de, loff_t i_pos); extern int fat_sync_inode(struct inode *inode); extern int fat_fill_super(struct super_block *sb, struct fs_context *fc, void (*setup)(struct super_block *)); extern int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de); extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); extern const struct fs_parameter_spec fat_param_spec[]; int fat_init_fs_context(struct fs_context *fc, bool is_vfat); void fat_free_fc(struct fs_context *fc); int fat_parse_param(struct fs_context *fc, struct fs_parameter *param, bool is_vfat); int fat_reconfigure(struct fs_context *fc); static inline unsigned long fat_dir_hash(int logstart) { return hash_32(logstart, FAT_HASH_BITS); } extern int fat_add_cluster(struct inode *inode); /* fat/misc.c */ extern __printf(3, 4) __cold void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...); #define fat_fs_error(sb, fmt, args...) \ __fat_fs_error(sb, 1, fmt , ## args) #define fat_fs_error_ratelimit(sb, fmt, args...) \ __fat_fs_error(sb, __ratelimit(&MSDOS_SB(sb)->ratelimit), fmt , ## args) #define FAT_PRINTK_PREFIX "%sFAT-fs (%s): " #define fat_msg(sb, level, fmt, args...) \ do { \ printk_index_subsys_emit(FAT_PRINTK_PREFIX, level, fmt, ##args);\ _fat_msg(sb, level, fmt, ##args); \ } while (0) __printf(3, 4) __cold void _fat_msg(struct super_block *sb, const char *level, const char *fmt, ...); #define fat_msg_ratelimit(sb, level, fmt, args...) \ do { \ if (__ratelimit(&MSDOS_SB(sb)->ratelimit)) \ fat_msg(sb, level, fmt, ## args); \ } while (0) extern int fat_clusters_flush(struct super_block *sb); extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec64 *ts, __le16 __time, __le16 __date, u8 time_cs); extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec64 *ts, __le16 *time, __le16 *date, u8 *time_cs); extern struct timespec64 fat_truncate_atime(const struct msdos_sb_info *sbi, const struct timespec64 *ts); extern struct timespec64 fat_truncate_mtime(const struct msdos_sb_info *sbi, const struct timespec64 *ts); extern int fat_truncate_time(struct inode *inode, struct timespec64 *now, int flags); extern int fat_update_time(struct inode *inode, int flags); extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); int fat_cache_init(void); void fat_cache_destroy(void); /* fat/nfs.c */ extern const struct export_operations fat_export_ops; extern const struct export_operations fat_export_ops_nostale; /* helper for printk */ typedef unsigned long long llu; #endif /* !_FAT_H */ |
| 3 3 3 3 4 3 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 | // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com) */ #include <linux/mm.h> #include <linux/slab.h> #include <linux/sysctl.h> #include <linux/spinlock.h> #include <net/ax25.h> static int min_ipdefmode[1], max_ipdefmode[] = {1}; static int min_axdefmode[1], max_axdefmode[] = {1}; static int min_backoff[1], max_backoff[] = {2}; static int min_conmode[1], max_conmode[] = {2}; static int min_window[] = {1}, max_window[] = {7}; static int min_ewindow[] = {1}, max_ewindow[] = {63}; static int min_t1[] = {1}, max_t1[] = {30000}; static int min_t2[] = {1}, max_t2[] = {20000}; static int min_t3[1], max_t3[] = {3600000}; static int min_idle[1], max_idle[] = {65535000}; static int min_n2[] = {1}, max_n2[] = {31}; static int min_paclen[] = {1}, max_paclen[] = {512}; static int min_proto[1], max_proto[] = { AX25_PROTO_MAX }; #ifdef CONFIG_AX25_DAMA_SLAVE static int min_ds_timeout[1], max_ds_timeout[] = {65535000}; #endif static const struct ctl_table ax25_param_table[] = { { .procname = "ip_default_mode", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_ipdefmode, .extra2 = &max_ipdefmode }, { .procname = "ax25_default_mode", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_axdefmode, .extra2 = &max_axdefmode }, { .procname = "backoff_type", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_backoff, .extra2 = &max_backoff }, { .procname = "connect_mode", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_conmode, .extra2 = &max_conmode }, { .procname = "standard_window_size", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_window, .extra2 = &max_window }, { .procname = "extended_window_size", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_ewindow, .extra2 = &max_ewindow }, { .procname = "t1_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_t1, .extra2 = &max_t1 }, { .procname = "t2_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_t2, .extra2 = &max_t2 }, { .procname = "t3_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_t3, .extra2 = &max_t3 }, { .procname = "idle_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_idle, .extra2 = &max_idle }, { .procname = "maximum_retry_count", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_n2, .extra2 = &max_n2 }, { .procname = "maximum_packet_length", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_paclen, .extra2 = &max_paclen }, { .procname = "protocol", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_proto, .extra2 = &max_proto }, #ifdef CONFIG_AX25_DAMA_SLAVE { .procname = "dama_slave_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_ds_timeout, .extra2 = &max_ds_timeout }, #endif }; int ax25_register_dev_sysctl(ax25_dev *ax25_dev) { char path[sizeof("net/ax25/") + IFNAMSIZ]; int k; struct ctl_table *table; table = kmemdup(ax25_param_table, sizeof(ax25_param_table), GFP_KERNEL); if (!table) return -ENOMEM; BUILD_BUG_ON(ARRAY_SIZE(ax25_param_table) != AX25_MAX_VALUES); for (k = 0; k < AX25_MAX_VALUES; k++) table[k].data = &ax25_dev->values[k]; snprintf(path, sizeof(path), "net/ax25/%s", ax25_dev->dev->name); ax25_dev->sysheader = register_net_sysctl_sz(&init_net, path, table, ARRAY_SIZE(ax25_param_table)); if (!ax25_dev->sysheader) { kfree(table); return -ENOMEM; } return 0; } void ax25_unregister_dev_sysctl(ax25_dev *ax25_dev) { struct ctl_table_header *header = ax25_dev->sysheader; const struct ctl_table *table; if (header) { ax25_dev->sysheader = NULL; table = header->ctl_table_arg; unregister_net_sysctl_table(header); kfree(table); } } |
| 15 15 15 15 12 12 12 12 14 12 12 12 12 12 12 12 12 12 7 7 12 12 12 8 7 7 7 7 8 8 8 8 2 8 8 8 8 8 8 8 8 8 1 8 8 8 8 8 8 3 3 3 3 3 3 1 1 1 8 8 5 5 8 8 8 8 8 8 8 8 7 7 52 52 52 52 2 14 14 4 14 14 14 14 14 4 4 4 4 4 4 4 4 4 4 52 52 14 52 52 52 52 52 52 52 52 52 52 52 52 52 52 52 52 52 52 52 52 52 52 38 14 42 51 1 52 2 14 14 14 2 51 51 51 51 51 37 11 37 14 54 54 54 54 54 52 1 51 51 51 14 51 16 51 13 13 11 13 13 51 51 48 42 54 54 54 54 42 39 42 11 11 11 11 11 12 12 12 12 12 12 4 4 4 4 12 12 12 12 12 12 12 4 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 13 13 12 13 13 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2010 Red Hat, Inc. * Copyright (C) 2016-2023 Christoph Hellwig. */ #include <linux/iomap.h> #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/swap.h> #include <linux/migrate.h> #include "internal.h" #include "trace.h" #include "../internal.h" /* * Structure allocated for each folio to track per-block uptodate, dirty state * and I/O completions. */ struct iomap_folio_state { spinlock_t state_lock; unsigned int read_bytes_pending; atomic_t write_bytes_pending; /* * Each block has two bits in this bitmap: * Bits [0..blocks_per_folio) has the uptodate status. * Bits [b_p_f...(2*b_p_f)) has the dirty status. */ unsigned long state[]; }; static inline bool ifs_is_fully_uptodate(struct folio *folio, struct iomap_folio_state *ifs) { struct inode *inode = folio->mapping->host; return bitmap_full(ifs->state, i_blocks_per_folio(inode, folio)); } /* * Find the next uptodate block in the folio. end_blk is inclusive. * If no uptodate block is found, this will return end_blk + 1. */ static unsigned ifs_next_uptodate_block(struct folio *folio, unsigned start_blk, unsigned end_blk) { struct iomap_folio_state *ifs = folio->private; return find_next_bit(ifs->state, end_blk + 1, start_blk); } /* * Find the next non-uptodate block in the folio. end_blk is inclusive. * If no non-uptodate block is found, this will return end_blk + 1. */ static unsigned ifs_next_nonuptodate_block(struct folio *folio, unsigned start_blk, unsigned end_blk) { struct iomap_folio_state *ifs = folio->private; return find_next_zero_bit(ifs->state, end_blk + 1, start_blk); } static bool ifs_set_range_uptodate(struct folio *folio, struct iomap_folio_state *ifs, size_t off, size_t len) { struct inode *inode = folio->mapping->host; unsigned int first_blk = off >> inode->i_blkbits; unsigned int last_blk = (off + len - 1) >> inode->i_blkbits; unsigned int nr_blks = last_blk - first_blk + 1; bitmap_set(ifs->state, first_blk, nr_blks); return ifs_is_fully_uptodate(folio, ifs); } static void iomap_set_range_uptodate(struct folio *folio, size_t off, size_t len) { struct iomap_folio_state *ifs = folio->private; unsigned long flags; bool uptodate = true; if (folio_test_uptodate(folio)) return; if (ifs) { spin_lock_irqsave(&ifs->state_lock, flags); uptodate = ifs_set_range_uptodate(folio, ifs, off, len); spin_unlock_irqrestore(&ifs->state_lock, flags); } if (uptodate) folio_mark_uptodate(folio); } /* * Find the next dirty block in the folio. end_blk is inclusive. * If no dirty block is found, this will return end_blk + 1. */ static unsigned ifs_next_dirty_block(struct folio *folio, unsigned start_blk, unsigned end_blk) { struct iomap_folio_state *ifs = folio->private; struct inode *inode = folio->mapping->host; unsigned int blks = i_blocks_per_folio(inode, folio); return find_next_bit(ifs->state, blks + end_blk + 1, blks + start_blk) - blks; } /* * Find the next clean block in the folio. end_blk is inclusive. * If no clean block is found, this will return end_blk + 1. */ static unsigned ifs_next_clean_block(struct folio *folio, unsigned start_blk, unsigned end_blk) { struct iomap_folio_state *ifs = folio->private; struct inode *inode = folio->mapping->host; unsigned int blks = i_blocks_per_folio(inode, folio); return find_next_zero_bit(ifs->state, blks + end_blk + 1, blks + start_blk) - blks; } static unsigned ifs_find_dirty_range(struct folio *folio, struct iomap_folio_state *ifs, u64 *range_start, u64 range_end) { struct inode *inode = folio->mapping->host; unsigned start_blk = offset_in_folio(folio, *range_start) >> inode->i_blkbits; unsigned end_blk = min_not_zero( offset_in_folio(folio, range_end) >> inode->i_blkbits, i_blocks_per_folio(inode, folio)) - 1; unsigned nblks; start_blk = ifs_next_dirty_block(folio, start_blk, end_blk); if (start_blk > end_blk) return 0; if (start_blk == end_blk) nblks = 1; else nblks = ifs_next_clean_block(folio, start_blk + 1, end_blk) - start_blk; *range_start = folio_pos(folio) + (start_blk << inode->i_blkbits); return nblks << inode->i_blkbits; } static unsigned iomap_find_dirty_range(struct folio *folio, u64 *range_start, u64 range_end) { struct iomap_folio_state *ifs = folio->private; if (*range_start >= range_end) return 0; if (ifs) return ifs_find_dirty_range(folio, ifs, range_start, range_end); return range_end - *range_start; } static void ifs_clear_range_dirty(struct folio *folio, struct iomap_folio_state *ifs, size_t off, size_t len) { struct inode *inode = folio->mapping->host; unsigned int blks_per_folio = i_blocks_per_folio(inode, folio); unsigned int first_blk = (off >> inode->i_blkbits); unsigned int last_blk = (off + len - 1) >> inode->i_blkbits; unsigned int nr_blks = last_blk - first_blk + 1; unsigned long flags; spin_lock_irqsave(&ifs->state_lock, flags); bitmap_clear(ifs->state, first_blk + blks_per_folio, nr_blks); spin_unlock_irqrestore(&ifs->state_lock, flags); } static void iomap_clear_range_dirty(struct folio *folio, size_t off, size_t len) { struct iomap_folio_state *ifs = folio->private; if (ifs) ifs_clear_range_dirty(folio, ifs, off, len); } static void ifs_set_range_dirty(struct folio *folio, struct iomap_folio_state *ifs, size_t off, size_t len) { struct inode *inode = folio->mapping->host; unsigned int blks_per_folio = i_blocks_per_folio(inode, folio); unsigned int first_blk = (off >> inode->i_blkbits); unsigned int last_blk = (off + len - 1) >> inode->i_blkbits; unsigned int nr_blks = last_blk - first_blk + 1; unsigned long flags; spin_lock_irqsave(&ifs->state_lock, flags); bitmap_set(ifs->state, first_blk + blks_per_folio, nr_blks); spin_unlock_irqrestore(&ifs->state_lock, flags); } static void iomap_set_range_dirty(struct folio *folio, size_t off, size_t len) { struct iomap_folio_state *ifs = folio->private; if (ifs) ifs_set_range_dirty(folio, ifs, off, len); } static struct iomap_folio_state *ifs_alloc(struct inode *inode, struct folio *folio, unsigned int flags) { struct iomap_folio_state *ifs = folio->private; unsigned int nr_blocks = i_blocks_per_folio(inode, folio); gfp_t gfp; if (ifs || nr_blocks <= 1) return ifs; if (flags & IOMAP_NOWAIT) gfp = GFP_NOWAIT; else gfp = GFP_NOFS | __GFP_NOFAIL; /* * ifs->state tracks two sets of state flags when the * filesystem block size is smaller than the folio size. * The first state tracks per-block uptodate and the * second tracks per-block dirty state. */ ifs = kzalloc(struct_size(ifs, state, BITS_TO_LONGS(2 * nr_blocks)), gfp); if (!ifs) return ifs; spin_lock_init(&ifs->state_lock); if (folio_test_uptodate(folio)) bitmap_set(ifs->state, 0, nr_blocks); if (folio_test_dirty(folio)) bitmap_set(ifs->state, nr_blocks, nr_blocks); folio_attach_private(folio, ifs); return ifs; } static void ifs_free(struct folio *folio) { struct iomap_folio_state *ifs = folio_detach_private(folio); if (!ifs) return; WARN_ON_ONCE(ifs->read_bytes_pending != 0); WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending)); WARN_ON_ONCE(ifs_is_fully_uptodate(folio, ifs) != folio_test_uptodate(folio)); kfree(ifs); } /* * Calculate how many bytes to truncate based off the number of blocks to * truncate and the end position to start truncating from. */ static size_t iomap_bytes_to_truncate(loff_t end_pos, unsigned block_bits, unsigned blocks_truncated) { unsigned block_size = 1 << block_bits; unsigned block_offset = end_pos & (block_size - 1); if (!block_offset) return blocks_truncated << block_bits; return ((blocks_truncated - 1) << block_bits) + block_offset; } /* * Calculate the range inside the folio that we actually need to read. */ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio, loff_t *pos, loff_t length, size_t *offp, size_t *lenp) { struct iomap_folio_state *ifs = folio->private; loff_t orig_pos = *pos; loff_t isize = i_size_read(inode); unsigned block_bits = inode->i_blkbits; unsigned block_size = (1 << block_bits); size_t poff = offset_in_folio(folio, *pos); size_t plen = min_t(loff_t, folio_size(folio) - poff, length); size_t orig_plen = plen; unsigned first = poff >> block_bits; unsigned last = (poff + plen - 1) >> block_bits; /* * If the block size is smaller than the page size, we need to check the * per-block uptodate status and adjust the offset and length if needed * to avoid reading in already uptodate ranges. */ if (ifs) { unsigned int next, blocks_skipped; next = ifs_next_nonuptodate_block(folio, first, last); blocks_skipped = next - first; if (blocks_skipped) { unsigned long block_offset = *pos & (block_size - 1); unsigned bytes_skipped = (blocks_skipped << block_bits) - block_offset; *pos += bytes_skipped; poff += bytes_skipped; plen -= bytes_skipped; } first = next; /* truncate len if we find any trailing uptodate block(s) */ if (++next <= last) { next = ifs_next_uptodate_block(folio, next, last); if (next <= last) { plen -= iomap_bytes_to_truncate(*pos + plen, block_bits, last - next + 1); last = next - 1; } } } /* * If the extent spans the block that contains the i_size, we need to * handle both halves separately so that we properly zero data in the * page cache for blocks that are entirely outside of i_size. */ if (orig_pos <= isize && orig_pos + orig_plen > isize) { unsigned end = offset_in_folio(folio, isize - 1) >> block_bits; if (first <= end && last > end) plen -= iomap_bytes_to_truncate(*pos + plen, block_bits, last - end); } *offp = poff; *lenp = plen; } static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter, loff_t pos) { const struct iomap *srcmap = iomap_iter_srcmap(iter); return srcmap->type != IOMAP_MAPPED || (srcmap->flags & IOMAP_F_NEW) || pos >= i_size_read(iter->inode); } /** * iomap_read_inline_data - copy inline data into the page cache * @iter: iteration structure * @folio: folio to copy to * * Copy the inline data in @iter into @folio and zero out the rest of the folio. * Only a single IOMAP_INLINE extent is allowed at the end of each file. * Returns zero for success to complete the read, or the usual negative errno. */ static int iomap_read_inline_data(const struct iomap_iter *iter, struct folio *folio) { const struct iomap *iomap = iomap_iter_srcmap(iter); size_t size = i_size_read(iter->inode) - iomap->offset; size_t offset = offset_in_folio(folio, iomap->offset); if (WARN_ON_ONCE(!iomap->inline_data)) return -EIO; if (folio_test_uptodate(folio)) return 0; if (WARN_ON_ONCE(size > iomap->length)) return -EIO; if (offset > 0) ifs_alloc(iter->inode, folio, iter->flags); folio_fill_tail(folio, offset, iomap->inline_data, size); iomap_set_range_uptodate(folio, offset, folio_size(folio) - offset); return 0; } void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, int error) { struct iomap_folio_state *ifs = folio->private; bool uptodate = !error; bool finished = true; if (ifs) { unsigned long flags; spin_lock_irqsave(&ifs->state_lock, flags); if (!error) uptodate = ifs_set_range_uptodate(folio, ifs, off, len); ifs->read_bytes_pending -= len; finished = !ifs->read_bytes_pending; spin_unlock_irqrestore(&ifs->state_lock, flags); } if (finished) folio_end_read(folio, uptodate); } EXPORT_SYMBOL_GPL(iomap_finish_folio_read); static void iomap_read_init(struct folio *folio) { struct iomap_folio_state *ifs = folio->private; if (ifs) { size_t len = folio_size(folio); /* * ifs->read_bytes_pending is used to track how many bytes are * read in asynchronously by the IO helper. We need to track * this so that we can know when the IO helper has finished * reading in all the necessary ranges of the folio and can end * the read. * * Increase ->read_bytes_pending by the folio size to start, and * add a +1 bias. We'll subtract the bias and any uptodate / * zeroed ranges that did not require IO in iomap_read_end() * after we're done processing the folio. * * We do this because otherwise, we would have to increment * ifs->read_bytes_pending every time a range in the folio needs * to be read in, which can get expensive since the spinlock * needs to be held whenever modifying ifs->read_bytes_pending. * * We add the bias to ensure the read has not been ended on the * folio when iomap_read_end() is called, even if the IO helper * has already finished reading in the entire folio. */ spin_lock_irq(&ifs->state_lock); WARN_ON_ONCE(ifs->read_bytes_pending != 0); ifs->read_bytes_pending = len + 1; spin_unlock_irq(&ifs->state_lock); } } /* * This ends IO if no bytes were submitted to an IO helper. * * Otherwise, this calibrates ifs->read_bytes_pending to represent only the * submitted bytes (see comment in iomap_read_init()). If all bytes submitted * have already been completed by the IO helper, then this will end the read. * Else the IO helper will end the read after all submitted ranges have been * read. */ static void iomap_read_end(struct folio *folio, size_t bytes_submitted) { struct iomap_folio_state *ifs = folio->private; if (ifs) { bool end_read, uptodate; spin_lock_irq(&ifs->state_lock); if (!ifs->read_bytes_pending) { WARN_ON_ONCE(bytes_submitted); spin_unlock_irq(&ifs->state_lock); folio_unlock(folio); return; } /* * Subtract any bytes that were initially accounted to * read_bytes_pending but skipped for IO. The +1 accounts for * the bias we added in iomap_read_init(). */ ifs->read_bytes_pending -= (folio_size(folio) + 1 - bytes_submitted); /* * If !ifs->read_bytes_pending, this means all pending reads by * the IO helper have already completed, which means we need to * end the folio read here. If ifs->read_bytes_pending != 0, * the IO helper will end the folio read. */ end_read = !ifs->read_bytes_pending; if (end_read) uptodate = ifs_is_fully_uptodate(folio, ifs); spin_unlock_irq(&ifs->state_lock); if (end_read) folio_end_read(folio, uptodate); } else if (!bytes_submitted) { /* * If there were no bytes submitted, this means we are * responsible for unlocking the folio here, since no IO helper * has taken ownership of it. If there were bytes submitted, * then the IO helper will end the read via * iomap_finish_folio_read(). */ folio_unlock(folio); } } static int iomap_read_folio_iter(struct iomap_iter *iter, struct iomap_read_folio_ctx *ctx, size_t *bytes_submitted) { const struct iomap *iomap = &iter->iomap; loff_t pos = iter->pos; loff_t length = iomap_length(iter); struct folio *folio = ctx->cur_folio; size_t poff, plen; loff_t pos_diff; int ret; if (iomap->type == IOMAP_INLINE) { ret = iomap_read_inline_data(iter, folio); if (ret) return ret; return iomap_iter_advance(iter, length); } ifs_alloc(iter->inode, folio, iter->flags); length = min_t(loff_t, length, folio_size(folio) - offset_in_folio(folio, pos)); while (length) { iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); pos_diff = pos - iter->pos; if (WARN_ON_ONCE(pos_diff + plen > length)) return -EIO; ret = iomap_iter_advance(iter, pos_diff); if (ret) return ret; if (plen == 0) return 0; /* zero post-eof blocks as the page may be mapped */ if (iomap_block_needs_zeroing(iter, pos)) { folio_zero_range(folio, poff, plen); iomap_set_range_uptodate(folio, poff, plen); } else { if (!*bytes_submitted) iomap_read_init(folio); ret = ctx->ops->read_folio_range(iter, ctx, plen); if (ret) return ret; *bytes_submitted += plen; } ret = iomap_iter_advance(iter, plen); if (ret) return ret; length -= pos_diff + plen; pos = iter->pos; } return 0; } void iomap_read_folio(const struct iomap_ops *ops, struct iomap_read_folio_ctx *ctx) { struct folio *folio = ctx->cur_folio; struct iomap_iter iter = { .inode = folio->mapping->host, .pos = folio_pos(folio), .len = folio_size(folio), }; size_t bytes_submitted = 0; int ret; trace_iomap_readpage(iter.inode, 1); while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_read_folio_iter(&iter, ctx, &bytes_submitted); if (ctx->ops->submit_read) ctx->ops->submit_read(ctx); iomap_read_end(folio, bytes_submitted); } EXPORT_SYMBOL_GPL(iomap_read_folio); static int iomap_readahead_iter(struct iomap_iter *iter, struct iomap_read_folio_ctx *ctx, size_t *cur_bytes_submitted) { int ret; while (iomap_length(iter)) { if (ctx->cur_folio && offset_in_folio(ctx->cur_folio, iter->pos) == 0) { iomap_read_end(ctx->cur_folio, *cur_bytes_submitted); ctx->cur_folio = NULL; } if (!ctx->cur_folio) { ctx->cur_folio = readahead_folio(ctx->rac); if (WARN_ON_ONCE(!ctx->cur_folio)) return -EINVAL; *cur_bytes_submitted = 0; } ret = iomap_read_folio_iter(iter, ctx, cur_bytes_submitted); if (ret) return ret; } return 0; } /** * iomap_readahead - Attempt to read pages from a file. * @ops: The operations vector for the filesystem. * @ctx: The ctx used for issuing readahead. * * This function is for filesystems to call to implement their readahead * address_space operation. * * Context: The @ops callbacks may submit I/O (eg to read the addresses of * blocks from disc), and may wait for it. The caller may be trying to * access a different page, and so sleeping excessively should be avoided. * It may allocate memory, but should avoid costly allocations. This * function is called with memalloc_nofs set, so allocations will not cause * the filesystem to be reentered. */ void iomap_readahead(const struct iomap_ops *ops, struct iomap_read_folio_ctx *ctx) { struct readahead_control *rac = ctx->rac; struct iomap_iter iter = { .inode = rac->mapping->host, .pos = readahead_pos(rac), .len = readahead_length(rac), }; size_t cur_bytes_submitted; trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); while (iomap_iter(&iter, ops) > 0) iter.status = iomap_readahead_iter(&iter, ctx, &cur_bytes_submitted); if (ctx->ops->submit_read) ctx->ops->submit_read(ctx); if (ctx->cur_folio) iomap_read_end(ctx->cur_folio, cur_bytes_submitted); } EXPORT_SYMBOL_GPL(iomap_readahead); /* * iomap_is_partially_uptodate checks whether blocks within a folio are * uptodate or not. * * Returns true if all blocks which correspond to the specified part * of the folio are uptodate. */ bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count) { struct iomap_folio_state *ifs = folio->private; struct inode *inode = folio->mapping->host; unsigned first, last; if (!ifs) return false; /* Caller's range may extend past the end of this folio */ count = min(folio_size(folio) - from, count); /* First and last blocks in range within folio */ first = from >> inode->i_blkbits; last = (from + count - 1) >> inode->i_blkbits; return ifs_next_nonuptodate_block(folio, first, last) > last; } EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate); /** * iomap_get_folio - get a folio reference for writing * @iter: iteration structure * @pos: start offset of write * @len: Suggested size of folio to create. * * Returns a locked reference to the folio at @pos, or an error pointer if the * folio could not be obtained. */ struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len) { fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS; if (iter->flags & IOMAP_NOWAIT) fgp |= FGP_NOWAIT; if (iter->flags & IOMAP_DONTCACHE) fgp |= FGP_DONTCACHE; fgp |= fgf_set_order(len); return __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT, fgp, mapping_gfp_mask(iter->inode->i_mapping)); } EXPORT_SYMBOL_GPL(iomap_get_folio); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags) { trace_iomap_release_folio(folio->mapping->host, folio_pos(folio), folio_size(folio)); /* * If the folio is dirty, we refuse to release our metadata because * it may be partially dirty. Once we track per-block dirty state, * we can release the metadata if every block is dirty. */ if (folio_test_dirty(folio)) return false; ifs_free(folio); return true; } EXPORT_SYMBOL_GPL(iomap_release_folio); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len) { trace_iomap_invalidate_folio(folio->mapping->host, folio_pos(folio) + offset, len); /* * If we're invalidating the entire folio, clear the dirty state * from it and release it to avoid unnecessary buildup of the LRU. */ if (offset == 0 && len == folio_size(folio)) { WARN_ON_ONCE(folio_test_writeback(folio)); folio_cancel_dirty(folio); ifs_free(folio); } } EXPORT_SYMBOL_GPL(iomap_invalidate_folio); bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio) { struct inode *inode = mapping->host; size_t len = folio_size(folio); ifs_alloc(inode, folio, 0); iomap_set_range_dirty(folio, 0, len); return filemap_dirty_folio(mapping, folio); } EXPORT_SYMBOL_GPL(iomap_dirty_folio); static void iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) { loff_t i_size = i_size_read(inode); /* * Only truncate newly allocated pages beyoned EOF, even if the * write started inside the existing inode size. */ if (pos + len > i_size) truncate_pagecache_range(inode, max(pos, i_size), pos + len - 1); } static int __iomap_write_begin(const struct iomap_iter *iter, const struct iomap_write_ops *write_ops, size_t len, struct folio *folio) { struct iomap_folio_state *ifs; loff_t pos = iter->pos; loff_t block_size = i_blocksize(iter->inode); loff_t block_start = round_down(pos, block_size); loff_t block_end = round_up(pos + len, block_size); unsigned int nr_blocks = i_blocks_per_folio(iter->inode, folio); size_t from = offset_in_folio(folio, pos), to = from + len; size_t poff, plen; /* * If the write or zeroing completely overlaps the current folio, then * entire folio will be dirtied so there is no need for * per-block state tracking structures to be attached to this folio. * For the unshare case, we must read in the ondisk contents because we * are not changing pagecache contents. */ if (!(iter->flags & IOMAP_UNSHARE) && pos <= folio_pos(folio) && pos + len >= folio_next_pos(folio)) return 0; ifs = ifs_alloc(iter->inode, folio, iter->flags); if ((iter->flags & IOMAP_NOWAIT) && !ifs && nr_blocks > 1) return -EAGAIN; if (folio_test_uptodate(folio)) return 0; do { iomap_adjust_read_range(iter->inode, folio, &block_start, block_end - block_start, &poff, &plen); if (plen == 0) break; /* * If the read range will be entirely overwritten by the write, * we can skip having to zero/read it in. */ if (!(iter->flags & IOMAP_UNSHARE) && from <= poff && to >= poff + plen) continue; if (iomap_block_needs_zeroing(iter, block_start)) { if (WARN_ON_ONCE(iter->flags & IOMAP_UNSHARE)) return -EIO; folio_zero_segments(folio, poff, from, to, poff + plen); } else { int status; if (iter->flags & IOMAP_NOWAIT) return -EAGAIN; if (write_ops && write_ops->read_folio_range) status = write_ops->read_folio_range(iter, folio, block_start, plen); else status = iomap_bio_read_folio_range_sync(iter, folio, block_start, plen); if (status) return status; } iomap_set_range_uptodate(folio, poff, plen); } while ((block_start += plen) < block_end); return 0; } static struct folio *__iomap_get_folio(struct iomap_iter *iter, const struct iomap_write_ops *write_ops, size_t len) { loff_t pos = iter->pos; if (!mapping_large_folio_support(iter->inode->i_mapping)) len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos)); if (iter->iomap.flags & IOMAP_F_FOLIO_BATCH) { struct folio *folio = folio_batch_next(iter->fbatch); if (!folio) return NULL; /* * The folio mapping generally shouldn't have changed based on * fs locks, but be consistent with filemap lookup and retry * the iter if it does. */ folio_lock(folio); if (unlikely(folio->mapping != iter->inode->i_mapping)) { iter->iomap.flags |= IOMAP_F_STALE; folio_unlock(folio); return NULL; } folio_get(folio); folio_wait_stable(folio); return folio; } if (write_ops && write_ops->get_folio) return write_ops->get_folio(iter, pos, len); return iomap_get_folio(iter, pos, len); } static void __iomap_put_folio(struct iomap_iter *iter, const struct iomap_write_ops *write_ops, size_t ret, struct folio *folio) { loff_t pos = iter->pos; if (write_ops && write_ops->put_folio) { write_ops->put_folio(iter->inode, pos, ret, folio); } else { folio_unlock(folio); folio_put(folio); } } /* trim pos and bytes to within a given folio */ static loff_t iomap_trim_folio_range(struct iomap_iter *iter, struct folio *folio, size_t *offset, u64 *bytes) { loff_t pos = iter->pos; size_t fsize = folio_size(folio); WARN_ON_ONCE(pos < folio_pos(folio)); WARN_ON_ONCE(pos >= folio_pos(folio) + fsize); *offset = offset_in_folio(folio, pos); *bytes = min(*bytes, fsize - *offset); return pos; } static int iomap_write_begin_inline(const struct iomap_iter *iter, struct folio *folio) { /* needs more work for the tailpacking case; disable for now */ if (WARN_ON_ONCE(iomap_iter_srcmap(iter)->offset != 0)) return -EIO; return iomap_read_inline_data(iter, folio); } /* * Grab and prepare a folio for write based on iter state. Returns the folio, * offset, and length. Callers can optionally pass a max length *plen, * otherwise init to zero. */ static int iomap_write_begin(struct iomap_iter *iter, const struct iomap_write_ops *write_ops, struct folio **foliop, size_t *poffset, u64 *plen) { const struct iomap *srcmap = iomap_iter_srcmap(iter); loff_t pos; u64 len = min_t(u64, SIZE_MAX, iomap_length(iter)); struct folio *folio; int status = 0; len = min_not_zero(len, *plen); *foliop = NULL; *plen = 0; if (fatal_signal_pending(current)) return -EINTR; folio = __iomap_get_folio(iter, write_ops, len); if (IS_ERR(folio)) return PTR_ERR(folio); /* * No folio means we're done with a batch. We still have range to * process so return and let the caller iterate and refill the batch. */ if (!folio) { WARN_ON_ONCE(!(iter->iomap.flags & IOMAP_F_FOLIO_BATCH)); return 0; } /* * Now we have a locked folio, before we do anything with it we need to * check that the iomap we have cached is not stale. The inode extent * mapping can change due to concurrent IO in flight (e.g. * IOMAP_UNWRITTEN state can change and memory reclaim could have * reclaimed a previously partially written page at this index after IO * completion before this write reaches this file offset) and hence we * could do the wrong thing here (zero a page range incorrectly or fail * to zero) and corrupt data. */ if (write_ops && write_ops->iomap_valid) { bool iomap_valid = write_ops->iomap_valid(iter->inode, &iter->iomap); if (!iomap_valid) { iter->iomap.flags |= IOMAP_F_STALE; status = 0; goto out_unlock; } } /* * The folios in a batch may not be contiguous. If we've skipped * forward, advance the iter to the pos of the current folio. If the * folio starts beyond the end of the mapping, it may have been trimmed * since the lookup for whatever reason. Return a NULL folio to * terminate the op. */ if (folio_pos(folio) > iter->pos) { len = min_t(u64, folio_pos(folio) - iter->pos, iomap_length(iter)); status = iomap_iter_advance(iter, len); len = iomap_length(iter); if (status || !len) goto out_unlock; } pos = iomap_trim_folio_range(iter, folio, poffset, &len); if (srcmap->type == IOMAP_INLINE) status = iomap_write_begin_inline(iter, folio); else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) status = __block_write_begin_int(folio, pos, len, NULL, srcmap); else status = __iomap_write_begin(iter, write_ops, len, folio); if (unlikely(status)) goto out_unlock; *foliop = folio; *plen = len; return 0; out_unlock: __iomap_put_folio(iter, write_ops, 0, folio); return status; } static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len, size_t copied, struct folio *folio) { flush_dcache_folio(folio); /* * The blocks that were entirely written will now be uptodate, so we * don't have to worry about a read_folio reading them and overwriting a * partial write. However, if we've encountered a short write and only * partially written into a block, it will not be marked uptodate, so a * read_folio might come in and destroy our partial write. * * Do the simplest thing and just treat any short write to a * non-uptodate page as a zero-length write, and force the caller to * redo the whole thing. */ if (unlikely(copied < len && !folio_test_uptodate(folio))) return false; iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len); iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied); filemap_dirty_folio(inode->i_mapping, folio); return true; } static bool iomap_write_end_inline(const struct iomap_iter *iter, struct folio *folio, loff_t pos, size_t copied) { const struct iomap *iomap = &iter->iomap; void *addr; WARN_ON_ONCE(!folio_test_uptodate(folio)); BUG_ON(!iomap_inline_data_valid(iomap)); if (WARN_ON_ONCE(!iomap->inline_data)) return false; flush_dcache_folio(folio); addr = kmap_local_folio(folio, pos); memcpy(iomap_inline_data(iomap, pos), addr, copied); kunmap_local(addr); mark_inode_dirty(iter->inode); return true; } /* * Returns true if all copied bytes have been written to the pagecache, * otherwise return false. */ static bool iomap_write_end(struct iomap_iter *iter, size_t len, size_t copied, struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); loff_t pos = iter->pos; if (srcmap->type == IOMAP_INLINE) return iomap_write_end_inline(iter, folio, pos, copied); if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { size_t bh_written; bh_written = block_write_end(pos, len, copied, folio); WARN_ON_ONCE(bh_written != copied && bh_written != 0); return bh_written == copied; } return __iomap_write_end(iter->inode, pos, len, copied, folio); } static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i, const struct iomap_write_ops *write_ops) { ssize_t total_written = 0; int status = 0; struct address_space *mapping = iter->inode->i_mapping; size_t chunk = mapping_max_folio_size(mapping); unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0; do { struct folio *folio; loff_t old_size; size_t offset; /* Offset into folio */ u64 bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ u64 written; /* Bytes have been written */ loff_t pos; bytes = iov_iter_count(i); retry: offset = iter->pos & (chunk - 1); bytes = min(chunk - offset, bytes); status = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags); if (unlikely(status)) break; if (bytes > iomap_length(iter)) bytes = iomap_length(iter); /* * Bring in the user page that we'll copy from _first_. * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. * * For async buffered writes the assumption is that the user * page has already been faulted in. This can be optimized by * faulting the user page. */ if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) { status = -EFAULT; break; } status = iomap_write_begin(iter, write_ops, &folio, &offset, &bytes); if (unlikely(status)) { iomap_write_failed(iter->inode, iter->pos, bytes); break; } if (iter->iomap.flags & IOMAP_F_STALE) break; pos = iter->pos; if (mapping_writably_mapped(mapping)) flush_dcache_folio(folio); copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); written = iomap_write_end(iter, bytes, copied, folio) ? copied : 0; /* * Update the in-memory inode size after copying the data into * the page cache. It's up to the file system to write the * updated size to disk, preferably after I/O completion so that * no stale data is exposed. Only once that's done can we * unlock and release the folio. */ old_size = iter->inode->i_size; if (pos + written > old_size) { i_size_write(iter->inode, pos + written); iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; } __iomap_put_folio(iter, write_ops, written, folio); if (old_size < pos) pagecache_isize_extended(iter->inode, old_size, pos); cond_resched(); if (unlikely(written == 0)) { /* * A short copy made iomap_write_end() reject the * thing entirely. Might be memory poisoning * halfway through, might be a race with munmap, * might be severe memory pressure. */ iomap_write_failed(iter->inode, pos, bytes); iov_iter_revert(i, copied); if (chunk > PAGE_SIZE) chunk /= 2; if (copied) { bytes = copied; goto retry; } } else { total_written += written; iomap_iter_advance(iter, written); } } while (iov_iter_count(i) && iomap_length(iter)); return total_written ? 0 : status; } ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private) { struct iomap_iter iter = { .inode = iocb->ki_filp->f_mapping->host, .pos = iocb->ki_pos, .len = iov_iter_count(i), .flags = IOMAP_WRITE, .private = private, }; ssize_t ret; if (iocb->ki_flags & IOCB_NOWAIT) iter.flags |= IOMAP_NOWAIT; if (iocb->ki_flags & IOCB_DONTCACHE) iter.flags |= IOMAP_DONTCACHE; while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_write_iter(&iter, i, write_ops); if (unlikely(iter.pos == iocb->ki_pos)) return ret; ret = iter.pos - iocb->ki_pos; iocb->ki_pos = iter.pos; return ret; } EXPORT_SYMBOL_GPL(iomap_file_buffered_write); static void iomap_write_delalloc_ifs_punch(struct inode *inode, struct folio *folio, loff_t start_byte, loff_t end_byte, struct iomap *iomap, iomap_punch_t punch) { unsigned int first_blk, last_blk; loff_t last_byte; u8 blkbits = inode->i_blkbits; struct iomap_folio_state *ifs; /* * When we have per-block dirty tracking, there can be * blocks within a folio which are marked uptodate * but not dirty. In that case it is necessary to punch * out such blocks to avoid leaking any delalloc blocks. */ ifs = folio->private; if (!ifs) return; last_byte = min_t(loff_t, end_byte - 1, folio_next_pos(folio) - 1); first_blk = offset_in_folio(folio, start_byte) >> blkbits; last_blk = offset_in_folio(folio, last_byte) >> blkbits; while ((first_blk = ifs_next_clean_block(folio, first_blk, last_blk)) <= last_blk) { punch(inode, folio_pos(folio) + (first_blk << blkbits), 1 << blkbits, iomap); first_blk++; } } static void iomap_write_delalloc_punch(struct inode *inode, struct folio *folio, loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte, struct iomap *iomap, iomap_punch_t punch) { if (!folio_test_dirty(folio)) return; /* if dirty, punch up to offset */ if (start_byte > *punch_start_byte) { punch(inode, *punch_start_byte, start_byte - *punch_start_byte, iomap); } /* Punch non-dirty blocks within folio */ iomap_write_delalloc_ifs_punch(inode, folio, start_byte, end_byte, iomap, punch); /* * Make sure the next punch start is correctly bound to * the end of this data range, not the end of the folio. */ *punch_start_byte = min_t(loff_t, end_byte, folio_next_pos(folio)); } /* * Scan the data range passed to us for dirty page cache folios. If we find a * dirty folio, punch out the preceding range and update the offset from which * the next punch will start from. * * We can punch out storage reservations under clean pages because they either * contain data that has been written back - in which case the delalloc punch * over that range is a no-op - or they have been read faults in which case they * contain zeroes and we can remove the delalloc backing range and any new * writes to those pages will do the normal hole filling operation... * * This makes the logic simple: we only need to keep the delalloc extents only * over the dirty ranges of the page cache. * * This function uses [start_byte, end_byte) intervals (i.e. open ended) to * simplify range iterations. */ static void iomap_write_delalloc_scan(struct inode *inode, loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte, struct iomap *iomap, iomap_punch_t punch) { while (start_byte < end_byte) { struct folio *folio; /* grab locked page */ folio = filemap_lock_folio(inode->i_mapping, start_byte >> PAGE_SHIFT); if (IS_ERR(folio)) { start_byte = ALIGN_DOWN(start_byte, PAGE_SIZE) + PAGE_SIZE; continue; } iomap_write_delalloc_punch(inode, folio, punch_start_byte, start_byte, end_byte, iomap, punch); /* move offset to start of next folio in range */ start_byte = folio_next_pos(folio); folio_unlock(folio); folio_put(folio); } } /* * When a short write occurs, the filesystem might need to use ->iomap_end * to remove space reservations created in ->iomap_begin. * * For filesystems that use delayed allocation, there can be dirty pages over * the delalloc extent outside the range of a short write but still within the * delalloc extent allocated for this iomap if the write raced with page * faults. * * Punch out all the delalloc blocks in the range given except for those that * have dirty data still pending in the page cache - those are going to be * written and so must still retain the delalloc backing for writeback. * * The punch() callback *must* only punch delalloc extents in the range passed * to it. It must skip over all other types of extents in the range and leave * them completely unchanged. It must do this punch atomically with respect to * other extent modifications. * * The punch() callback may be called with a folio locked to prevent writeback * extent allocation racing at the edge of the range we are currently punching. * The locked folio may or may not cover the range being punched, so it is not * safe for the punch() callback to lock folios itself. * * Lock order is: * * inode->i_rwsem (shared or exclusive) * inode->i_mapping->invalidate_lock (exclusive) * folio_lock() * ->punch * internal filesystem allocation lock * * As we are scanning the page cache for data, we don't need to reimplement the * wheel - mapping_seek_hole_data() does exactly what we need to identify the * start and end of data ranges correctly even for sub-folio block sizes. This * byte range based iteration is especially convenient because it means we * don't have to care about variable size folios, nor where the start or end of * the data range lies within a folio, if they lie within the same folio or even * if there are multiple discontiguous data ranges within the folio. * * It should be noted that mapping_seek_hole_data() is not aware of EOF, and so * can return data ranges that exist in the cache beyond EOF. e.g. a page fault * spanning EOF will initialise the post-EOF data to zeroes and mark it up to * date. A write page fault can then mark it dirty. If we then fail a write() * beyond EOF into that up to date cached range, we allocate a delalloc block * beyond EOF and then have to punch it out. Because the range is up to date, * mapping_seek_hole_data() will return it, and we will skip the punch because * the folio is dirty. THis is incorrect - we always need to punch out delalloc * beyond EOF in this case as writeback will never write back and covert that * delalloc block beyond EOF. Hence we limit the cached data scan range to EOF, * resulting in always punching out the range from the EOF to the end of the * range the iomap spans. * * Intervals are of the form [start_byte, end_byte) (i.e. open ended) because it * matches the intervals returned by mapping_seek_hole_data(). i.e. SEEK_DATA * returns the start of a data range (start_byte), and SEEK_HOLE(start_byte) * returns the end of the data range (data_end). Using closed intervals would * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose * the code to subtle off-by-one bugs.... */ void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte, loff_t end_byte, unsigned flags, struct iomap *iomap, iomap_punch_t punch) { loff_t punch_start_byte = start_byte; loff_t scan_end_byte = min(i_size_read(inode), end_byte); /* * The caller must hold invalidate_lock to avoid races with page faults * re-instantiating folios and dirtying them via ->page_mkwrite whilst * we walk the cache and perform delalloc extent removal. Failing to do * this can leave dirty pages with no space reservation in the cache. */ lockdep_assert_held_write(&inode->i_mapping->invalidate_lock); while (start_byte < scan_end_byte) { loff_t data_end; start_byte = mapping_seek_hole_data(inode->i_mapping, start_byte, scan_end_byte, SEEK_DATA); /* * If there is no more data to scan, all that is left is to * punch out the remaining range. * * Note that mapping_seek_hole_data is only supposed to return * either an offset or -ENXIO, so WARN on any other error as * that would be an API change without updating the callers. */ if (start_byte == -ENXIO || start_byte == scan_end_byte) break; if (WARN_ON_ONCE(start_byte < 0)) return; WARN_ON_ONCE(start_byte < punch_start_byte); WARN_ON_ONCE(start_byte > scan_end_byte); /* * We find the end of this contiguous cached data range by * seeking from start_byte to the beginning of the next hole. */ data_end = mapping_seek_hole_data(inode->i_mapping, start_byte, scan_end_byte, SEEK_HOLE); if (WARN_ON_ONCE(data_end < 0)) return; /* * If we race with post-direct I/O invalidation of the page cache, * there might be no data left at start_byte. */ if (data_end == start_byte) continue; WARN_ON_ONCE(data_end < start_byte); WARN_ON_ONCE(data_end > scan_end_byte); iomap_write_delalloc_scan(inode, &punch_start_byte, start_byte, data_end, iomap, punch); /* The next data search starts at the end of this one. */ start_byte = data_end; } if (punch_start_byte < end_byte) punch(inode, punch_start_byte, end_byte - punch_start_byte, iomap); } EXPORT_SYMBOL_GPL(iomap_write_delalloc_release); static int iomap_unshare_iter(struct iomap_iter *iter, const struct iomap_write_ops *write_ops) { struct iomap *iomap = &iter->iomap; u64 bytes = iomap_length(iter); int status; if (!iomap_want_unshare_iter(iter)) return iomap_iter_advance(iter, bytes); do { struct folio *folio; size_t offset; bool ret; bytes = min_t(u64, SIZE_MAX, bytes); status = iomap_write_begin(iter, write_ops, &folio, &offset, &bytes); if (unlikely(status)) return status; if (iomap->flags & IOMAP_F_STALE) break; ret = iomap_write_end(iter, bytes, bytes, folio); __iomap_put_folio(iter, write_ops, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; cond_resched(); balance_dirty_pages_ratelimited(iter->inode->i_mapping); status = iomap_iter_advance(iter, bytes); if (status) break; } while ((bytes = iomap_length(iter)) > 0); return status; } int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops) { struct iomap_iter iter = { .inode = inode, .pos = pos, .flags = IOMAP_WRITE | IOMAP_UNSHARE, }; loff_t size = i_size_read(inode); int ret; if (pos < 0 || pos >= size) return 0; iter.len = min(len, size - pos); while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_unshare_iter(&iter, write_ops); return ret; } EXPORT_SYMBOL_GPL(iomap_file_unshare); /* * Flush the remaining range of the iter and mark the current mapping stale. * This is used when zero range sees an unwritten mapping that may have had * dirty pagecache over it. */ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i) { struct address_space *mapping = i->inode->i_mapping; loff_t end = i->pos + i->len - 1; i->iomap.flags |= IOMAP_F_STALE; return filemap_write_and_wait_range(mapping, i->pos, end); } static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero, const struct iomap_write_ops *write_ops) { u64 bytes = iomap_length(iter); int status; do { struct folio *folio; size_t offset; bool ret; bytes = min_t(u64, SIZE_MAX, bytes); status = iomap_write_begin(iter, write_ops, &folio, &offset, &bytes); if (status) return status; if (iter->iomap.flags & IOMAP_F_STALE) break; /* a NULL folio means we're done with a folio batch */ if (!folio) { status = iomap_iter_advance_full(iter); break; } /* warn about zeroing folios beyond eof that won't write back */ WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size); trace_iomap_zero_iter(iter->inode, folio_pos(folio) + offset, bytes); folio_zero_range(folio, offset, bytes); folio_mark_accessed(folio); ret = iomap_write_end(iter, bytes, bytes, folio); __iomap_put_folio(iter, write_ops, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; status = iomap_iter_advance(iter, bytes); if (status) break; } while ((bytes = iomap_length(iter)) > 0); if (did_zero) *did_zero = true; return status; } /** * iomap_fill_dirty_folios - fill a folio batch with dirty folios * @iter: Iteration structure * @start: Start offset of range. Updated based on lookup progress. * @end: End offset of range * @iomap_flags: Flags to set on the associated iomap to track the batch. * * Returns the folio count directly. Also returns the associated control flag if * the the batch lookup is performed and the expected offset of a subsequent * lookup via out params. The caller is responsible to set the flag on the * associated iomap. */ unsigned int iomap_fill_dirty_folios( struct iomap_iter *iter, loff_t *start, loff_t end, unsigned int *iomap_flags) { struct address_space *mapping = iter->inode->i_mapping; pgoff_t pstart = *start >> PAGE_SHIFT; pgoff_t pend = (end - 1) >> PAGE_SHIFT; unsigned int count; if (!iter->fbatch) { *start = end; return 0; } count = filemap_get_folios_dirty(mapping, &pstart, pend, iter->fbatch); *start = (pstart << PAGE_SHIFT); *iomap_flags |= IOMAP_F_FOLIO_BATCH; return count; } EXPORT_SYMBOL_GPL(iomap_fill_dirty_folios); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private) { struct folio_batch fbatch; struct iomap_iter iter = { .inode = inode, .pos = pos, .len = len, .flags = IOMAP_ZERO, .private = private, .fbatch = &fbatch, }; struct address_space *mapping = inode->i_mapping; int ret; bool range_dirty; folio_batch_init(&fbatch); /* * To avoid an unconditional flush, check pagecache state and only flush * if dirty and the fs returns a mapping that might convert on * writeback. */ range_dirty = filemap_range_needs_writeback(mapping, iter.pos, iter.pos + iter.len - 1); while ((ret = iomap_iter(&iter, ops)) > 0) { const struct iomap *srcmap = iomap_iter_srcmap(&iter); if (WARN_ON_ONCE((iter.iomap.flags & IOMAP_F_FOLIO_BATCH) && srcmap->type != IOMAP_UNWRITTEN)) return -EIO; if (!(iter.iomap.flags & IOMAP_F_FOLIO_BATCH) && (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)) { s64 status; if (range_dirty) { range_dirty = false; status = iomap_zero_iter_flush_and_stale(&iter); } else { status = iomap_iter_advance_full(&iter); } iter.status = status; continue; } iter.status = iomap_zero_iter(&iter, did_zero, write_ops); } return ret; } EXPORT_SYMBOL_GPL(iomap_zero_range); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private) { unsigned int blocksize = i_blocksize(inode); unsigned int off = pos & (blocksize - 1); /* Block boundary? Nothing to do */ if (!off) return 0; return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops, write_ops, private); } EXPORT_SYMBOL_GPL(iomap_truncate_page); static int iomap_folio_mkwrite_iter(struct iomap_iter *iter, struct folio *folio) { loff_t length = iomap_length(iter); int ret; if (iter->iomap.flags & IOMAP_F_BUFFER_HEAD) { ret = __block_write_begin_int(folio, iter->pos, length, NULL, &iter->iomap); if (ret) return ret; block_commit_write(folio, 0, length); } else { WARN_ON_ONCE(!folio_test_uptodate(folio)); folio_mark_dirty(folio); } return iomap_iter_advance(iter, length); } vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops, void *private) { struct iomap_iter iter = { .inode = file_inode(vmf->vma->vm_file), .flags = IOMAP_WRITE | IOMAP_FAULT, .private = private, }; struct folio *folio = page_folio(vmf->page); ssize_t ret; folio_lock(folio); ret = folio_mkwrite_check_truncate(folio, iter.inode); if (ret < 0) goto out_unlock; iter.pos = folio_pos(folio); iter.len = ret; while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_folio_mkwrite_iter(&iter, folio); if (ret < 0) goto out_unlock; folio_wait_stable(folio); return VM_FAULT_LOCKED; out_unlock: folio_unlock(folio); return vmf_fs_error(ret); } EXPORT_SYMBOL_GPL(iomap_page_mkwrite); static void iomap_writeback_init(struct inode *inode, struct folio *folio) { struct iomap_folio_state *ifs = folio->private; WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs); if (ifs) { WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0); /* * Set this to the folio size. After processing the folio for * writeback in iomap_writeback_folio(), we'll subtract any * ranges not written back. * * We do this because otherwise, we would have to atomically * increment ifs->write_bytes_pending every time a range in the * folio needs to be written back. */ atomic_set(&ifs->write_bytes_pending, folio_size(folio)); } } void iomap_finish_folio_write(struct inode *inode, struct folio *folio, size_t len) { struct iomap_folio_state *ifs = folio->private; WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs); WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) <= 0); if (!ifs || atomic_sub_and_test(len, &ifs->write_bytes_pending)) folio_end_writeback(folio); } EXPORT_SYMBOL_GPL(iomap_finish_folio_write); static int iomap_writeback_range(struct iomap_writepage_ctx *wpc, struct folio *folio, u64 pos, u32 rlen, u64 end_pos, size_t *bytes_submitted) { do { ssize_t ret; ret = wpc->ops->writeback_range(wpc, folio, pos, rlen, end_pos); if (WARN_ON_ONCE(ret == 0 || ret > rlen)) return -EIO; if (ret < 0) return ret; rlen -= ret; pos += ret; /* * Holes are not written back by ->writeback_range, so track * if we did handle anything that is not a hole here. */ if (wpc->iomap.type != IOMAP_HOLE) *bytes_submitted += ret; } while (rlen); return 0; } /* * Check interaction of the folio with the file end. * * If the folio is entirely beyond i_size, return false. If it straddles * i_size, adjust end_pos and zero all data beyond i_size. */ static bool iomap_writeback_handle_eof(struct folio *folio, struct inode *inode, u64 *end_pos) { u64 isize = i_size_read(inode); if (*end_pos > isize) { size_t poff = offset_in_folio(folio, isize); pgoff_t end_index = isize >> PAGE_SHIFT; /* * If the folio is entirely ouside of i_size, skip it. * * This can happen due to a truncate operation that is in * progress and in that case truncate will finish it off once * we've dropped the folio lock. * * Note that the pgoff_t used for end_index is an unsigned long. * If the given offset is greater than 16TB on a 32-bit system, * then if we checked if the folio is fully outside i_size with * "if (folio->index >= end_index + 1)", "end_index + 1" would * overflow and evaluate to 0. Hence this folio would be * redirtied and written out repeatedly, which would result in * an infinite loop; the user program performing this operation * would hang. Instead, we can detect this situation by * checking if the folio is totally beyond i_size or if its * offset is just equal to the EOF. */ if (folio->index > end_index || (folio->index == end_index && poff == 0)) return false; /* * The folio straddles i_size. * * It must be zeroed out on each and every writepage invocation * because it may be mmapped: * * A file is mapped in multiples of the page size. For a * file that is not a multiple of the page size, the * remaining memory is zeroed when mapped, and writes to that * region are not written out to the file. * * Also adjust the end_pos to the end of file and skip writeback * for all blocks entirely beyond i_size. */ folio_zero_segment(folio, poff, folio_size(folio)); *end_pos = isize; } return true; } int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio) { struct iomap_folio_state *ifs = folio->private; struct inode *inode = wpc->inode; u64 pos = folio_pos(folio); u64 end_pos = pos + folio_size(folio); u64 end_aligned = 0; size_t bytes_submitted = 0; int error = 0; u32 rlen; WARN_ON_ONCE(!folio_test_locked(folio)); WARN_ON_ONCE(folio_test_dirty(folio)); WARN_ON_ONCE(folio_test_writeback(folio)); trace_iomap_writeback_folio(inode, pos, folio_size(folio)); if (!iomap_writeback_handle_eof(folio, inode, &end_pos)) return 0; WARN_ON_ONCE(end_pos <= pos); if (i_blocks_per_folio(inode, folio) > 1) { if (!ifs) { ifs = ifs_alloc(inode, folio, 0); iomap_set_range_dirty(folio, 0, end_pos - pos); } iomap_writeback_init(inode, folio); } /* * Set the writeback bit ASAP, as the I/O completion for the single * block per folio case happen hit as soon as we're submitting the bio. */ folio_start_writeback(folio); /* * Walk through the folio to find dirty areas to write back. */ end_aligned = round_up(end_pos, i_blocksize(inode)); while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) { error = iomap_writeback_range(wpc, folio, pos, rlen, end_pos, &bytes_submitted); if (error) break; pos += rlen; } if (bytes_submitted) wpc->nr_folios++; /* * We can have dirty bits set past end of file in page_mkwrite path * while mapping the last partial folio. Hence it's better to clear * all the dirty bits in the folio here. */ iomap_clear_range_dirty(folio, 0, folio_size(folio)); /* * Usually the writeback bit is cleared by the I/O completion handler. * But we may end up either not actually writing any blocks, or (when * there are multiple blocks in a folio) all I/O might have finished * already at this point. In that case we need to clear the writeback * bit ourselves right after unlocking the page. */ if (ifs) { /* * Subtract any bytes that were initially accounted to * write_bytes_pending but skipped for writeback. */ size_t bytes_not_submitted = folio_size(folio) - bytes_submitted; if (bytes_not_submitted) iomap_finish_folio_write(inode, folio, bytes_not_submitted); } else if (!bytes_submitted) { folio_end_writeback(folio); } mapping_set_error(inode->i_mapping, error); return error; } EXPORT_SYMBOL_GPL(iomap_writeback_folio); int iomap_writepages(struct iomap_writepage_ctx *wpc) { struct address_space *mapping = wpc->inode->i_mapping; struct folio *folio = NULL; int error; /* * Writeback from reclaim context should never happen except in the case * of a VM regression so warn about it and refuse to write the data. */ if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC | PF_KSWAPD)) == PF_MEMALLOC)) return -EIO; while ((folio = writeback_iter(mapping, wpc->wbc, folio, &error))) { error = iomap_writeback_folio(wpc, folio); folio_unlock(folio); } /* * If @error is non-zero, it means that we have a situation where some * part of the submission process has failed after we've marked pages * for writeback. * * We cannot cancel the writeback directly in that case, so always call * ->writeback_submit to run the I/O completion handler to clear the * writeback bit and let the file system proess the errors. */ if (wpc->wb_ctx) return wpc->ops->writeback_submit(wpc, error); return error; } EXPORT_SYMBOL_GPL(iomap_writepages); |
| 32 32 32 32 96 96 14 14 14 14 14 14 14 14 14 4 4 4 4 4 4 1 9 9 3 3 3 3 3 3 3 3 3 90 84 94 81 76 20 14 6 6 1 5 5 79 113 96 113 112 113 113 82 113 1 1 1 3 3 3 3 3 3 3 3 3 3 3 3 13 13 13 13 38 38 38 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 | /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <linux/errno.h> #include <linux/err.h> #include <linux/export.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/in.h> #include <linux/in6.h> #include <net/addrconf.h> #include <linux/security.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> #include <rdma/ib_addr.h> #include <rdma/rw.h> #include <rdma/lag.h> #include "core_priv.h" #include <trace/events/rdma_core.h> static int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr); static const char * const ib_events[] = { [IB_EVENT_CQ_ERR] = "CQ error", [IB_EVENT_QP_FATAL] = "QP fatal error", [IB_EVENT_QP_REQ_ERR] = "QP request error", [IB_EVENT_QP_ACCESS_ERR] = "QP access error", [IB_EVENT_COMM_EST] = "communication established", [IB_EVENT_SQ_DRAINED] = "send queue drained", [IB_EVENT_PATH_MIG] = "path migration successful", [IB_EVENT_PATH_MIG_ERR] = "path migration error", [IB_EVENT_DEVICE_FATAL] = "device fatal error", [IB_EVENT_PORT_ACTIVE] = "port active", [IB_EVENT_PORT_ERR] = "port error", [IB_EVENT_LID_CHANGE] = "LID change", [IB_EVENT_PKEY_CHANGE] = "P_key change", [IB_EVENT_SM_CHANGE] = "SM change", [IB_EVENT_SRQ_ERR] = "SRQ error", [IB_EVENT_SRQ_LIMIT_REACHED] = "SRQ limit reached", [IB_EVENT_QP_LAST_WQE_REACHED] = "last WQE reached", [IB_EVENT_CLIENT_REREGISTER] = "client reregister", [IB_EVENT_GID_CHANGE] = "GID changed", }; const char *__attribute_const__ ib_event_msg(enum ib_event_type event) { size_t index = event; return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ? ib_events[index] : "unrecognized event"; } EXPORT_SYMBOL(ib_event_msg); static const char * const wc_statuses[] = { [IB_WC_SUCCESS] = "success", [IB_WC_LOC_LEN_ERR] = "local length error", [IB_WC_LOC_QP_OP_ERR] = "local QP operation error", [IB_WC_LOC_EEC_OP_ERR] = "local EE context operation error", [IB_WC_LOC_PROT_ERR] = "local protection error", [IB_WC_WR_FLUSH_ERR] = "WR flushed", [IB_WC_MW_BIND_ERR] = "memory bind operation error", [IB_WC_BAD_RESP_ERR] = "bad response error", [IB_WC_LOC_ACCESS_ERR] = "local access error", [IB_WC_REM_INV_REQ_ERR] = "remote invalid request error", [IB_WC_REM_ACCESS_ERR] = "remote access error", [IB_WC_REM_OP_ERR] = "remote operation error", [IB_WC_RETRY_EXC_ERR] = "transport retry counter exceeded", [IB_WC_RNR_RETRY_EXC_ERR] = "RNR retry counter exceeded", [IB_WC_LOC_RDD_VIOL_ERR] = "local RDD violation error", [IB_WC_REM_INV_RD_REQ_ERR] = "remote invalid RD request", [IB_WC_REM_ABORT_ERR] = "operation aborted", [IB_WC_INV_EECN_ERR] = "invalid EE context number", [IB_WC_INV_EEC_STATE_ERR] = "invalid EE context state", [IB_WC_FATAL_ERR] = "fatal error", [IB_WC_RESP_TIMEOUT_ERR] = "response timeout error", [IB_WC_GENERAL_ERR] = "general error", }; const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status) { size_t index = status; return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ? wc_statuses[index] : "unrecognized status"; } EXPORT_SYMBOL(ib_wc_status_msg); __attribute_const__ int ib_rate_to_mult(enum ib_rate rate) { switch (rate) { case IB_RATE_2_5_GBPS: return 1; case IB_RATE_5_GBPS: return 2; case IB_RATE_10_GBPS: return 4; case IB_RATE_20_GBPS: return 8; case IB_RATE_30_GBPS: return 12; case IB_RATE_40_GBPS: return 16; case IB_RATE_60_GBPS: return 24; case IB_RATE_80_GBPS: return 32; case IB_RATE_120_GBPS: return 48; case IB_RATE_14_GBPS: return 6; case IB_RATE_56_GBPS: return 22; case IB_RATE_112_GBPS: return 45; case IB_RATE_168_GBPS: return 67; case IB_RATE_25_GBPS: return 10; case IB_RATE_100_GBPS: return 40; case IB_RATE_200_GBPS: return 80; case IB_RATE_300_GBPS: return 120; case IB_RATE_28_GBPS: return 11; case IB_RATE_50_GBPS: return 20; case IB_RATE_400_GBPS: return 160; case IB_RATE_600_GBPS: return 240; case IB_RATE_800_GBPS: return 320; case IB_RATE_1600_GBPS: return 640; default: return -1; } } EXPORT_SYMBOL(ib_rate_to_mult); __attribute_const__ enum ib_rate mult_to_ib_rate(int mult) { switch (mult) { case 1: return IB_RATE_2_5_GBPS; case 2: return IB_RATE_5_GBPS; case 4: return IB_RATE_10_GBPS; case 8: return IB_RATE_20_GBPS; case 12: return IB_RATE_30_GBPS; case 16: return IB_RATE_40_GBPS; case 24: return IB_RATE_60_GBPS; case 32: return IB_RATE_80_GBPS; case 48: return IB_RATE_120_GBPS; case 6: return IB_RATE_14_GBPS; case 22: return IB_RATE_56_GBPS; case 45: return IB_RATE_112_GBPS; case 67: return IB_RATE_168_GBPS; case 10: return IB_RATE_25_GBPS; case 40: return IB_RATE_100_GBPS; case 80: return IB_RATE_200_GBPS; case 120: return IB_RATE_300_GBPS; case 11: return IB_RATE_28_GBPS; case 20: return IB_RATE_50_GBPS; case 160: return IB_RATE_400_GBPS; case 240: return IB_RATE_600_GBPS; case 320: return IB_RATE_800_GBPS; case 640: return IB_RATE_1600_GBPS; default: return IB_RATE_PORT_CURRENT; } } EXPORT_SYMBOL(mult_to_ib_rate); __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate) { switch (rate) { case IB_RATE_2_5_GBPS: return 2500; case IB_RATE_5_GBPS: return 5000; case IB_RATE_10_GBPS: return 10000; case IB_RATE_20_GBPS: return 20000; case IB_RATE_30_GBPS: return 30000; case IB_RATE_40_GBPS: return 40000; case IB_RATE_60_GBPS: return 60000; case IB_RATE_80_GBPS: return 80000; case IB_RATE_120_GBPS: return 120000; case IB_RATE_14_GBPS: return 14062; case IB_RATE_56_GBPS: return 56250; case IB_RATE_112_GBPS: return 112500; case IB_RATE_168_GBPS: return 168750; case IB_RATE_25_GBPS: return 25781; case IB_RATE_100_GBPS: return 103125; case IB_RATE_200_GBPS: return 206250; case IB_RATE_300_GBPS: return 309375; case IB_RATE_28_GBPS: return 28125; case IB_RATE_50_GBPS: return 53125; case IB_RATE_400_GBPS: return 425000; case IB_RATE_600_GBPS: return 637500; case IB_RATE_800_GBPS: return 850000; case IB_RATE_1600_GBPS: return 1700000; default: return -1; } } EXPORT_SYMBOL(ib_rate_to_mbps); __attribute_const__ enum rdma_transport_type rdma_node_get_transport(unsigned int node_type) { if (node_type == RDMA_NODE_USNIC) return RDMA_TRANSPORT_USNIC; if (node_type == RDMA_NODE_USNIC_UDP) return RDMA_TRANSPORT_USNIC_UDP; if (node_type == RDMA_NODE_RNIC) return RDMA_TRANSPORT_IWARP; if (node_type == RDMA_NODE_UNSPECIFIED) return RDMA_TRANSPORT_UNSPECIFIED; return RDMA_TRANSPORT_IB; } EXPORT_SYMBOL(rdma_node_get_transport); enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u32 port_num) { enum rdma_transport_type lt; if (device->ops.get_link_layer) return device->ops.get_link_layer(device, port_num); lt = rdma_node_get_transport(device->node_type); if (lt == RDMA_TRANSPORT_IB) return IB_LINK_LAYER_INFINIBAND; return IB_LINK_LAYER_ETHERNET; } EXPORT_SYMBOL(rdma_port_get_link_layer); /* Protection domains */ /** * __ib_alloc_pd - Allocates an unused protection domain. * @device: The device on which to allocate the protection domain. * @flags: protection domain flags * @caller: caller's build-time module name * * A protection domain object provides an association between QPs, shared * receive queues, address handles, memory regions, and memory windows. * * Every PD has a local_dma_lkey which can be used as the lkey value for local * memory operations. */ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const char *caller) { struct ib_pd *pd; int mr_access_flags = 0; int ret; pd = rdma_zalloc_drv_obj(device, ib_pd); if (!pd) return ERR_PTR(-ENOMEM); pd->device = device; pd->flags = flags; rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD); rdma_restrack_set_name(&pd->res, caller); ret = device->ops.alloc_pd(pd, NULL); if (ret) { rdma_restrack_put(&pd->res); kfree(pd); return ERR_PTR(ret); } rdma_restrack_add(&pd->res); if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY) pd->local_dma_lkey = device->local_dma_lkey; else mr_access_flags |= IB_ACCESS_LOCAL_WRITE; if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) { pr_warn("%s: enabling unsafe global rkey\n", caller); mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; } if (mr_access_flags) { struct ib_mr *mr; mr = pd->device->ops.get_dma_mr(pd, mr_access_flags); if (IS_ERR(mr)) { ib_dealloc_pd(pd); return ERR_CAST(mr); } mr->device = pd->device; mr->pd = pd; mr->type = IB_MR_TYPE_DMA; mr->uobject = NULL; mr->need_inval = false; pd->__internal_mr = mr; if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)) pd->local_dma_lkey = pd->__internal_mr->lkey; if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) pd->unsafe_global_rkey = pd->__internal_mr->rkey; } return pd; } EXPORT_SYMBOL(__ib_alloc_pd); /** * ib_dealloc_pd_user - Deallocates a protection domain. * @pd: The protection domain to deallocate. * @udata: Valid user data or NULL for kernel object * * It is an error to call this function while any resources in the pd still * exist. The caller is responsible to synchronously destroy them and * guarantee no new allocations will happen. */ int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata) { int ret; if (pd->__internal_mr) { ret = pd->device->ops.dereg_mr(pd->__internal_mr, NULL); WARN_ON(ret); pd->__internal_mr = NULL; } ret = pd->device->ops.dealloc_pd(pd, udata); if (ret) return ret; rdma_restrack_del(&pd->res); kfree(pd); return ret; } EXPORT_SYMBOL(ib_dealloc_pd_user); /* Address handles */ /** * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination. * @dest: Pointer to destination ah_attr. Contents of the destination * pointer is assumed to be invalid and attribute are overwritten. * @src: Pointer to source ah_attr. */ void rdma_copy_ah_attr(struct rdma_ah_attr *dest, const struct rdma_ah_attr *src) { *dest = *src; if (dest->grh.sgid_attr) rdma_hold_gid_attr(dest->grh.sgid_attr); } EXPORT_SYMBOL(rdma_copy_ah_attr); /** * rdma_replace_ah_attr - Replace valid ah_attr with new one. * @old: Pointer to existing ah_attr which needs to be replaced. * old is assumed to be valid or zero'd * @new: Pointer to the new ah_attr. * * rdma_replace_ah_attr() first releases any reference in the old ah_attr if * old the ah_attr is valid; after that it copies the new attribute and holds * the reference to the replaced ah_attr. */ void rdma_replace_ah_attr(struct rdma_ah_attr *old, const struct rdma_ah_attr *new) { rdma_destroy_ah_attr(old); *old = *new; if (old->grh.sgid_attr) rdma_hold_gid_attr(old->grh.sgid_attr); } EXPORT_SYMBOL(rdma_replace_ah_attr); /** * rdma_move_ah_attr - Move ah_attr pointed by source to destination. * @dest: Pointer to destination ah_attr to copy to. * dest is assumed to be valid or zero'd * @src: Pointer to the new ah_attr. * * rdma_move_ah_attr() first releases any reference in the destination ah_attr * if it is valid. This also transfers ownership of internal references from * src to dest, making src invalid in the process. No new reference of the src * ah_attr is taken. */ void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src) { rdma_destroy_ah_attr(dest); *dest = *src; src->grh.sgid_attr = NULL; } EXPORT_SYMBOL(rdma_move_ah_attr); /* * Validate that the rdma_ah_attr is valid for the device before passing it * off to the driver. */ static int rdma_check_ah_attr(struct ib_device *device, struct rdma_ah_attr *ah_attr) { if (!rdma_is_port_valid(device, ah_attr->port_num)) return -EINVAL; if ((rdma_is_grh_required(device, ah_attr->port_num) || ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) && !(ah_attr->ah_flags & IB_AH_GRH)) return -EINVAL; if (ah_attr->grh.sgid_attr) { /* * Make sure the passed sgid_attr is consistent with the * parameters */ if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index || ah_attr->grh.sgid_attr->port_num != ah_attr->port_num) return -EINVAL; } return 0; } /* * If the ah requires a GRH then ensure that sgid_attr pointer is filled in. * On success the caller is responsible to call rdma_unfill_sgid_attr(). */ static int rdma_fill_sgid_attr(struct ib_device *device, struct rdma_ah_attr *ah_attr, const struct ib_gid_attr **old_sgid_attr) { const struct ib_gid_attr *sgid_attr; struct ib_global_route *grh; int ret; *old_sgid_attr = ah_attr->grh.sgid_attr; ret = rdma_check_ah_attr(device, ah_attr); if (ret) return ret; if (!(ah_attr->ah_flags & IB_AH_GRH)) return 0; grh = rdma_ah_retrieve_grh(ah_attr); if (grh->sgid_attr) return 0; sgid_attr = rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index); if (IS_ERR(sgid_attr)) return PTR_ERR(sgid_attr); /* Move ownerhip of the kref into the ah_attr */ grh->sgid_attr = sgid_attr; return 0; } static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr, const struct ib_gid_attr *old_sgid_attr) { /* * Fill didn't change anything, the caller retains ownership of * whatever it passed */ if (ah_attr->grh.sgid_attr == old_sgid_attr) return; /* * Otherwise, we need to undo what rdma_fill_sgid_attr so the caller * doesn't see any change in the rdma_ah_attr. If we get here * old_sgid_attr is NULL. */ rdma_destroy_ah_attr(ah_attr); } static const struct ib_gid_attr * rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr, const struct ib_gid_attr *old_attr) { if (old_attr) rdma_put_gid_attr(old_attr); if (ah_attr->ah_flags & IB_AH_GRH) { rdma_hold_gid_attr(ah_attr->grh.sgid_attr); return ah_attr->grh.sgid_attr; } return NULL; } static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata, struct net_device *xmit_slave) { struct rdma_ah_init_attr init_attr = {}; struct ib_device *device = pd->device; struct ib_ah *ah; int ret; might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE); if (!udata && !device->ops.create_ah) return ERR_PTR(-EOPNOTSUPP); ah = rdma_zalloc_drv_obj_gfp( device, ib_ah, (flags & RDMA_CREATE_AH_SLEEPABLE) ? GFP_KERNEL : GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); ah->device = device; ah->pd = pd; ah->type = ah_attr->type; ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); init_attr.ah_attr = ah_attr; init_attr.flags = flags; init_attr.xmit_slave = xmit_slave; if (udata) ret = device->ops.create_user_ah(ah, &init_attr, udata); else ret = device->ops.create_ah(ah, &init_attr, NULL); if (ret) { if (ah->sgid_attr) rdma_put_gid_attr(ah->sgid_attr); kfree(ah); return ERR_PTR(ret); } atomic_inc(&pd->usecnt); return ah; } /** * rdma_create_ah - Creates an address handle for the * given address vector. * @pd: The protection domain associated with the address handle. * @ah_attr: The attributes of the address vector. * @flags: Create address handle flags (see enum rdma_create_ah_flags). * * It returns 0 on success and returns appropriate error code on error. * The address handle is used to reference a local or global destination * in all UD QP post sends. */ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags) { const struct ib_gid_attr *old_sgid_attr; struct net_device *slave; struct ib_ah *ah; int ret; ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); if (ret) return ERR_PTR(ret); slave = rdma_lag_get_ah_roce_slave(pd->device, ah_attr, (flags & RDMA_CREATE_AH_SLEEPABLE) ? GFP_KERNEL : GFP_ATOMIC); if (IS_ERR(slave)) { rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); return ERR_CAST(slave); } ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave); rdma_lag_put_ah_roce_slave(slave); rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); return ah; } EXPORT_SYMBOL(rdma_create_ah); /** * rdma_create_user_ah - Creates an address handle for the * given address vector. * It resolves destination mac address for ah attribute of RoCE type. * @pd: The protection domain associated with the address handle. * @ah_attr: The attributes of the address vector. * @udata: pointer to user's input output buffer information need by * provider driver. * * It returns 0 on success and returns appropriate error code on error. * The address handle is used to reference a local or global destination * in all UD QP post sends. */ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata) { const struct ib_gid_attr *old_sgid_attr; struct ib_ah *ah; int err; err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); if (err) return ERR_PTR(err); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { err = ib_resolve_eth_dmac(pd->device, ah_attr); if (err) { ah = ERR_PTR(err); goto out; } } ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata, NULL); out: rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); return ah; } EXPORT_SYMBOL(rdma_create_user_ah); int ib_get_rdma_header_version(const union rdma_network_hdr *hdr) { const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh; struct iphdr ip4h_checked; const struct ipv6hdr *ip6h = (struct ipv6hdr *)&hdr->ibgrh; /* If it's IPv6, the version must be 6, otherwise, the first * 20 bytes (before the IPv4 header) are garbled. */ if (ip6h->version != 6) return (ip4h->version == 4) ? 4 : 0; /* version may be 6 or 4 because the first 20 bytes could be garbled */ /* RoCE v2 requires no options, thus header length * must be 5 words */ if (ip4h->ihl != 5) return 6; /* Verify checksum. * We can't write on scattered buffers so we need to copy to * temp buffer. */ memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked)); ip4h_checked.check = 0; ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5); /* if IPv4 header checksum is OK, believe it */ if (ip4h->check == ip4h_checked.check) return 4; return 6; } EXPORT_SYMBOL(ib_get_rdma_header_version); static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device, u32 port_num, const struct ib_grh *grh) { int grh_version; if (rdma_protocol_ib(device, port_num)) return RDMA_NETWORK_IB; grh_version = ib_get_rdma_header_version((union rdma_network_hdr *)grh); if (grh_version == 4) return RDMA_NETWORK_IPV4; if (grh->next_hdr == IPPROTO_UDP) return RDMA_NETWORK_IPV6; return RDMA_NETWORK_ROCE_V1; } struct find_gid_index_context { u16 vlan_id; enum ib_gid_type gid_type; }; static bool find_gid_index(const union ib_gid *gid, const struct ib_gid_attr *gid_attr, void *context) { struct find_gid_index_context *ctx = context; u16 vlan_id = 0xffff; int ret; if (ctx->gid_type != gid_attr->gid_type) return false; ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); if (ret) return false; return ctx->vlan_id == vlan_id; } static const struct ib_gid_attr * get_sgid_attr_from_eth(struct ib_device *device, u32 port_num, u16 vlan_id, const union ib_gid *sgid, enum ib_gid_type gid_type) { struct find_gid_index_context context = {.vlan_id = vlan_id, .gid_type = gid_type}; return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index, &context); } int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, enum rdma_network_type net_type, union ib_gid *sgid, union ib_gid *dgid) { struct sockaddr_in src_in; struct sockaddr_in dst_in; __be32 src_saddr, dst_saddr; if (!sgid || !dgid) return -EINVAL; if (net_type == RDMA_NETWORK_IPV4) { memcpy(&src_in.sin_addr.s_addr, &hdr->roce4grh.saddr, 4); memcpy(&dst_in.sin_addr.s_addr, &hdr->roce4grh.daddr, 4); src_saddr = src_in.sin_addr.s_addr; dst_saddr = dst_in.sin_addr.s_addr; ipv6_addr_set_v4mapped(src_saddr, (struct in6_addr *)sgid); ipv6_addr_set_v4mapped(dst_saddr, (struct in6_addr *)dgid); return 0; } else if (net_type == RDMA_NETWORK_IPV6 || net_type == RDMA_NETWORK_IB || net_type == RDMA_NETWORK_ROCE_V1) { *dgid = hdr->ibgrh.dgid; *sgid = hdr->ibgrh.sgid; return 0; } else { return -EINVAL; } } EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); /* Resolve destination mac address and hop limit for unicast destination * GID entry, considering the source GID entry as well. * ah_attribute must have valid port_num, sgid_index. */ static int ib_resolve_unicast_gid_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr) { struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr); const struct ib_gid_attr *sgid_attr = grh->sgid_attr; int hop_limit = 0xff; int ret = 0; /* If destination is link local and source GID is RoCEv1, * IP stack is not used. */ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) && sgid_attr->gid_type == IB_GID_TYPE_ROCE) { rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw, ah_attr->roce.dmac); return ret; } ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid, ah_attr->roce.dmac, sgid_attr, &hop_limit); grh->hop_limit = hop_limit; return ret; } /* * This function initializes address handle attributes from the incoming packet. * Incoming packet has dgid of the receiver node on which this code is * getting executed and, sgid contains the GID of the sender. * * When resolving mac address of destination, the arrived dgid is used * as sgid and, sgid is used as dgid because sgid contains destinations * GID whom to respond to. * * On success the caller is responsible to call rdma_destroy_ah_attr on the * attr. */ int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct rdma_ah_attr *ah_attr) { u32 flow_class; int ret; enum rdma_network_type net_type = RDMA_NETWORK_IB; enum ib_gid_type gid_type = IB_GID_TYPE_IB; const struct ib_gid_attr *sgid_attr; int hoplimit = 0xff; union ib_gid dgid; union ib_gid sgid; might_sleep(); memset(ah_attr, 0, sizeof *ah_attr); ah_attr->type = rdma_ah_find_type(device, port_num); if (rdma_cap_eth_ah(device, port_num)) { if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE) net_type = wc->network_hdr_type; else net_type = ib_get_net_type_by_grh(device, port_num, grh); gid_type = ib_network_to_gid_type(net_type); } ret = ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type, &sgid, &dgid); if (ret) return ret; rdma_ah_set_sl(ah_attr, wc->sl); rdma_ah_set_port_num(ah_attr, port_num); if (rdma_protocol_roce(device, port_num)) { u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ? wc->vlan_id : 0xffff; if (!(wc->wc_flags & IB_WC_GRH)) return -EPROTOTYPE; sgid_attr = get_sgid_attr_from_eth(device, port_num, vlan_id, &dgid, gid_type); if (IS_ERR(sgid_attr)) return PTR_ERR(sgid_attr); flow_class = be32_to_cpu(grh->version_tclass_flow); rdma_move_grh_sgid_attr(ah_attr, &sgid, flow_class & 0xFFFFF, hoplimit, (flow_class >> 20) & 0xFF, sgid_attr); ret = ib_resolve_unicast_gid_dmac(device, ah_attr); if (ret) rdma_destroy_ah_attr(ah_attr); return ret; } else { rdma_ah_set_dlid(ah_attr, wc->slid); rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits); if ((wc->wc_flags & IB_WC_GRH) == 0) return 0; if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { sgid_attr = rdma_find_gid_by_port( device, &dgid, IB_GID_TYPE_IB, port_num, NULL); } else sgid_attr = rdma_get_gid_attr(device, port_num, 0); if (IS_ERR(sgid_attr)) return PTR_ERR(sgid_attr); flow_class = be32_to_cpu(grh->version_tclass_flow); rdma_move_grh_sgid_attr(ah_attr, &sgid, flow_class & 0xFFFFF, hoplimit, (flow_class >> 20) & 0xFF, sgid_attr); return 0; } } EXPORT_SYMBOL(ib_init_ah_attr_from_wc); /** * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership * of the reference * * @attr: Pointer to AH attribute structure * @dgid: Destination GID * @flow_label: Flow label * @hop_limit: Hop limit * @traffic_class: traffic class * @sgid_attr: Pointer to SGID attribute * * This takes ownership of the sgid_attr reference. The caller must ensure * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after * calling this function. */ void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid, u32 flow_label, u8 hop_limit, u8 traffic_class, const struct ib_gid_attr *sgid_attr) { rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit, traffic_class); attr->grh.sgid_attr = sgid_attr; } EXPORT_SYMBOL(rdma_move_grh_sgid_attr); /** * rdma_destroy_ah_attr - Release reference to SGID attribute of * ah attribute. * @ah_attr: Pointer to ah attribute * * Release reference to the SGID attribute of the ah attribute if it is * non NULL. It is safe to call this multiple times, and safe to call it on * a zero initialized ah_attr. */ void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr) { if (ah_attr->grh.sgid_attr) { rdma_put_gid_attr(ah_attr->grh.sgid_attr); ah_attr->grh.sgid_attr = NULL; } } EXPORT_SYMBOL(rdma_destroy_ah_attr); struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, const struct ib_grh *grh, u32 port_num) { struct rdma_ah_attr ah_attr; struct ib_ah *ah; int ret; ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr); if (ret) return ERR_PTR(ret); ah = rdma_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE); rdma_destroy_ah_attr(&ah_attr); return ah; } EXPORT_SYMBOL(ib_create_ah_from_wc); int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { const struct ib_gid_attr *old_sgid_attr; int ret; if (ah->type != ah_attr->type) return -EINVAL; ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr); if (ret) return ret; ret = ah->device->ops.modify_ah ? ah->device->ops.modify_ah(ah, ah_attr) : -EOPNOTSUPP; ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr); rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); return ret; } EXPORT_SYMBOL(rdma_modify_ah); int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { ah_attr->grh.sgid_attr = NULL; return ah->device->ops.query_ah ? ah->device->ops.query_ah(ah, ah_attr) : -EOPNOTSUPP; } EXPORT_SYMBOL(rdma_query_ah); int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata) { const struct ib_gid_attr *sgid_attr = ah->sgid_attr; struct ib_pd *pd; int ret; might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE); pd = ah->pd; ret = ah->device->ops.destroy_ah(ah, flags); if (ret) return ret; atomic_dec(&pd->usecnt); if (sgid_attr) rdma_put_gid_attr(sgid_attr); kfree(ah); return ret; } EXPORT_SYMBOL(rdma_destroy_ah_user); /* Shared receive queues */ /** * ib_create_srq_user - Creates a SRQ associated with the specified protection * domain. * @pd: The protection domain associated with the SRQ. * @srq_init_attr: A list of initial attributes required to create the * SRQ. If SRQ creation succeeds, then the attributes are updated to * the actual capabilities of the created SRQ. * @uobject: uobject pointer if this is not a kernel SRQ * @udata: udata pointer if this is not a kernel SRQ * * srq_attr->max_wr and srq_attr->max_sge are read the determine the * requested size of the SRQ, and set to the actual values allocated * on return. If ib_create_srq() succeeds, then max_wr and max_sge * will always be at least as large as the requested values. */ struct ib_srq *ib_create_srq_user(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_usrq_object *uobject, struct ib_udata *udata) { struct ib_srq *srq; int ret; srq = rdma_zalloc_drv_obj(pd->device, ib_srq); if (!srq) return ERR_PTR(-ENOMEM); srq->device = pd->device; srq->pd = pd; srq->event_handler = srq_init_attr->event_handler; srq->srq_context = srq_init_attr->srq_context; srq->srq_type = srq_init_attr->srq_type; srq->uobject = uobject; if (ib_srq_has_cq(srq->srq_type)) { srq->ext.cq = srq_init_attr->ext.cq; atomic_inc(&srq->ext.cq->usecnt); } if (srq->srq_type == IB_SRQT_XRC) { srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; if (srq->ext.xrc.xrcd) atomic_inc(&srq->ext.xrc.xrcd->usecnt); } atomic_inc(&pd->usecnt); rdma_restrack_new(&srq->res, RDMA_RESTRACK_SRQ); rdma_restrack_parent_name(&srq->res, &pd->res); ret = pd->device->ops.create_srq(srq, srq_init_attr, udata); if (ret) { rdma_restrack_put(&srq->res); atomic_dec(&pd->usecnt); if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd) atomic_dec(&srq->ext.xrc.xrcd->usecnt); if (ib_srq_has_cq(srq->srq_type)) atomic_dec(&srq->ext.cq->usecnt); kfree(srq); return ERR_PTR(ret); } rdma_restrack_add(&srq->res); return srq; } EXPORT_SYMBOL(ib_create_srq_user); int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask) { return srq->device->ops.modify_srq ? srq->device->ops.modify_srq(srq, srq_attr, srq_attr_mask, NULL) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_modify_srq); int ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) { return srq->device->ops.query_srq ? srq->device->ops.query_srq(srq, srq_attr) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_query_srq); int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata) { int ret; if (atomic_read(&srq->usecnt)) return -EBUSY; ret = srq->device->ops.destroy_srq(srq, udata); if (ret) return ret; atomic_dec(&srq->pd->usecnt); if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd) atomic_dec(&srq->ext.xrc.xrcd->usecnt); if (ib_srq_has_cq(srq->srq_type)) atomic_dec(&srq->ext.cq->usecnt); rdma_restrack_del(&srq->res); kfree(srq); return ret; } EXPORT_SYMBOL(ib_destroy_srq_user); /* Queue pairs */ static void __ib_qp_event_handler(struct ib_event *event, void *context) { struct ib_qp *qp = event->element.qp; if (event->event == IB_EVENT_QP_LAST_WQE_REACHED) complete(&qp->srq_completion); if (qp->registered_event_handler) qp->registered_event_handler(event, qp->qp_context); } static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) { struct ib_qp *qp = context; unsigned long flags; spin_lock_irqsave(&qp->device->qp_open_list_lock, flags); list_for_each_entry(event->element.qp, &qp->open_list, open_list) if (event->element.qp->event_handler) event->element.qp->event_handler(event, event->element.qp->qp_context); spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags); } static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, void (*event_handler)(struct ib_event *, void *), void *qp_context) { struct ib_qp *qp; unsigned long flags; int err; qp = kzalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); qp->real_qp = real_qp; err = ib_open_shared_qp_security(qp, real_qp->device); if (err) { kfree(qp); return ERR_PTR(err); } qp->real_qp = real_qp; atomic_inc(&real_qp->usecnt); qp->device = real_qp->device; qp->event_handler = event_handler; qp->qp_context = qp_context; qp->qp_num = real_qp->qp_num; qp->qp_type = real_qp->qp_type; spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags); list_add(&qp->open_list, &real_qp->open_list); spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags); return qp; } struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, struct ib_qp_open_attr *qp_open_attr) { struct ib_qp *qp, *real_qp; if (qp_open_attr->qp_type != IB_QPT_XRC_TGT) return ERR_PTR(-EINVAL); down_read(&xrcd->tgt_qps_rwsem); real_qp = xa_load(&xrcd->tgt_qps, qp_open_attr->qp_num); if (!real_qp) { up_read(&xrcd->tgt_qps_rwsem); return ERR_PTR(-EINVAL); } qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, qp_open_attr->qp_context); up_read(&xrcd->tgt_qps_rwsem); return qp; } EXPORT_SYMBOL(ib_open_qp); static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr) { struct ib_qp *real_qp = qp; int err; qp->event_handler = __ib_shared_qp_event_handler; qp->qp_context = qp; qp->pd = NULL; qp->send_cq = qp->recv_cq = NULL; qp->srq = NULL; qp->xrcd = qp_init_attr->xrcd; atomic_inc(&qp_init_attr->xrcd->usecnt); INIT_LIST_HEAD(&qp->open_list); qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, qp_init_attr->qp_context); if (IS_ERR(qp)) return qp; err = xa_err(xa_store(&qp_init_attr->xrcd->tgt_qps, real_qp->qp_num, real_qp, GFP_KERNEL)); if (err) { ib_close_qp(qp); return ERR_PTR(err); } return qp; } static struct ib_qp *create_qp(struct ib_device *dev, struct ib_pd *pd, struct ib_qp_init_attr *attr, struct ib_udata *udata, struct ib_uqp_object *uobj, const char *caller) { struct ib_udata dummy = {}; struct ib_qp *qp; int ret; if (!dev->ops.create_qp) return ERR_PTR(-EOPNOTSUPP); qp = rdma_zalloc_drv_obj_numa(dev, ib_qp); if (!qp) return ERR_PTR(-ENOMEM); qp->device = dev; qp->pd = pd; qp->uobject = uobj; qp->real_qp = qp; qp->qp_type = attr->qp_type; qp->rwq_ind_tbl = attr->rwq_ind_tbl; qp->srq = attr->srq; qp->event_handler = __ib_qp_event_handler; qp->registered_event_handler = attr->event_handler; qp->port = attr->port_num; qp->qp_context = attr->qp_context; spin_lock_init(&qp->mr_lock); INIT_LIST_HEAD(&qp->rdma_mrs); INIT_LIST_HEAD(&qp->sig_mrs); init_completion(&qp->srq_completion); qp->send_cq = attr->send_cq; qp->recv_cq = attr->recv_cq; rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP); WARN_ONCE(!udata && !caller, "Missing kernel QP owner"); rdma_restrack_set_name(&qp->res, udata ? NULL : caller); ret = dev->ops.create_qp(qp, attr, udata); if (ret) goto err_create; /* * TODO: The mlx4 internally overwrites send_cq and recv_cq. * Unfortunately, it is not an easy task to fix that driver. */ qp->send_cq = attr->send_cq; qp->recv_cq = attr->recv_cq; ret = ib_create_qp_security(qp, dev); if (ret) goto err_security; rdma_restrack_add(&qp->res); return qp; err_security: qp->device->ops.destroy_qp(qp, udata ? &dummy : NULL); err_create: rdma_restrack_put(&qp->res); kfree(qp); return ERR_PTR(ret); } /** * ib_create_qp_user - Creates a QP associated with the specified protection * domain. * @dev: IB device * @pd: The protection domain associated with the QP. * @attr: A list of initial attributes required to create the * QP. If QP creation succeeds, then the attributes are updated to * the actual capabilities of the created QP. * @udata: User data * @uobj: uverbs obect * @caller: caller's build-time module name */ struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd, struct ib_qp_init_attr *attr, struct ib_udata *udata, struct ib_uqp_object *uobj, const char *caller) { struct ib_qp *qp, *xrc_qp; if (attr->qp_type == IB_QPT_XRC_TGT) qp = create_qp(dev, pd, attr, NULL, NULL, caller); else qp = create_qp(dev, pd, attr, udata, uobj, NULL); if (attr->qp_type != IB_QPT_XRC_TGT || IS_ERR(qp)) return qp; xrc_qp = create_xrc_qp_user(qp, attr); if (IS_ERR(xrc_qp)) { ib_destroy_qp(qp); return xrc_qp; } xrc_qp->uobject = uobj; return xrc_qp; } EXPORT_SYMBOL(ib_create_qp_user); void ib_qp_usecnt_inc(struct ib_qp *qp) { if (qp->pd) atomic_inc(&qp->pd->usecnt); if (qp->send_cq) atomic_inc(&qp->send_cq->usecnt); if (qp->recv_cq) atomic_inc(&qp->recv_cq->usecnt); if (qp->srq) atomic_inc(&qp->srq->usecnt); if (qp->rwq_ind_tbl) atomic_inc(&qp->rwq_ind_tbl->usecnt); } EXPORT_SYMBOL(ib_qp_usecnt_inc); void ib_qp_usecnt_dec(struct ib_qp *qp) { if (qp->rwq_ind_tbl) atomic_dec(&qp->rwq_ind_tbl->usecnt); if (qp->srq) atomic_dec(&qp->srq->usecnt); if (qp->recv_cq) atomic_dec(&qp->recv_cq->usecnt); if (qp->send_cq) atomic_dec(&qp->send_cq->usecnt); if (qp->pd) atomic_dec(&qp->pd->usecnt); } EXPORT_SYMBOL(ib_qp_usecnt_dec); struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, const char *caller) { struct ib_device *device = pd->device; struct ib_qp *qp; int ret; /* * If the callers is using the RDMA API calculate the resources * needed for the RDMA READ/WRITE operations. * * Note that these callers need to pass in a port number. */ if (qp_init_attr->cap.max_rdma_ctxs) rdma_rw_init_qp(device, qp_init_attr); qp = create_qp(device, pd, qp_init_attr, NULL, NULL, caller); if (IS_ERR(qp)) return qp; ib_qp_usecnt_inc(qp); if (qp_init_attr->cap.max_rdma_ctxs) { ret = rdma_rw_init_mrs(qp, qp_init_attr); if (ret) goto err; } /* * Note: all hw drivers guarantee that max_send_sge is lower than * the device RDMA WRITE SGE limit but not all hw drivers ensure that * max_send_sge <= max_sge_rd. */ qp->max_write_sge = qp_init_attr->cap.max_send_sge; qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge, device->attrs.max_sge_rd); if (qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) qp->integrity_en = true; return qp; err: ib_destroy_qp(qp); return ERR_PTR(ret); } EXPORT_SYMBOL(ib_create_qp_kernel); static const struct { int valid; enum ib_qp_attr_mask req_param[IB_QPT_MAX]; enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .req_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), [IB_QPT_RAW_PACKET] = IB_QP_PORT, [IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), } }, }, [IB_QPS_INIT] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), } }, [IB_QPS_RTR] = { .valid = 1, .req_param = { [IB_QPT_UC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN), [IB_QPT_RC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), [IB_QPT_XRC_INI] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN), [IB_QPT_XRC_TGT] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), }, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), [IB_QPT_RC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), }, }, }, [IB_QPS_RTR] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_RTS] = { .valid = 1, .req_param = { [IB_QPT_UD] = IB_QP_SQ_PSN, [IB_QPT_UC] = IB_QP_SQ_PSN, [IB_QPT_RC] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC), [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC), [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT | IB_QP_SQ_PSN), [IB_QPT_SMI] = IB_QP_SQ_PSN, [IB_QPT_GSI] = IB_QP_SQ_PSN, }, .opt_param = { [IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PATH_MIG_STATE), [IB_QPT_RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT, } } }, [IB_QPS_RTS] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_RTS] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE), [IB_QPT_RC] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER), [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT, } }, [IB_QPS_SQD] = { .valid = 1, .opt_param = { [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */ [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY } }, }, [IB_QPS_SQD] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_RTS] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PATH_MIG_STATE), [IB_QPT_RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), } }, [IB_QPS_SQD] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_AV | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PATH_MIG_STATE), [IB_QPT_RC] = (IB_QP_PORT | IB_QP_AV | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_INI] = (IB_QP_PORT | IB_QP_AV | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_TGT] = (IB_QP_PORT | IB_QP_AV | IB_QP_TIMEOUT | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), } } }, [IB_QPS_SQE] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_RTS] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), } } }, [IB_QPS_ERR] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 } } }; bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, enum ib_qp_type type, enum ib_qp_attr_mask mask) { enum ib_qp_attr_mask req_param, opt_param; if (mask & IB_QP_CUR_STATE && cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) return false; if (!qp_state_table[cur_state][next_state].valid) return false; req_param = qp_state_table[cur_state][next_state].req_param[type]; opt_param = qp_state_table[cur_state][next_state].opt_param[type]; if ((mask & req_param) != req_param) return false; if (mask & ~(req_param | opt_param | IB_QP_STATE)) return false; return true; } EXPORT_SYMBOL(ib_modify_qp_is_ok); /** * ib_resolve_eth_dmac - Resolve destination mac address * @device: Device to consider * @ah_attr: address handle attribute which describes the * source and destination parameters * ib_resolve_eth_dmac() resolves destination mac address and L3 hop limit It * returns 0 on success or appropriate error code. It initializes the * necessary ah_attr fields when call is successful. */ static int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr) { int ret = 0; if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) { __be32 addr = 0; memcpy(&addr, ah_attr->grh.dgid.raw + 12, 4); ip_eth_mc_map(addr, (char *)ah_attr->roce.dmac); } else { ipv6_eth_mc_map((struct in6_addr *)ah_attr->grh.dgid.raw, (char *)ah_attr->roce.dmac); } } else { ret = ib_resolve_unicast_gid_dmac(device, ah_attr); } return ret; } static bool is_qp_type_connected(const struct ib_qp *qp) { return (qp->qp_type == IB_QPT_UC || qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT); } /* * IB core internal function to perform QP attributes modification. */ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { u32 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; const struct ib_gid_attr *old_sgid_attr_av; const struct ib_gid_attr *old_sgid_attr_alt_av; int ret; attr->xmit_slave = NULL; if (attr_mask & IB_QP_AV) { ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr, &old_sgid_attr_av); if (ret) return ret; if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && is_qp_type_connected(qp)) { struct net_device *slave; /* * If the user provided the qp_attr then we have to * resolve it. Kerne users have to provide already * resolved rdma_ah_attr's. */ if (udata) { ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); if (ret) goto out_av; } slave = rdma_lag_get_ah_roce_slave(qp->device, &attr->ah_attr, GFP_KERNEL); if (IS_ERR(slave)) { ret = PTR_ERR(slave); goto out_av; } attr->xmit_slave = slave; } } if (attr_mask & IB_QP_ALT_PATH) { /* * FIXME: This does not track the migration state, so if the * user loads a new alternate path after the HW has migrated * from primary->alternate we will keep the wrong * references. This is OK for IB because the reference * counting does not serve any functional purpose. */ ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr, &old_sgid_attr_alt_av); if (ret) goto out_av; /* * Today the core code can only handle alternate paths and APM * for IB. Ban them in roce mode. */ if (!(rdma_protocol_ib(qp->device, attr->alt_ah_attr.port_num) && rdma_protocol_ib(qp->device, port))) { ret = -EINVAL; goto out; } } if (rdma_ib_or_roce(qp->device, port)) { if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { dev_warn(&qp->device->dev, "%s rq_psn overflow, masking to 24 bits\n", __func__); attr->rq_psn &= 0xffffff; } if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) { dev_warn(&qp->device->dev, " %s sq_psn overflow, masking to 24 bits\n", __func__); attr->sq_psn &= 0xffffff; } } /* * Bind this qp to a counter automatically based on the rdma counter * rules. This only set in RST2INIT with port specified */ if (!qp->counter && (attr_mask & IB_QP_PORT) && ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT)) rdma_counter_bind_qp_auto(qp, attr->port_num); ret = ib_security_modify_qp(qp, attr, attr_mask, udata); if (ret) goto out; if (attr_mask & IB_QP_PORT) qp->port = attr->port_num; if (attr_mask & IB_QP_AV) qp->av_sgid_attr = rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr); if (attr_mask & IB_QP_ALT_PATH) qp->alt_path_sgid_attr = rdma_update_sgid_attr( &attr->alt_ah_attr, qp->alt_path_sgid_attr); out: if (attr_mask & IB_QP_ALT_PATH) rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av); out_av: if (attr_mask & IB_QP_AV) { rdma_lag_put_ah_roce_slave(attr->xmit_slave); rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av); } return ret; } /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. * @ib_qp: The QP to modify. * @attr: On input, specifies the QP attributes to modify. On output, * the current values of selected QP attributes are returned. * @attr_mask: A bit-mask used to specify which attributes of the QP * are being modified. * @udata: pointer to user's input output buffer information * are being modified. * It returns 0 on success and returns appropriate error code on error. */ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata); } EXPORT_SYMBOL(ib_modify_qp_with_udata); static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes, u16 *speed, u8 *width) { if (!lanes) { if (netdev_speed <= SPEED_1000) { *width = IB_WIDTH_1X; *speed = IB_SPEED_SDR; } else if (netdev_speed <= SPEED_10000) { *width = IB_WIDTH_1X; *speed = IB_SPEED_FDR10; } else if (netdev_speed <= SPEED_20000) { *width = IB_WIDTH_4X; *speed = IB_SPEED_DDR; } else if (netdev_speed <= SPEED_25000) { *width = IB_WIDTH_1X; *speed = IB_SPEED_EDR; } else if (netdev_speed <= SPEED_40000) { *width = IB_WIDTH_4X; *speed = IB_SPEED_FDR10; } else if (netdev_speed <= SPEED_50000) { *width = IB_WIDTH_2X; *speed = IB_SPEED_EDR; } else if (netdev_speed <= SPEED_100000) { *width = IB_WIDTH_4X; *speed = IB_SPEED_EDR; } else if (netdev_speed <= SPEED_200000) { *width = IB_WIDTH_4X; *speed = IB_SPEED_HDR; } else { *width = IB_WIDTH_4X; *speed = IB_SPEED_NDR; } return; } switch (lanes) { case 1: *width = IB_WIDTH_1X; break; case 2: *width = IB_WIDTH_2X; break; case 4: *width = IB_WIDTH_4X; break; case 8: *width = IB_WIDTH_8X; break; case 12: *width = IB_WIDTH_12X; break; default: *width = IB_WIDTH_1X; } switch (netdev_speed / lanes) { case SPEED_2500: *speed = IB_SPEED_SDR; break; case SPEED_5000: *speed = IB_SPEED_DDR; break; case SPEED_10000: *speed = IB_SPEED_FDR10; break; case SPEED_14000: *speed = IB_SPEED_FDR; break; case SPEED_25000: *speed = IB_SPEED_EDR; break; case SPEED_50000: *speed = IB_SPEED_HDR; break; case SPEED_100000: *speed = IB_SPEED_NDR; break; default: *speed = IB_SPEED_SDR; } } int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width) { int rc; u32 netdev_speed; struct net_device *netdev; struct ethtool_link_ksettings lksettings = {}; if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) return -EINVAL; netdev = ib_device_get_netdev(dev, port_num); if (!netdev) return -ENODEV; rtnl_lock(); rc = __ethtool_get_link_ksettings(netdev, &lksettings); rtnl_unlock(); dev_put(netdev); if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) { netdev_speed = lksettings.base.speed; } else { netdev_speed = SPEED_1000; if (rc) pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name, netdev_speed); } ib_get_width_and_speed(netdev_speed, lksettings.lanes, speed, width); return 0; } EXPORT_SYMBOL(ib_get_eth_speed); int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { return _ib_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_qp); int ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { qp_attr->ah_attr.grh.sgid_attr = NULL; qp_attr->alt_ah_attr.grh.sgid_attr = NULL; return qp->device->ops.query_qp ? qp->device->ops.query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_query_qp); int ib_close_qp(struct ib_qp *qp) { struct ib_qp *real_qp; unsigned long flags; real_qp = qp->real_qp; if (real_qp == qp) return -EINVAL; spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags); list_del(&qp->open_list); spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags); atomic_dec(&real_qp->usecnt); if (qp->qp_sec) ib_close_shared_qp_security(qp->qp_sec); kfree(qp); return 0; } EXPORT_SYMBOL(ib_close_qp); static int __ib_destroy_shared_qp(struct ib_qp *qp) { struct ib_xrcd *xrcd; struct ib_qp *real_qp; int ret; real_qp = qp->real_qp; xrcd = real_qp->xrcd; down_write(&xrcd->tgt_qps_rwsem); ib_close_qp(qp); if (atomic_read(&real_qp->usecnt) == 0) xa_erase(&xrcd->tgt_qps, real_qp->qp_num); else real_qp = NULL; up_write(&xrcd->tgt_qps_rwsem); if (real_qp) { ret = ib_destroy_qp(real_qp); if (!ret) atomic_dec(&xrcd->usecnt); } return 0; } int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata) { const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr; const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr; struct ib_qp_security *sec; int ret; WARN_ON_ONCE(qp->mrs_used > 0); if (atomic_read(&qp->usecnt)) return -EBUSY; if (qp->real_qp != qp) return __ib_destroy_shared_qp(qp); sec = qp->qp_sec; if (sec) ib_destroy_qp_security_begin(sec); if (!qp->uobject) rdma_rw_cleanup_mrs(qp); rdma_counter_unbind_qp(qp, qp->port, true); ret = qp->device->ops.destroy_qp(qp, udata); if (ret) { if (sec) ib_destroy_qp_security_abort(sec); return ret; } if (alt_path_sgid_attr) rdma_put_gid_attr(alt_path_sgid_attr); if (av_sgid_attr) rdma_put_gid_attr(av_sgid_attr); ib_qp_usecnt_dec(qp); if (sec) ib_destroy_qp_security_end(sec); rdma_restrack_del(&qp->res); kfree(qp); return ret; } EXPORT_SYMBOL(ib_destroy_qp_user); /* Completion queues */ struct ib_cq *__ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *), void *cq_context, const struct ib_cq_init_attr *cq_attr, const char *caller) { struct ib_cq *cq; int ret; cq = rdma_zalloc_drv_obj(device, ib_cq); if (!cq) return ERR_PTR(-ENOMEM); cq->device = device; cq->uobject = NULL; cq->comp_handler = comp_handler; cq->event_handler = event_handler; cq->cq_context = cq_context; atomic_set(&cq->usecnt, 0); rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); rdma_restrack_set_name(&cq->res, caller); ret = device->ops.create_cq(cq, cq_attr, NULL); if (ret) { rdma_restrack_put(&cq->res); kfree(cq); return ERR_PTR(ret); } rdma_restrack_add(&cq->res); return cq; } EXPORT_SYMBOL(__ib_create_cq); int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) { if (cq->shared) return -EOPNOTSUPP; return cq->device->ops.modify_cq ? cq->device->ops.modify_cq(cq, cq_count, cq_period) : -EOPNOTSUPP; } EXPORT_SYMBOL(rdma_set_cq_moderation); int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata) { int ret; if (WARN_ON_ONCE(cq->shared)) return -EOPNOTSUPP; if (atomic_read(&cq->usecnt)) return -EBUSY; ret = cq->device->ops.destroy_cq(cq, udata); if (ret) return ret; rdma_restrack_del(&cq->res); kfree(cq); return ret; } EXPORT_SYMBOL(ib_destroy_cq_user); int ib_resize_cq(struct ib_cq *cq, int cqe) { if (cq->shared) return -EOPNOTSUPP; return cq->device->ops.resize_cq ? cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_resize_cq); /* Memory regions */ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags) { struct ib_mr *mr; if (access_flags & IB_ACCESS_ON_DEMAND) { if (!(pd->device->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING)) { pr_debug("ODP support not available\n"); return ERR_PTR(-EINVAL); } } mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr, access_flags, NULL, NULL); if (IS_ERR(mr)) return mr; mr->device = pd->device; mr->type = IB_MR_TYPE_USER; mr->pd = pd; mr->dm = NULL; atomic_inc(&pd->usecnt); mr->iova = virt_addr; mr->length = length; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_parent_name(&mr->res, &pd->res); rdma_restrack_add(&mr->res); return mr; } EXPORT_SYMBOL(ib_reg_user_mr); int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge) { if (!pd->device->ops.advise_mr) return -EOPNOTSUPP; if (!num_sge) return 0; return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge, NULL); } EXPORT_SYMBOL(ib_advise_mr); int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) { struct ib_pd *pd = mr->pd; struct ib_dm *dm = mr->dm; struct ib_dmah *dmah = mr->dmah; struct ib_sig_attrs *sig_attrs = mr->sig_attrs; int ret; trace_mr_dereg(mr); rdma_restrack_del(&mr->res); ret = mr->device->ops.dereg_mr(mr, udata); if (!ret) { atomic_dec(&pd->usecnt); if (dm) atomic_dec(&dm->usecnt); if (dmah) atomic_dec(&dmah->usecnt); kfree(sig_attrs); } return ret; } EXPORT_SYMBOL(ib_dereg_mr_user); /** * ib_alloc_mr() - Allocates a memory region * @pd: protection domain associated with the region * @mr_type: memory region type * @max_num_sg: maximum sg entries available for registration. * * Notes: * Memory registeration page/sg lists must not exceed max_num_sg. * For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed * max_num_sg * used_page_size. * */ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { struct ib_mr *mr; if (!pd->device->ops.alloc_mr) { mr = ERR_PTR(-EOPNOTSUPP); goto out; } if (mr_type == IB_MR_TYPE_INTEGRITY) { WARN_ON_ONCE(1); mr = ERR_PTR(-EINVAL); goto out; } mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg); if (IS_ERR(mr)) goto out; mr->device = pd->device; mr->pd = pd; mr->dm = NULL; mr->uobject = NULL; atomic_inc(&pd->usecnt); mr->need_inval = false; mr->type = mr_type; mr->sig_attrs = NULL; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_parent_name(&mr->res, &pd->res); rdma_restrack_add(&mr->res); out: trace_mr_alloc(pd, mr_type, max_num_sg, mr); return mr; } EXPORT_SYMBOL(ib_alloc_mr); /** * ib_alloc_mr_integrity() - Allocates an integrity memory region * @pd: protection domain associated with the region * @max_num_data_sg: maximum data sg entries available for registration * @max_num_meta_sg: maximum metadata sg entries available for * registration * * Notes: * Memory registration page/sg lists must not exceed max_num_sg, * also the integrity page/sg lists must not exceed max_num_meta_sg. * */ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, u32 max_num_data_sg, u32 max_num_meta_sg) { struct ib_mr *mr; struct ib_sig_attrs *sig_attrs; if (!pd->device->ops.alloc_mr_integrity || !pd->device->ops.map_mr_sg_pi) { mr = ERR_PTR(-EOPNOTSUPP); goto out; } if (!max_num_meta_sg) { mr = ERR_PTR(-EINVAL); goto out; } sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL); if (!sig_attrs) { mr = ERR_PTR(-ENOMEM); goto out; } mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg, max_num_meta_sg); if (IS_ERR(mr)) { kfree(sig_attrs); goto out; } mr->device = pd->device; mr->pd = pd; mr->dm = NULL; mr->uobject = NULL; atomic_inc(&pd->usecnt); mr->need_inval = false; mr->type = IB_MR_TYPE_INTEGRITY; mr->sig_attrs = sig_attrs; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_parent_name(&mr->res, &pd->res); rdma_restrack_add(&mr->res); out: trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr); return mr; } EXPORT_SYMBOL(ib_alloc_mr_integrity); /* Multicast groups */ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) { struct ib_qp_init_attr init_attr = {}; struct ib_qp_attr attr = {}; int num_eth_ports = 0; unsigned int port; /* If QP state >= init, it is assigned to a port and we can check this * port only. */ if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) { if (attr.qp_state >= IB_QPS_INIT) { if (rdma_port_get_link_layer(qp->device, attr.port_num) != IB_LINK_LAYER_INFINIBAND) return true; goto lid_check; } } /* Can't get a quick answer, iterate over all ports */ rdma_for_each_port(qp->device, port) if (rdma_port_get_link_layer(qp->device, port) != IB_LINK_LAYER_INFINIBAND) num_eth_ports++; /* If we have at lease one Ethernet port, RoCE annex declares that * multicast LID should be ignored. We can't tell at this step if the * QP belongs to an IB or Ethernet port. */ if (num_eth_ports) return true; /* If all the ports are IB, we can check according to IB spec. */ lid_check: return !(lid < be16_to_cpu(IB_MULTICAST_LID_BASE) || lid == be16_to_cpu(IB_LID_PERMISSIVE)); } int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { int ret; if (!qp->device->ops.attach_mcast) return -EOPNOTSUPP; if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid)) return -EINVAL; ret = qp->device->ops.attach_mcast(qp, gid, lid); if (!ret) atomic_inc(&qp->usecnt); return ret; } EXPORT_SYMBOL(ib_attach_mcast); int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { int ret; if (!qp->device->ops.detach_mcast) return -EOPNOTSUPP; if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid)) return -EINVAL; ret = qp->device->ops.detach_mcast(qp, gid, lid); if (!ret) atomic_dec(&qp->usecnt); return ret; } EXPORT_SYMBOL(ib_detach_mcast); /** * ib_alloc_xrcd_user - Allocates an XRC domain. * @device: The device on which to allocate the XRC domain. * @inode: inode to connect XRCD * @udata: Valid user data or NULL for kernel object */ struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, struct inode *inode, struct ib_udata *udata) { struct ib_xrcd *xrcd; int ret; if (!device->ops.alloc_xrcd) return ERR_PTR(-EOPNOTSUPP); xrcd = rdma_zalloc_drv_obj(device, ib_xrcd); if (!xrcd) return ERR_PTR(-ENOMEM); xrcd->device = device; xrcd->inode = inode; atomic_set(&xrcd->usecnt, 0); init_rwsem(&xrcd->tgt_qps_rwsem); xa_init(&xrcd->tgt_qps); ret = device->ops.alloc_xrcd(xrcd, udata); if (ret) goto err; return xrcd; err: kfree(xrcd); return ERR_PTR(ret); } EXPORT_SYMBOL(ib_alloc_xrcd_user); /** * ib_dealloc_xrcd_user - Deallocates an XRC domain. * @xrcd: The XRC domain to deallocate. * @udata: Valid user data or NULL for kernel object */ int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata) { int ret; if (atomic_read(&xrcd->usecnt)) return -EBUSY; WARN_ON(!xa_empty(&xrcd->tgt_qps)); ret = xrcd->device->ops.dealloc_xrcd(xrcd, udata); if (ret) return ret; kfree(xrcd); return ret; } EXPORT_SYMBOL(ib_dealloc_xrcd_user); /** * ib_create_wq - Creates a WQ associated with the specified protection * domain. * @pd: The protection domain associated with the WQ. * @wq_attr: A list of initial attributes required to create the * WQ. If WQ creation succeeds, then the attributes are updated to * the actual capabilities of the created WQ. * * wq_attr->max_wr and wq_attr->max_sge determine * the requested size of the WQ, and set to the actual values allocated * on return. * If ib_create_wq() succeeds, then max_wr and max_sge will always be * at least as large as the requested values. */ struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *wq_attr) { struct ib_wq *wq; if (!pd->device->ops.create_wq) return ERR_PTR(-EOPNOTSUPP); wq = pd->device->ops.create_wq(pd, wq_attr, NULL); if (!IS_ERR(wq)) { wq->event_handler = wq_attr->event_handler; wq->wq_context = wq_attr->wq_context; wq->wq_type = wq_attr->wq_type; wq->cq = wq_attr->cq; wq->device = pd->device; wq->pd = pd; wq->uobject = NULL; atomic_inc(&pd->usecnt); atomic_inc(&wq_attr->cq->usecnt); atomic_set(&wq->usecnt, 0); } return wq; } EXPORT_SYMBOL(ib_create_wq); /** * ib_destroy_wq_user - Destroys the specified user WQ. * @wq: The WQ to destroy. * @udata: Valid user data */ int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata) { struct ib_cq *cq = wq->cq; struct ib_pd *pd = wq->pd; int ret; if (atomic_read(&wq->usecnt)) return -EBUSY; ret = wq->device->ops.destroy_wq(wq, udata); if (ret) return ret; atomic_dec(&pd->usecnt); atomic_dec(&cq->usecnt); return ret; } EXPORT_SYMBOL(ib_destroy_wq_user); int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status) { if (!mr->device->ops.check_mr_status) return -EOPNOTSUPP; return mr->device->ops.check_mr_status(mr, check_mask, mr_status); } EXPORT_SYMBOL(ib_check_mr_status); int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port, int state) { if (!device->ops.set_vf_link_state) return -EOPNOTSUPP; return device->ops.set_vf_link_state(device, vf, port, state); } EXPORT_SYMBOL(ib_set_vf_link_state); int ib_get_vf_config(struct ib_device *device, int vf, u32 port, struct ifla_vf_info *info) { if (!device->ops.get_vf_config) return -EOPNOTSUPP; return device->ops.get_vf_config(device, vf, port, info); } EXPORT_SYMBOL(ib_get_vf_config); int ib_get_vf_stats(struct ib_device *device, int vf, u32 port, struct ifla_vf_stats *stats) { if (!device->ops.get_vf_stats) return -EOPNOTSUPP; return device->ops.get_vf_stats(device, vf, port, stats); } EXPORT_SYMBOL(ib_get_vf_stats); int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid, int type) { if (!device->ops.set_vf_guid) return -EOPNOTSUPP; return device->ops.set_vf_guid(device, vf, port, guid, type); } EXPORT_SYMBOL(ib_set_vf_guid); int ib_get_vf_guid(struct ib_device *device, int vf, u32 port, struct ifla_vf_guid *node_guid, struct ifla_vf_guid *port_guid) { if (!device->ops.get_vf_guid) return -EOPNOTSUPP; return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid); } EXPORT_SYMBOL(ib_get_vf_guid); /** * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection * information) and set an appropriate memory region for registration. * @mr: memory region * @data_sg: dma mapped scatterlist for data * @data_sg_nents: number of entries in data_sg * @data_sg_offset: offset in bytes into data_sg * @meta_sg: dma mapped scatterlist for metadata * @meta_sg_nents: number of entries in meta_sg * @meta_sg_offset: offset in bytes into meta_sg * @page_size: page vector desired page size * * Constraints: * - The MR must be allocated with type IB_MR_TYPE_INTEGRITY. * * Return: 0 on success. * * After this completes successfully, the memory region * is ready for registration. */ int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg, int data_sg_nents, unsigned int *data_sg_offset, struct scatterlist *meta_sg, int meta_sg_nents, unsigned int *meta_sg_offset, unsigned int page_size) { if (unlikely(!mr->device->ops.map_mr_sg_pi || WARN_ON_ONCE(mr->type != IB_MR_TYPE_INTEGRITY))) return -EOPNOTSUPP; mr->page_size = page_size; return mr->device->ops.map_mr_sg_pi(mr, data_sg, data_sg_nents, data_sg_offset, meta_sg, meta_sg_nents, meta_sg_offset); } EXPORT_SYMBOL(ib_map_mr_sg_pi); /** * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list * and set it the memory region. * @mr: memory region * @sg: dma mapped scatterlist * @sg_nents: number of entries in sg * @sg_offset: offset in bytes into sg * @page_size: page vector desired page size * * Constraints: * * - The first sg element is allowed to have an offset. * - Each sg element must either be aligned to page_size or virtually * contiguous to the previous element. In case an sg element has a * non-contiguous offset, the mapping prefix will not include it. * - The last sg element is allowed to have length less than page_size. * - If sg_nents total byte length exceeds the mr max_num_sge * page_size * then only max_num_sg entries will be mapped. * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS, none of these * constraints holds and the page_size argument is ignored. * * Returns the number of sg elements that were mapped to the memory region. * * After this completes successfully, the memory region * is ready for registration. */ int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size) { if (unlikely(!mr->device->ops.map_mr_sg)) return -EOPNOTSUPP; mr->page_size = page_size; return mr->device->ops.map_mr_sg(mr, sg, sg_nents, sg_offset); } EXPORT_SYMBOL(ib_map_mr_sg); /** * ib_sg_to_pages() - Convert the largest prefix of a sg list * to a page vector * @mr: memory region * @sgl: dma mapped scatterlist * @sg_nents: number of entries in sg * @sg_offset_p: ==== ======================================================= * IN start offset in bytes into sg * OUT offset in bytes for element n of the sg of the first * byte that has not been processed where n is the return * value of this function. * ==== ======================================================= * @set_page: driver page assignment function pointer * * Core service helper for drivers to convert the largest * prefix of given sg list to a page vector. The sg list * prefix converted is the prefix that meet the requirements * of ib_map_mr_sg. * * Returns the number of sg elements that were assigned to * a page vector. */ int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents, unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64)) { struct scatterlist *sg; u64 last_end_dma_addr = 0; unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; unsigned int last_page_off = 0; u64 page_mask = ~((u64)mr->page_size - 1); int i, ret; if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0]))) return -EINVAL; mr->iova = sg_dma_address(&sgl[0]) + sg_offset; mr->length = 0; for_each_sg(sgl, sg, sg_nents, i) { u64 dma_addr = sg_dma_address(sg) + sg_offset; u64 prev_addr = dma_addr; unsigned int dma_len = sg_dma_len(sg) - sg_offset; u64 end_dma_addr = dma_addr + dma_len; u64 page_addr = dma_addr & page_mask; /* * For the second and later elements, check whether either the * end of element i-1 or the start of element i is not aligned * on a page boundary. */ if (i && (last_page_off != 0 || page_addr != dma_addr)) { /* Stop mapping if there is a gap. */ if (last_end_dma_addr != dma_addr) break; /* * Coalesce this element with the last. If it is small * enough just update mr->length. Otherwise start * mapping from the next page. */ goto next_page; } do { ret = set_page(mr, page_addr); if (unlikely(ret < 0)) { sg_offset = prev_addr - sg_dma_address(sg); mr->length += prev_addr - dma_addr; if (sg_offset_p) *sg_offset_p = sg_offset; return i || sg_offset ? i : ret; } prev_addr = page_addr; next_page: page_addr += mr->page_size; } while (page_addr < end_dma_addr); mr->length += dma_len; last_end_dma_addr = end_dma_addr; last_page_off = end_dma_addr & ~page_mask; sg_offset = 0; } if (sg_offset_p) *sg_offset_p = 0; return i; } EXPORT_SYMBOL(ib_sg_to_pages); struct ib_drain_cqe { struct ib_cqe cqe; struct completion done; }; static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) { struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe, cqe); complete(&cqe->done); } /* * Post a WR and block until its completion is reaped for the SQ. */ static void __ib_drain_sq(struct ib_qp *qp) { struct ib_cq *cq = qp->send_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct ib_drain_cqe sdrain; struct ib_rdma_wr swr = { .wr = { .next = NULL, { .wr_cqe = &sdrain.cqe, }, .opcode = IB_WR_RDMA_WRITE, }, }; int ret; ret = ib_modify_qp(qp, &attr, IB_QP_STATE); if (ret) { WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); return; } sdrain.cqe.done = ib_drain_qp_done; init_completion(&sdrain.done); ret = ib_post_send(qp, &swr.wr, NULL); if (ret) { WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); return; } if (cq->poll_ctx == IB_POLL_DIRECT) while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0) ib_process_cq_direct(cq, -1); else wait_for_completion(&sdrain.done); } /* * Post a WR and block until its completion is reaped for the RQ. */ static void __ib_drain_rq(struct ib_qp *qp) { struct ib_cq *cq = qp->recv_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct ib_drain_cqe rdrain; struct ib_recv_wr rwr = {}; int ret; ret = ib_modify_qp(qp, &attr, IB_QP_STATE); if (ret) { WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); return; } rwr.wr_cqe = &rdrain.cqe; rdrain.cqe.done = ib_drain_qp_done; init_completion(&rdrain.done); ret = ib_post_recv(qp, &rwr, NULL); if (ret) { WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); return; } if (cq->poll_ctx == IB_POLL_DIRECT) while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0) ib_process_cq_direct(cq, -1); else wait_for_completion(&rdrain.done); } /* * __ib_drain_srq() - Block until Last WQE Reached event arrives, or timeout * expires. * @qp: queue pair associated with SRQ to drain * * Quoting 10.3.1 Queue Pair and EE Context States: * * Note, for QPs that are associated with an SRQ, the Consumer should take the * QP through the Error State before invoking a Destroy QP or a Modify QP to the * Reset State. The Consumer may invoke the Destroy QP without first performing * a Modify QP to the Error State and waiting for the Affiliated Asynchronous * Last WQE Reached Event. However, if the Consumer does not wait for the * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment * leakage may occur. Therefore, it is good programming practice to tear down a * QP that is associated with an SRQ by using the following process: * * - Put the QP in the Error State * - Wait for the Affiliated Asynchronous Last WQE Reached Event; * - either: * drain the CQ by invoking the Poll CQ verb and either wait for CQ * to be empty or the number of Poll CQ operations has exceeded * CQ capacity size; * - or * post another WR that completes on the same CQ and wait for this * WR to return as a WC; * - and then invoke a Destroy QP or Reset QP. * * We use the first option. */ static void __ib_drain_srq(struct ib_qp *qp) { struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct ib_cq *cq; int n, polled = 0; int ret; if (!qp->srq) { WARN_ONCE(1, "QP 0x%p is not associated with SRQ\n", qp); return; } ret = ib_modify_qp(qp, &attr, IB_QP_STATE); if (ret) { WARN_ONCE(ret, "failed to drain shared recv queue: %d\n", ret); return; } if (ib_srq_has_cq(qp->srq->srq_type)) { cq = qp->srq->ext.cq; } else if (qp->recv_cq) { cq = qp->recv_cq; } else { WARN_ONCE(1, "QP 0x%p has no CQ associated with SRQ\n", qp); return; } if (wait_for_completion_timeout(&qp->srq_completion, 60 * HZ) > 0) { while (polled != cq->cqe) { n = ib_process_cq_direct(cq, cq->cqe - polled); if (!n) return; polled += n; } } } /** * ib_drain_sq() - Block until all SQ CQEs have been consumed by the * application. * @qp: queue pair to drain * * If the device has a provider-specific drain function, then * call that. Otherwise call the generic drain function * __ib_drain_sq(). * * The caller must: * * ensure there is room in the CQ and SQ for the drain work request and * completion. * * allocate the CQ using ib_alloc_cq(). * * ensure that there are no other contexts that are posting WRs concurrently. * Otherwise the drain is not guaranteed. */ void ib_drain_sq(struct ib_qp *qp) { if (qp->device->ops.drain_sq) qp->device->ops.drain_sq(qp); else __ib_drain_sq(qp); trace_cq_drain_complete(qp->send_cq); } EXPORT_SYMBOL(ib_drain_sq); /** * ib_drain_rq() - Block until all RQ CQEs have been consumed by the * application. * @qp: queue pair to drain * * If the device has a provider-specific drain function, then * call that. Otherwise call the generic drain function * __ib_drain_rq(). * * The caller must: * * ensure there is room in the CQ and RQ for the drain work request and * completion. * * allocate the CQ using ib_alloc_cq(). * * ensure that there are no other contexts that are posting WRs concurrently. * Otherwise the drain is not guaranteed. */ void ib_drain_rq(struct ib_qp *qp) { if (qp->device->ops.drain_rq) qp->device->ops.drain_rq(qp); else __ib_drain_rq(qp); trace_cq_drain_complete(qp->recv_cq); } EXPORT_SYMBOL(ib_drain_rq); /** * ib_drain_qp() - Block until all CQEs have been consumed by the * application on both the RQ and SQ. * @qp: queue pair to drain * * The caller must: * * ensure there is room in the CQ(s), SQ, and RQ for drain work requests * and completions. * * allocate the CQs using ib_alloc_cq(). * * ensure that there are no other contexts that are posting WRs concurrently. * Otherwise the drain is not guaranteed. */ void ib_drain_qp(struct ib_qp *qp) { ib_drain_sq(qp); if (!qp->srq) ib_drain_rq(qp); else __ib_drain_srq(qp); } EXPORT_SYMBOL(ib_drain_qp); struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *)) { struct rdma_netdev_alloc_params params; struct net_device *netdev; int rc; if (!device->ops.rdma_netdev_get_params) return ERR_PTR(-EOPNOTSUPP); rc = device->ops.rdma_netdev_get_params(device, port_num, type, ¶ms); if (rc) return ERR_PTR(rc); netdev = alloc_netdev_mqs(params.sizeof_priv, name, name_assign_type, setup, params.txqs, params.rxqs); if (!netdev) return ERR_PTR(-ENOMEM); return netdev; } EXPORT_SYMBOL(rdma_alloc_netdev); int rdma_init_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), struct net_device *netdev) { struct rdma_netdev_alloc_params params; int rc; if (!device->ops.rdma_netdev_get_params) return -EOPNOTSUPP; rc = device->ops.rdma_netdev_get_params(device, port_num, type, ¶ms); if (rc) return rc; return params.initialize_rdma_netdev(device, port_num, netdev, params.param); } EXPORT_SYMBOL(rdma_init_netdev); void __rdma_block_iter_start(struct ib_block_iter *biter, struct scatterlist *sglist, unsigned int nents, unsigned long pgsz) { memset(biter, 0, sizeof(struct ib_block_iter)); biter->__sg = sglist; biter->__sg_nents = nents; /* Driver provides best block size to use */ biter->__pg_bit = __fls(pgsz); } EXPORT_SYMBOL(__rdma_block_iter_start); bool __rdma_block_iter_next(struct ib_block_iter *biter) { unsigned int block_offset; unsigned int delta; if (!biter->__sg_nents || !biter->__sg) return false; biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance; block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1); delta = BIT_ULL(biter->__pg_bit) - block_offset; while (biter->__sg_nents && biter->__sg && sg_dma_len(biter->__sg) - biter->__sg_advance <= delta) { delta -= sg_dma_len(biter->__sg) - biter->__sg_advance; biter->__sg_advance = 0; biter->__sg = sg_next(biter->__sg); biter->__sg_nents--; } biter->__sg_advance += delta; return true; } EXPORT_SYMBOL(__rdma_block_iter_next); /** * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct * for the drivers. * @descs: array of static descriptors * @num_counters: number of elements in array * @lifespan: milliseconds between updates */ struct rdma_hw_stats *rdma_alloc_hw_stats_struct( const struct rdma_stat_desc *descs, int num_counters, unsigned long lifespan) { struct rdma_hw_stats *stats; stats = kzalloc(struct_size(stats, value, num_counters), GFP_KERNEL); if (!stats) return NULL; stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters), sizeof(*stats->is_disabled), GFP_KERNEL); if (!stats->is_disabled) goto err; stats->descs = descs; stats->num_counters = num_counters; stats->lifespan = msecs_to_jiffies(lifespan); mutex_init(&stats->lock); return stats; err: kfree(stats); return NULL; } EXPORT_SYMBOL(rdma_alloc_hw_stats_struct); /** * rdma_free_hw_stats_struct - Helper function to release rdma_hw_stats * @stats: statistics to release */ void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats) { if (!stats) return; kfree(stats->is_disabled); kfree(stats); } EXPORT_SYMBOL(rdma_free_hw_stats_struct); |
| 83 83 45 46 46 46 83 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 | // SPDX-License-Identifier: GPL-2.0-or-later /* * ALSA sequencer MIDI-through client * Copyright (c) 1999-2000 by Takashi Iwai <tiwai@suse.de> */ #include <linux/init.h> #include <linux/slab.h> #include <linux/module.h> #include <sound/core.h> #include "seq_clientmgr.h" #include <sound/initval.h> #include <sound/asoundef.h> /* Sequencer MIDI-through client This gives a simple midi-through client. All the normal input events are redirected to output port immediately. The routing can be done via aconnect program in alsa-utils. Each client has a static client number 14 (= SNDRV_SEQ_CLIENT_DUMMY). If you want to auto-load this module, you may add the following alias in your /etc/conf.modules file. alias snd-seq-client-14 snd-seq-dummy The module is loaded on demand for client 14, or /proc/asound/seq/ is accessed. If you don't need this module to be loaded, alias snd-seq-client-14 as "off". This will help modprobe. The number of ports to be created can be specified via the module parameter "ports". For example, to create four ports, add the following option in a configuration file under /etc/modprobe.d/: option snd-seq-dummy ports=4 The model option "duplex=1" enables duplex operation to the port. In duplex mode, a pair of ports are created instead of single port, and events are tunneled between pair-ports. For example, input to port A is sent to output port of another port B and vice versa. In duplex mode, each port has DUPLEX capability. */ MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>"); MODULE_DESCRIPTION("ALSA sequencer MIDI-through client"); MODULE_LICENSE("GPL"); MODULE_ALIAS("snd-seq-client-" __stringify(SNDRV_SEQ_CLIENT_DUMMY)); static int ports = 1; static bool duplex; module_param(ports, int, 0444); MODULE_PARM_DESC(ports, "number of ports to be created"); module_param(duplex, bool, 0444); MODULE_PARM_DESC(duplex, "create DUPLEX ports"); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) static int ump; module_param(ump, int, 0444); MODULE_PARM_DESC(ump, "UMP conversion (0: no convert, 1: MIDI 1.0, 2: MIDI 2.0)"); #endif struct snd_seq_dummy_port { int client; int port; int duplex; int connect; }; static int my_client = -1; /* * event input callback - just redirect events to subscribers */ static int dummy_input(struct snd_seq_event *ev, int direct, void *private_data, int atomic, int hop) { struct snd_seq_dummy_port *p; struct snd_seq_event tmpev; p = private_data; if (ev->source.client == SNDRV_SEQ_CLIENT_SYSTEM || ev->type == SNDRV_SEQ_EVENT_KERNEL_ERROR) return 0; /* ignore system messages */ tmpev = *ev; if (p->duplex) tmpev.source.port = p->connect; else tmpev.source.port = p->port; tmpev.dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS; return snd_seq_kernel_client_dispatch(p->client, &tmpev, atomic, hop); } /* * free_private callback */ static void dummy_free(void *private_data) { kfree(private_data); } /* * create a port */ static struct snd_seq_dummy_port __init * create_port(int idx, int type) { struct snd_seq_port_info pinfo; struct snd_seq_port_callback pcb; struct snd_seq_dummy_port *rec; rec = kzalloc(sizeof(*rec), GFP_KERNEL); if (!rec) return NULL; rec->client = my_client; rec->duplex = duplex; rec->connect = 0; memset(&pinfo, 0, sizeof(pinfo)); pinfo.addr.client = my_client; if (duplex) sprintf(pinfo.name, "Midi Through Port-%d:%c", idx, (type ? 'B' : 'A')); else sprintf(pinfo.name, "Midi Through Port-%d", idx); pinfo.capability = SNDRV_SEQ_PORT_CAP_READ | SNDRV_SEQ_PORT_CAP_SUBS_READ; pinfo.capability |= SNDRV_SEQ_PORT_CAP_WRITE | SNDRV_SEQ_PORT_CAP_SUBS_WRITE; if (duplex) pinfo.capability |= SNDRV_SEQ_PORT_CAP_DUPLEX; pinfo.direction = SNDRV_SEQ_PORT_DIR_BIDIRECTION; pinfo.type = SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | SNDRV_SEQ_PORT_TYPE_SOFTWARE | SNDRV_SEQ_PORT_TYPE_PORT; memset(&pcb, 0, sizeof(pcb)); pcb.owner = THIS_MODULE; pcb.event_input = dummy_input; pcb.private_free = dummy_free; pcb.private_data = rec; pinfo.kernel = &pcb; if (snd_seq_kernel_client_ctl(my_client, SNDRV_SEQ_IOCTL_CREATE_PORT, &pinfo) < 0) { kfree(rec); return NULL; } rec->port = pinfo.addr.port; return rec; } /* * register client and create ports */ static int __init register_client(void) { struct snd_seq_dummy_port *rec1, *rec2; #if IS_ENABLED(CONFIG_SND_SEQ_UMP) struct snd_seq_client *client; #endif int i; if (ports < 1) { pr_err("ALSA: seq_dummy: invalid number of ports %d\n", ports); return -EINVAL; } /* create client */ my_client = snd_seq_create_kernel_client(NULL, SNDRV_SEQ_CLIENT_DUMMY, "Midi Through"); if (my_client < 0) return my_client; #if IS_ENABLED(CONFIG_SND_SEQ_UMP) client = snd_seq_kernel_client_get(my_client); if (!client) return -EINVAL; switch (ump) { case 1: client->midi_version = SNDRV_SEQ_CLIENT_UMP_MIDI_1_0; break; case 2: client->midi_version = SNDRV_SEQ_CLIENT_UMP_MIDI_2_0; break; default: /* don't convert events but just pass-through */ client->filter = SNDRV_SEQ_FILTER_NO_CONVERT; break; } snd_seq_kernel_client_put(client); #endif /* create ports */ for (i = 0; i < ports; i++) { rec1 = create_port(i, 0); if (rec1 == NULL) { snd_seq_delete_kernel_client(my_client); return -ENOMEM; } if (duplex) { rec2 = create_port(i, 1); if (rec2 == NULL) { snd_seq_delete_kernel_client(my_client); return -ENOMEM; } rec1->connect = rec2->port; rec2->connect = rec1->port; } } return 0; } /* * delete client if exists */ static void __exit delete_client(void) { if (my_client >= 0) snd_seq_delete_kernel_client(my_client); } /* * Init part */ static int __init alsa_seq_dummy_init(void) { return register_client(); } static void __exit alsa_seq_dummy_exit(void) { delete_client(); } module_init(alsa_seq_dummy_init) module_exit(alsa_seq_dummy_exit) |
| 46 46 46 46 37 46 46 14 14 14 55 50 12 12 12 28 28 28 19 19 28 16 16 23 18 8 7 2 6 4 13 11 9 13 23 17 16 15 14 5 4 1 13 11 10 13 13 16 1 11 10 1 10 1 87 87 71 12 63 63 11 1 16 15 15 15 7 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 | // SPDX-License-Identifier: GPL-2.0-only /* * fs/eventfd.c * * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> * */ #include <linux/file.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/sched/signal.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/anon_inodes.h> #include <linux/syscalls.h> #include <linux/export.h> #include <linux/kref.h> #include <linux/eventfd.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/idr.h> #include <linux/uio.h> static DEFINE_IDA(eventfd_ida); struct eventfd_ctx { struct kref kref; wait_queue_head_t wqh; /* * Every time that a write(2) is performed on an eventfd, the * value of the __u64 being written is added to "count" and a * wakeup is performed on "wqh". If EFD_SEMAPHORE flag was not * specified, a read(2) will return the "count" value to userspace, * and will reset "count" to zero. The kernel side eventfd_signal() * also, adds to the "count" counter and issue a wakeup. */ __u64 count; unsigned int flags; int id; }; /** * eventfd_signal_mask - Increment the event counter * @ctx: [in] Pointer to the eventfd context. * @mask: [in] poll mask * * This function is supposed to be called by the kernel in paths that do not * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX * value, and we signal this as overflow condition by returning a EPOLLERR * to poll(2). */ void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask) { unsigned long flags; /* * Deadlock or stack overflow issues can happen if we recurse here * through waitqueue wakeup handlers. If the caller users potentially * nested waitqueues with custom wakeup handlers, then it should * check eventfd_signal_allowed() before calling this function. If * it returns false, the eventfd_signal() call should be deferred to a * safe context. */ if (WARN_ON_ONCE(current->in_eventfd)) return; spin_lock_irqsave(&ctx->wqh.lock, flags); current->in_eventfd = 1; if (ctx->count < ULLONG_MAX) ctx->count++; if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask); current->in_eventfd = 0; spin_unlock_irqrestore(&ctx->wqh.lock, flags); } EXPORT_SYMBOL_GPL(eventfd_signal_mask); static void eventfd_free_ctx(struct eventfd_ctx *ctx) { if (ctx->id >= 0) ida_free(&eventfd_ida, ctx->id); kfree(ctx); } static void eventfd_free(struct kref *kref) { struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref); eventfd_free_ctx(ctx); } /** * eventfd_ctx_put - Releases a reference to the internal eventfd context. * @ctx: [in] Pointer to eventfd context. * * The eventfd context reference must have been previously acquired either * with eventfd_ctx_fdget() or eventfd_ctx_fileget(). */ void eventfd_ctx_put(struct eventfd_ctx *ctx) { kref_put(&ctx->kref, eventfd_free); } EXPORT_SYMBOL_GPL(eventfd_ctx_put); static int eventfd_release(struct inode *inode, struct file *file) { struct eventfd_ctx *ctx = file->private_data; wake_up_poll(&ctx->wqh, EPOLLHUP); eventfd_ctx_put(ctx); return 0; } static __poll_t eventfd_poll(struct file *file, poll_table *wait) { struct eventfd_ctx *ctx = file->private_data; __poll_t events = 0; u64 count; poll_wait(file, &ctx->wqh, wait); /* * All writes to ctx->count occur within ctx->wqh.lock. This read * can be done outside ctx->wqh.lock because we know that poll_wait * takes that lock (through add_wait_queue) if our caller will sleep. * * The read _can_ therefore seep into add_wait_queue's critical * section, but cannot move above it! add_wait_queue's spin_lock acts * as an acquire barrier and ensures that the read be ordered properly * against the writes. The following CAN happen and is safe: * * poll write * ----------------- ------------ * lock ctx->wqh.lock (in poll_wait) * count = ctx->count * __add_wait_queue * unlock ctx->wqh.lock * lock ctx->qwh.lock * ctx->count += n * if (waitqueue_active) * wake_up_locked_poll * unlock ctx->qwh.lock * eventfd_poll returns 0 * * but the following, which would miss a wakeup, cannot happen: * * poll write * ----------------- ------------ * count = ctx->count (INVALID!) * lock ctx->qwh.lock * ctx->count += n * **waitqueue_active is false** * **no wake_up_locked_poll!** * unlock ctx->qwh.lock * lock ctx->wqh.lock (in poll_wait) * __add_wait_queue * unlock ctx->wqh.lock * eventfd_poll returns 0 */ count = READ_ONCE(ctx->count); if (count > 0) events |= EPOLLIN; if (count == ULLONG_MAX) events |= EPOLLERR; if (ULLONG_MAX - 1 > count) events |= EPOLLOUT; return events; } void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) { lockdep_assert_held(&ctx->wqh.lock); *cnt = ((ctx->flags & EFD_SEMAPHORE) && ctx->count) ? 1 : ctx->count; ctx->count -= *cnt; } EXPORT_SYMBOL_GPL(eventfd_ctx_do_read); /** * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. * @ctx: [in] Pointer to eventfd context. * @wait: [in] Wait queue to be removed. * @cnt: [out] Pointer to the 64-bit counter value. * * Returns %0 if successful, or the following error codes: * * -EAGAIN : The operation would have blocked. * * This is used to atomically remove a wait queue entry from the eventfd wait * queue head, and read/reset the counter value. */ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt) { unsigned long flags; spin_lock_irqsave(&ctx->wqh.lock, flags); eventfd_ctx_do_read(ctx, cnt); __remove_wait_queue(&ctx->wqh, wait); if (*cnt != 0 && waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, EPOLLOUT); spin_unlock_irqrestore(&ctx->wqh.lock, flags); return *cnt != 0 ? 0 : -EAGAIN; } EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue); static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct eventfd_ctx *ctx = file->private_data; __u64 ucnt = 0; if (iov_iter_count(to) < sizeof(ucnt)) return -EINVAL; spin_lock_irq(&ctx->wqh.lock); if (!ctx->count) { if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) { spin_unlock_irq(&ctx->wqh.lock); return -EAGAIN; } if (wait_event_interruptible_locked_irq(ctx->wqh, ctx->count)) { spin_unlock_irq(&ctx->wqh.lock); return -ERESTARTSYS; } } eventfd_ctx_do_read(ctx, &ucnt); current->in_eventfd = 1; if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, EPOLLOUT); current->in_eventfd = 0; spin_unlock_irq(&ctx->wqh.lock); if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt))) return -EFAULT; return sizeof(ucnt); } static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct eventfd_ctx *ctx = file->private_data; ssize_t res; __u64 ucnt; if (count != sizeof(ucnt)) return -EINVAL; if (copy_from_user(&ucnt, buf, sizeof(ucnt))) return -EFAULT; if (ucnt == ULLONG_MAX) return -EINVAL; spin_lock_irq(&ctx->wqh.lock); res = -EAGAIN; if (ULLONG_MAX - ctx->count > ucnt) res = sizeof(ucnt); else if (!(file->f_flags & O_NONBLOCK)) { res = wait_event_interruptible_locked_irq(ctx->wqh, ULLONG_MAX - ctx->count > ucnt); if (!res) res = sizeof(ucnt); } if (likely(res > 0)) { ctx->count += ucnt; current->in_eventfd = 1; if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, EPOLLIN); current->in_eventfd = 0; } spin_unlock_irq(&ctx->wqh.lock); return res; } #ifdef CONFIG_PROC_FS static void eventfd_show_fdinfo(struct seq_file *m, struct file *f) { struct eventfd_ctx *ctx = f->private_data; __u64 cnt; spin_lock_irq(&ctx->wqh.lock); cnt = ctx->count; spin_unlock_irq(&ctx->wqh.lock); seq_printf(m, "eventfd-count: %16llx\n" "eventfd-id: %d\n" "eventfd-semaphore: %d\n", cnt, ctx->id, !!(ctx->flags & EFD_SEMAPHORE)); } #endif static const struct file_operations eventfd_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = eventfd_show_fdinfo, #endif .release = eventfd_release, .poll = eventfd_poll, .read_iter = eventfd_read, .write = eventfd_write, .llseek = noop_llseek, }; /** * eventfd_fget - Acquire a reference of an eventfd file descriptor. * @fd: [in] Eventfd file descriptor. * * Returns a pointer to the eventfd file structure in case of success, or the * following error pointer: * * -EBADF : Invalid @fd file descriptor. * -EINVAL : The @fd file descriptor is not an eventfd file. */ struct file *eventfd_fget(int fd) { struct file *file; file = fget(fd); if (!file) return ERR_PTR(-EBADF); if (file->f_op != &eventfd_fops) { fput(file); return ERR_PTR(-EINVAL); } return file; } EXPORT_SYMBOL_GPL(eventfd_fget); /** * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context. * @fd: [in] Eventfd file descriptor. * * Returns a pointer to the internal eventfd context, otherwise the error * pointers returned by the following functions: * * eventfd_fget */ struct eventfd_ctx *eventfd_ctx_fdget(int fd) { CLASS(fd, f)(fd); if (fd_empty(f)) return ERR_PTR(-EBADF); return eventfd_ctx_fileget(fd_file(f)); } EXPORT_SYMBOL_GPL(eventfd_ctx_fdget); /** * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context. * @file: [in] Eventfd file pointer. * * Returns a pointer to the internal eventfd context, otherwise the error * pointer: * * -EINVAL : The @fd file descriptor is not an eventfd file. */ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file) { struct eventfd_ctx *ctx; if (file->f_op != &eventfd_fops) return ERR_PTR(-EINVAL); ctx = file->private_data; kref_get(&ctx->kref); return ctx; } EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); static int do_eventfd(unsigned int count, int flags) { struct eventfd_ctx *ctx __free(kfree) = NULL; /* Check the EFD_* constants for consistency. */ BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); BUILD_BUG_ON(EFD_SEMAPHORE != (1 << 0)); if (flags & ~EFD_FLAGS_SET) return -EINVAL; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; kref_init(&ctx->kref); init_waitqueue_head(&ctx->wqh); ctx->count = count; ctx->flags = flags; flags &= EFD_SHARED_FCNTL_FLAGS; flags |= O_RDWR; FD_PREPARE(fdf, flags, anon_inode_getfile_fmode("[eventfd]", &eventfd_fops, ctx, flags, FMODE_NOWAIT)); if (fdf.err) return fdf.err; ctx->id = ida_alloc(&eventfd_ida, GFP_KERNEL); retain_and_null_ptr(ctx); return fd_publish(fdf); } SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) { return do_eventfd(count, flags); } SYSCALL_DEFINE1(eventfd, unsigned int, count) { return do_eventfd(count, 0); } |
| 103 102 102 102 103 101 102 102 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | // SPDX-License-Identifier: LGPL-2.0+ /* * Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. * This file is part of the GNU C Library. * Contributed by Paul Eggert (eggert@twinsun.com). * * The GNU C Library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * The GNU C Library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with the GNU C Library; see the file COPYING.LIB. If not, * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ /* * Converts the calendar time to broken-down time representation * * 2009-7-14: * Moved from glibc-2.6 to kernel by Zhaolei<zhaolei@cn.fujitsu.com> * 2021-06-02: * Reimplemented by Cassio Neri <cassio.neri@gmail.com> */ #include <linux/time.h> #include <linux/module.h> #include <linux/kernel.h> #define SECS_PER_HOUR (60 * 60) #define SECS_PER_DAY (SECS_PER_HOUR * 24) /** * time64_to_tm - converts the calendar time to local broken-down time * * @totalsecs: the number of seconds elapsed since 00:00:00 on January 1, 1970, * Coordinated Universal Time (UTC). * @offset: offset seconds adding to totalsecs. * @result: pointer to struct tm variable to receive broken-down time */ void time64_to_tm(time64_t totalsecs, int offset, struct tm *result) { u32 u32tmp, day_of_century, year_of_century, day_of_year, month, day; u64 u64tmp, udays, century, year; bool is_Jan_or_Feb, is_leap_year; long days, rem; int remainder; days = div_s64_rem(totalsecs, SECS_PER_DAY, &remainder); rem = remainder; rem += offset; while (rem < 0) { rem += SECS_PER_DAY; --days; } while (rem >= SECS_PER_DAY) { rem -= SECS_PER_DAY; ++days; } result->tm_hour = rem / SECS_PER_HOUR; rem %= SECS_PER_HOUR; result->tm_min = rem / 60; result->tm_sec = rem % 60; /* January 1, 1970 was a Thursday. */ result->tm_wday = (4 + days) % 7; if (result->tm_wday < 0) result->tm_wday += 7; /* * The following algorithm is, basically, Proposition 6.3 of Neri * and Schneider [1]. In a few words: it works on the computational * (fictitious) calendar where the year starts in March, month = 2 * (*), and finishes in February, month = 13. This calendar is * mathematically convenient because the day of the year does not * depend on whether the year is leap or not. For instance: * * March 1st 0-th day of the year; * ... * April 1st 31-st day of the year; * ... * January 1st 306-th day of the year; (Important!) * ... * February 28th 364-th day of the year; * February 29th 365-th day of the year (if it exists). * * After having worked out the date in the computational calendar * (using just arithmetics) it's easy to convert it to the * corresponding date in the Gregorian calendar. * * [1] "Euclidean Affine Functions and Applications to Calendar * Algorithms". https://arxiv.org/abs/2102.06959 * * (*) The numbering of months follows tm more closely and thus, * is slightly different from [1]. */ udays = ((u64) days) + 2305843009213814918ULL; u64tmp = 4 * udays + 3; century = div64_u64_rem(u64tmp, 146097, &u64tmp); day_of_century = (u32) (u64tmp / 4); u32tmp = 4 * day_of_century + 3; u64tmp = 2939745ULL * u32tmp; year_of_century = upper_32_bits(u64tmp); day_of_year = lower_32_bits(u64tmp) / 2939745 / 4; year = 100 * century + year_of_century; is_leap_year = year_of_century ? !(year_of_century % 4) : !(century % 4); u32tmp = 2141 * day_of_year + 132377; month = u32tmp >> 16; day = ((u16) u32tmp) / 2141; /* * Recall that January 1st is the 306-th day of the year in the * computational (not Gregorian) calendar. */ is_Jan_or_Feb = day_of_year >= 306; /* Convert to the Gregorian calendar and adjust to Unix time. */ year = year + is_Jan_or_Feb - 6313183731940000ULL; month = is_Jan_or_Feb ? month - 12 : month; day = day + 1; day_of_year += is_Jan_or_Feb ? -306 : 31 + 28 + is_leap_year; /* Convert to tm's format. */ result->tm_year = (long) (year - 1900); result->tm_mon = (int) month; result->tm_mday = (int) day; result->tm_yday = (int) day_of_year; } EXPORT_SYMBOL(time64_to_tm); |
| 176 151 135 294 100 39 60 11 12 18 120 9 10 20 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 | /* FUSE: Filesystem in Userspace Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. */ #ifndef _FS_FUSE_I_H #define _FS_FUSE_I_H #ifndef pr_fmt # define pr_fmt(fmt) "fuse: " fmt #endif #include <linux/fuse.h> #include <linux/fs.h> #include <linux/mount.h> #include <linux/wait.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/mm.h> #include <linux/backing-dev.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/rbtree.h> #include <linux/poll.h> #include <linux/workqueue.h> #include <linux/kref.h> #include <linux/xattr.h> #include <linux/pid_namespace.h> #include <linux/refcount.h> #include <linux/user_namespace.h> /** Default max number of pages that can be used in a single read request */ #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 /** Bias for fi->writectr, meaning new writepages must not be sent */ #define FUSE_NOWRITE INT_MIN /** Maximum length of a filename, not including terminating null */ /* maximum, small enough for FUSE_MIN_READ_BUFFER*/ #define FUSE_NAME_LOW_MAX 1024 /* maximum, but needs a request buffer > FUSE_MIN_READ_BUFFER */ #define FUSE_NAME_MAX (PATH_MAX - 1) /** Number of dentries for each connection in the control filesystem */ #define FUSE_CTL_NUM_DENTRIES 5 /* Frequency (in seconds) of request timeout checks, if opted into */ #define FUSE_TIMEOUT_TIMER_FREQ 15 /** Frequency (in jiffies) of request timeout checks, if opted into */ extern const unsigned long fuse_timeout_timer_freq; /* * Dentries invalidation workqueue period, in seconds. The value of this * parameter shall be >= FUSE_DENTRY_INVAL_FREQ_MIN seconds, or 0 (zero), in * which case no workqueue will be created. */ extern unsigned inval_wq __read_mostly; /** Maximum of max_pages received in init_out */ extern unsigned int fuse_max_pages_limit; /* * Default timeout (in seconds) for the server to reply to a request * before the connection is aborted, if no timeout was specified on mount. */ extern unsigned int fuse_default_req_timeout; /* * Max timeout (in seconds) for the server to reply to a request before * the connection is aborted. */ extern unsigned int fuse_max_req_timeout; /** List of active connections */ extern struct list_head fuse_conn_list; /** Global mutex protecting fuse_conn_list and the control filesystem */ extern struct mutex fuse_mutex; /** Module parameters */ extern unsigned int max_user_bgreq; extern unsigned int max_user_congthresh; /* One forget request */ struct fuse_forget_link { struct fuse_forget_one forget_one; struct fuse_forget_link *next; }; /* Submount lookup tracking */ struct fuse_submount_lookup { /** Refcount */ refcount_t count; /** Unique ID, which identifies the inode between userspace * and kernel */ u64 nodeid; /** The request used for sending the FORGET message */ struct fuse_forget_link *forget; }; /** Container for data related to mapping to backing file */ struct fuse_backing { struct file *file; struct cred *cred; /** refcount */ refcount_t count; struct rcu_head rcu; }; /** FUSE inode */ struct fuse_inode { /** Inode data */ struct inode inode; /** Unique ID, which identifies the inode between userspace * and kernel */ u64 nodeid; /** Number of lookups on this inode */ u64 nlookup; /** The request used for sending the FORGET message */ struct fuse_forget_link *forget; /** Time in jiffies until the file attributes are valid */ u64 i_time; /* Which attributes are invalid */ u32 inval_mask; /** The sticky bit in inode->i_mode may have been removed, so preserve the original mode */ umode_t orig_i_mode; /* Cache birthtime */ struct timespec64 i_btime; /** 64 bit inode number */ u64 orig_ino; /** Version of last attribute change */ u64 attr_version; union { /* read/write io cache (regular file only) */ struct { /* Files usable in writepage. Protected by fi->lock */ struct list_head write_files; /* Writepages pending on truncate or fsync */ struct list_head queued_writes; /* Number of sent writes, a negative bias * (FUSE_NOWRITE) means more writes are blocked */ int writectr; /** Number of files/maps using page cache */ int iocachectr; /* Waitq for writepage completion */ wait_queue_head_t page_waitq; /* waitq for direct-io completion */ wait_queue_head_t direct_io_waitq; }; /* readdir cache (directory only) */ struct { /* true if fully cached */ bool cached; /* size of cache */ loff_t size; /* position at end of cache (position of next entry) */ loff_t pos; /* version of the cache */ u64 version; /* modification time of directory when cache was * started */ struct timespec64 mtime; /* iversion of directory when cache was started */ u64 iversion; /* protects above fields */ spinlock_t lock; } rdc; }; /** Miscellaneous bits describing inode state */ unsigned long state; /** Lock for serializing lookup and readdir for back compatibility*/ struct mutex mutex; /** Lock to protect write related fields */ spinlock_t lock; #ifdef CONFIG_FUSE_DAX /* * Dax specific inode data */ struct fuse_inode_dax *dax; #endif /** Submount specific lookup tracking */ struct fuse_submount_lookup *submount_lookup; #ifdef CONFIG_FUSE_PASSTHROUGH /** Reference to backing file in passthrough mode */ struct fuse_backing *fb; #endif /* * The underlying inode->i_blkbits value will not be modified, * so preserve the blocksize specified by the server. */ u8 cached_i_blkbits; }; /** FUSE inode state bits */ enum { /** Advise readdirplus */ FUSE_I_ADVISE_RDPLUS, /** Initialized with readdirplus */ FUSE_I_INIT_RDPLUS, /** An operation changing file size is in progress */ FUSE_I_SIZE_UNSTABLE, /* Bad inode */ FUSE_I_BAD, /* Has btime */ FUSE_I_BTIME, /* Wants or already has page cache IO */ FUSE_I_CACHE_IO_MODE, /* * Client has exclusive access to the inode, either because fs is local * or the fuse server has an exclusive "lease" on distributed fs */ FUSE_I_EXCLUSIVE, }; struct fuse_conn; struct fuse_mount; union fuse_file_args; /** FUSE specific file data */ struct fuse_file { /** Fuse connection for this file */ struct fuse_mount *fm; /* Argument space reserved for open/release */ union fuse_file_args *args; /** Kernel file handle guaranteed to be unique */ u64 kh; /** File handle used by userspace */ u64 fh; /** Node id of this file */ u64 nodeid; /** Refcount */ refcount_t count; /** FOPEN_* flags returned by open */ u32 open_flags; /** Entry on inode's write_files list */ struct list_head write_entry; /* Readdir related */ struct { /* Dir stream position */ loff_t pos; /* Offset in cache */ loff_t cache_off; /* Version of cache we are reading */ u64 version; } readdir; /** RB node to be linked on fuse_conn->polled_files */ struct rb_node polled_node; /** Wait queue head for poll */ wait_queue_head_t poll_wait; /** Does file hold a fi->iocachectr refcount? */ enum { IOM_NONE, IOM_CACHED, IOM_UNCACHED } iomode; #ifdef CONFIG_FUSE_PASSTHROUGH /** Reference to backing file in passthrough mode */ struct file *passthrough; const struct cred *cred; #endif /** Has flock been performed on this file? */ bool flock:1; }; /** One input argument of a request */ struct fuse_in_arg { unsigned size; const void *value; }; /** One output argument of a request */ struct fuse_arg { unsigned size; void *value; }; /** FUSE folio descriptor */ struct fuse_folio_desc { unsigned int length; unsigned int offset; }; struct fuse_args { uint64_t nodeid; uint32_t opcode; uint8_t in_numargs; uint8_t out_numargs; uint8_t ext_idx; bool force:1; bool noreply:1; bool nocreds:1; bool in_pages:1; bool out_pages:1; bool user_pages:1; bool out_argvar:1; bool page_zeroing:1; bool page_replace:1; bool may_block:1; bool is_ext:1; bool is_pinned:1; bool invalidate_vmap:1; struct fuse_in_arg in_args[4]; struct fuse_arg out_args[2]; void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error); /* Used for kvec iter backed by vmalloc address */ void *vmap_base; }; struct fuse_args_pages { struct fuse_args args; struct folio **folios; struct fuse_folio_desc *descs; unsigned int num_folios; }; struct fuse_release_args { struct fuse_args args; struct fuse_release_in inarg; struct inode *inode; }; union fuse_file_args { /* Used during open() */ struct fuse_open_out open_outarg; /* Used during release() */ struct fuse_release_args release_args; }; #define FUSE_ARGS(args) struct fuse_args args = {} /** The request IO state (for asynchronous processing) */ struct fuse_io_priv { struct kref refcnt; int async; spinlock_t lock; unsigned reqs; ssize_t bytes; size_t size; __u64 offset; bool write; bool should_dirty; int err; struct kiocb *iocb; struct completion *done; bool blocking; }; #define FUSE_IO_PRIV_SYNC(i) \ { \ .refcnt = KREF_INIT(1), \ .async = 0, \ .iocb = i, \ } /** * Request flags * * FR_ISREPLY: set if the request has reply * FR_FORCE: force sending of the request even if interrupted * FR_BACKGROUND: request is sent in the background * FR_WAITING: request is counted as "waiting" * FR_ABORTED: the request was aborted * FR_INTERRUPTED: the request has been interrupted * FR_LOCKED: data is being copied to/from the request * FR_PENDING: request is not yet in userspace * FR_SENT: request is in userspace, waiting for an answer * FR_FINISHED: request is finished * FR_PRIVATE: request is on private list * FR_ASYNC: request is asynchronous * FR_URING: request is handled through fuse-io-uring */ enum fuse_req_flag { FR_ISREPLY, FR_FORCE, FR_BACKGROUND, FR_WAITING, FR_ABORTED, FR_INTERRUPTED, FR_LOCKED, FR_PENDING, FR_SENT, FR_FINISHED, FR_PRIVATE, FR_ASYNC, FR_URING, }; /** * A request to the client * * .waitq.lock protects the following fields: * - FR_ABORTED * - FR_LOCKED (may also be modified under fc->lock, tested under both) */ struct fuse_req { /** This can be on either pending processing or io lists in fuse_conn */ struct list_head list; /** Entry on the interrupts list */ struct list_head intr_entry; /* Input/output arguments */ struct fuse_args *args; /** refcount */ refcount_t count; /* Request flags, updated with test/set/clear_bit() */ unsigned long flags; /* The request input header */ struct { struct fuse_in_header h; } in; /* The request output header */ struct { struct fuse_out_header h; } out; /** Used to wake up the task waiting for completion of request*/ wait_queue_head_t waitq; #if IS_ENABLED(CONFIG_VIRTIO_FS) /** virtio-fs's physically contiguous buffer for in and out args */ void *argbuf; #endif /** fuse_mount this request belongs to */ struct fuse_mount *fm; #ifdef CONFIG_FUSE_IO_URING void *ring_entry; void *ring_queue; #endif /** When (in jiffies) the request was created */ unsigned long create_time; }; struct fuse_iqueue; /** * Input queue callbacks * * Input queue signalling is device-specific. For example, the /dev/fuse file * uses fiq->waitq and fasync to wake processes that are waiting on queue * readiness. These callbacks allow other device types to respond to input * queue activity. */ struct fuse_iqueue_ops { /** * Send one forget */ void (*send_forget)(struct fuse_iqueue *fiq, struct fuse_forget_link *link); /** * Send interrupt for request */ void (*send_interrupt)(struct fuse_iqueue *fiq, struct fuse_req *req); /** * Send one request */ void (*send_req)(struct fuse_iqueue *fiq, struct fuse_req *req); /** * Clean up when fuse_iqueue is destroyed */ void (*release)(struct fuse_iqueue *fiq); }; /** /dev/fuse input queue operations */ extern const struct fuse_iqueue_ops fuse_dev_fiq_ops; struct fuse_iqueue { /** Connection established */ unsigned connected; /** Lock protecting accesses to members of this structure */ spinlock_t lock; /** Readers of the connection are waiting on this */ wait_queue_head_t waitq; /** The next unique request id */ u64 reqctr; /** The list of pending requests */ struct list_head pending; /** Pending interrupts */ struct list_head interrupts; /** Queue of pending forgets */ struct fuse_forget_link forget_list_head; struct fuse_forget_link *forget_list_tail; /** Batching of FORGET requests (positive indicates FORGET batch) */ int forget_batch; /** O_ASYNC requests */ struct fasync_struct *fasync; /** Device-specific callbacks */ const struct fuse_iqueue_ops *ops; /** Device-specific state */ void *priv; }; #define FUSE_PQ_HASH_BITS 8 #define FUSE_PQ_HASH_SIZE (1 << FUSE_PQ_HASH_BITS) struct fuse_pqueue { /** Connection established */ unsigned connected; /** Lock protecting accessess to members of this structure */ spinlock_t lock; /** Hash table of requests being processed */ struct list_head *processing; /** The list of requests under I/O */ struct list_head io; }; /** * Fuse device instance */ struct fuse_dev { /** Fuse connection for this device */ struct fuse_conn *fc; /** Processing queue */ struct fuse_pqueue pq; /** list entry on fc->devices */ struct list_head entry; }; enum fuse_dax_mode { FUSE_DAX_INODE_DEFAULT, /* default */ FUSE_DAX_ALWAYS, /* "-o dax=always" */ FUSE_DAX_NEVER, /* "-o dax=never" */ FUSE_DAX_INODE_USER, /* "-o dax=inode" */ }; static inline bool fuse_is_inode_dax_mode(enum fuse_dax_mode mode) { return mode == FUSE_DAX_INODE_DEFAULT || mode == FUSE_DAX_INODE_USER; } struct fuse_fs_context { int fd; struct file *file; unsigned int rootmode; kuid_t user_id; kgid_t group_id; bool is_bdev:1; bool fd_present:1; bool rootmode_present:1; bool user_id_present:1; bool group_id_present:1; bool default_permissions:1; bool allow_other:1; bool destroy:1; bool no_control:1; bool no_force_umount:1; bool legacy_opts_show:1; enum fuse_dax_mode dax_mode; unsigned int max_read; unsigned int blksize; const char *subtype; /* DAX device, may be NULL */ struct dax_device *dax_dev; /* fuse_dev pointer to fill in, should contain NULL on entry */ void **fudptr; }; struct fuse_sync_bucket { /* count is a possible scalability bottleneck */ atomic_t count; wait_queue_head_t waitq; struct rcu_head rcu; }; /** * A Fuse connection. * * This structure is created, when the root filesystem is mounted, and * is destroyed, when the client device is closed and the last * fuse_mount is destroyed. */ struct fuse_conn { /** Lock protecting accessess to members of this structure */ spinlock_t lock; /** Refcount */ refcount_t count; /** Number of fuse_dev's */ atomic_t dev_count; /** Current epoch for up-to-date dentries */ atomic_t epoch; struct work_struct epoch_work; struct rcu_head rcu; /** The user id for this mount */ kuid_t user_id; /** The group id for this mount */ kgid_t group_id; /** The pid namespace for this mount */ struct pid_namespace *pid_ns; /** The user namespace for this mount */ struct user_namespace *user_ns; /** Maximum read size */ unsigned max_read; /** Maximum write size */ unsigned max_write; /** Maximum number of pages that can be used in a single request */ unsigned int max_pages; /** Constrain ->max_pages to this value during feature negotiation */ unsigned int max_pages_limit; /** Input queue */ struct fuse_iqueue iq; /** The next unique kernel file handle */ atomic64_t khctr; /** rbtree of fuse_files waiting for poll events indexed by ph */ struct rb_root polled_files; /** Maximum number of outstanding background requests */ unsigned max_background; /** Number of background requests at which congestion starts */ unsigned congestion_threshold; /** Number of requests currently in the background */ unsigned num_background; /** Number of background requests currently queued for userspace */ unsigned active_background; /** The list of background requests set aside for later queuing */ struct list_head bg_queue; /** Protects: max_background, congestion_threshold, num_background, * active_background, bg_queue, blocked */ spinlock_t bg_lock; /** Flag indicating that INIT reply has been received. Allocating * any fuse request will be suspended until the flag is set */ int initialized; /** Flag indicating if connection is blocked. This will be the case before the INIT reply is received, and if there are too many outstading backgrounds requests */ int blocked; /** waitq for blocked connection */ wait_queue_head_t blocked_waitq; /** Connection established, cleared on umount, connection abort and device release */ unsigned connected; /** Connection aborted via sysfs */ bool aborted; /** Connection failed (version mismatch). Cannot race with setting other bitfields since it is only set once in INIT reply, before any other request, and never cleared */ unsigned conn_error:1; /** Connection successful. Only set in INIT */ unsigned conn_init:1; /** Do readahead asynchronously? Only set in INIT */ unsigned async_read:1; /** Return an unique read error after abort. Only set in INIT */ unsigned abort_err:1; /** Do not send separate SETATTR request before open(O_TRUNC) */ unsigned atomic_o_trunc:1; /** Filesystem supports NFS exporting. Only set in INIT */ unsigned export_support:1; /** write-back cache policy (default is write-through) */ unsigned writeback_cache:1; /** allow parallel lookups and readdir (default is serialized) */ unsigned parallel_dirops:1; /** handle fs handles killing suid/sgid/cap on write/chown/trunc */ unsigned handle_killpriv:1; /** cache READLINK responses in page cache */ unsigned cache_symlinks:1; /* show legacy mount options */ unsigned int legacy_opts_show:1; /* * fs kills suid/sgid/cap on write/chown/trunc. suid is killed on * write/trunc only if caller did not have CAP_FSETID. sgid is killed * on write/truncate only if caller did not have CAP_FSETID as well as * file has group execute permission. */ unsigned handle_killpriv_v2:1; /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction */ /** Is open/release not implemented by fs? */ unsigned no_open:1; /** Is opendir/releasedir not implemented by fs? */ unsigned no_opendir:1; /** Is fsync not implemented by fs? */ unsigned no_fsync:1; /** Is fsyncdir not implemented by fs? */ unsigned no_fsyncdir:1; /** Is flush not implemented by fs? */ unsigned no_flush:1; /** Is setxattr not implemented by fs? */ unsigned no_setxattr:1; /** Does file server support extended setxattr */ unsigned setxattr_ext:1; /** Is getxattr not implemented by fs? */ unsigned no_getxattr:1; /** Is listxattr not implemented by fs? */ unsigned no_listxattr:1; /** Is removexattr not implemented by fs? */ unsigned no_removexattr:1; /** Are posix file locking primitives not implemented by fs? */ unsigned no_lock:1; /** Is access not implemented by fs? */ unsigned no_access:1; /** Is create not implemented by fs? */ unsigned no_create:1; /** Is interrupt not implemented by fs? */ unsigned no_interrupt:1; /** Is bmap not implemented by fs? */ unsigned no_bmap:1; /** Is poll not implemented by fs? */ unsigned no_poll:1; /** Do multi-page cached writes */ unsigned big_writes:1; /** Don't apply umask to creation modes */ unsigned dont_mask:1; /** Are BSD file locking primitives not implemented by fs? */ unsigned no_flock:1; /** Is fallocate not implemented by fs? */ unsigned no_fallocate:1; /** Is rename with flags implemented by fs? */ unsigned no_rename2:1; /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; /** Filesystem is fully responsible for page cache invalidation. */ unsigned explicit_inval_data:1; /** Does the filesystem support readdirplus? */ unsigned do_readdirplus:1; /** Does the filesystem want adaptive readdirplus? */ unsigned readdirplus_auto:1; /** Does the filesystem support asynchronous direct-IO submission? */ unsigned async_dio:1; /** Is lseek not implemented by fs? */ unsigned no_lseek:1; /** Does the filesystem support posix acls? */ unsigned posix_acl:1; /** Check permissions based on the file mode or not? */ unsigned default_permissions:1; /** Allow other than the mounter user to access the filesystem ? */ unsigned allow_other:1; /** Does the filesystem support copy_file_range? */ unsigned no_copy_file_range:1; /** Does the filesystem support copy_file_range_64? */ unsigned no_copy_file_range_64:1; /* Send DESTROY request */ unsigned int destroy:1; /* Delete dentries that have gone stale */ unsigned int delete_stale:1; /** Do not create entry in fusectl fs */ unsigned int no_control:1; /** Do not allow MNT_FORCE umount */ unsigned int no_force_umount:1; /* Auto-mount submounts announced by the server */ unsigned int auto_submounts:1; /* Propagate syncfs() to server */ unsigned int sync_fs:1; /* Initialize security xattrs when creating a new inode */ unsigned int init_security:1; /* Add supplementary group info when creating a new inode */ unsigned int create_supp_group:1; /* Does the filesystem support per inode DAX? */ unsigned int inode_dax:1; /* Is tmpfile not implemented by fs? */ unsigned int no_tmpfile:1; /* Relax restrictions to allow shared mmap in FOPEN_DIRECT_IO mode */ unsigned int direct_io_allow_mmap:1; /* Is statx not implemented by fs? */ unsigned int no_statx:1; /** Passthrough support for read/write IO */ unsigned int passthrough:1; /* Use pages instead of pointer for kernel I/O */ unsigned int use_pages_for_kvec_io:1; /* Is link not implemented by fs? */ unsigned int no_link:1; /* Is synchronous FUSE_INIT allowed? */ unsigned int sync_init:1; /* Use io_uring for communication */ unsigned int io_uring; /** Maximum stack depth for passthrough backing files */ int max_stack_depth; /** The number of requests waiting for completion */ atomic_t num_waiting; /** Negotiated minor version */ unsigned minor; /** Entry on the fuse_conn_list */ struct list_head entry; /** Device ID from the root super block */ dev_t dev; /** Key for lock owner ID scrambling */ u32 scramble_key[4]; /** Version counter for attribute changes */ atomic64_t attr_version; /** Version counter for evict inode */ atomic64_t evict_ctr; /* maximum file name length */ u32 name_max; /** Called on final put */ void (*release)(struct fuse_conn *); /** * Read/write semaphore to hold when accessing the sb of any * fuse_mount belonging to this connection */ struct rw_semaphore killsb; /** List of device instances belonging to this connection */ struct list_head devices; #ifdef CONFIG_FUSE_DAX /* Dax mode */ enum fuse_dax_mode dax_mode; /* Dax specific conn data, non-NULL if DAX is enabled */ struct fuse_conn_dax *dax; #endif /** List of filesystems using this connection */ struct list_head mounts; /* New writepages go into this bucket */ struct fuse_sync_bucket __rcu *curr_bucket; #ifdef CONFIG_FUSE_PASSTHROUGH /** IDR for backing files ids */ struct idr backing_files_map; #endif #ifdef CONFIG_FUSE_IO_URING /** uring connection information*/ struct fuse_ring *ring; #endif /** Only used if the connection opts into request timeouts */ struct { /* Worker for checking if any requests have timed out */ struct delayed_work work; /* Request timeout (in jiffies). 0 = no timeout */ unsigned int req_timeout; } timeout; }; /* * Represents a mounted filesystem, potentially a submount. * * This object allows sharing a fuse_conn between separate mounts to * allow submounts with dedicated superblocks and thus separate device * IDs. */ struct fuse_mount { /* Underlying (potentially shared) connection to the FUSE server */ struct fuse_conn *fc; /* * Super block for this connection (fc->killsb must be held when * accessing this). */ struct super_block *sb; /* Entry on fc->mounts */ struct list_head fc_entry; struct rcu_head rcu; }; /* * Empty header for FUSE opcodes without specific header needs. * Used as a placeholder in args->in_args[0] for consistency * across all FUSE operations, simplifying request handling. */ struct fuse_zero_header {}; static inline void fuse_set_zero_arg0(struct fuse_args *args) { args->in_args[0].size = sizeof(struct fuse_zero_header); args->in_args[0].value = NULL; } static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb) { return sb->s_fs_info; } static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) { return get_fuse_mount_super(sb)->fc; } static inline struct fuse_mount *get_fuse_mount(struct inode *inode) { return get_fuse_mount_super(inode->i_sb); } static inline struct fuse_conn *get_fuse_conn(struct inode *inode) { return get_fuse_mount_super(inode->i_sb)->fc; } static inline struct fuse_inode *get_fuse_inode(const struct inode *inode) { return container_of(inode, struct fuse_inode, inode); } static inline u64 get_node_id(struct inode *inode) { return get_fuse_inode(inode)->nodeid; } static inline int invalid_nodeid(u64 nodeid) { return !nodeid || nodeid == FUSE_ROOT_ID; } static inline u64 fuse_get_attr_version(struct fuse_conn *fc) { return atomic64_read(&fc->attr_version); } static inline u64 fuse_get_evict_ctr(struct fuse_conn *fc) { return atomic64_read(&fc->evict_ctr); } static inline bool fuse_stale_inode(const struct inode *inode, int generation, struct fuse_attr *attr) { return inode->i_generation != generation || inode_wrong_type(inode, attr->mode); } static inline void fuse_make_bad(struct inode *inode) { set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); } static inline bool fuse_is_bad(struct inode *inode) { return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state)); } static inline bool fuse_inode_is_exclusive(const struct inode *inode) { const struct fuse_inode *fi = get_fuse_inode(inode); return test_bit(FUSE_I_EXCLUSIVE, &fi->state); } static inline struct folio **fuse_folios_alloc(unsigned int nfolios, gfp_t flags, struct fuse_folio_desc **desc) { struct folio **folios; folios = kzalloc(nfolios * (sizeof(struct folio *) + sizeof(struct fuse_folio_desc)), flags); *desc = (void *) (folios + nfolios); return folios; } static inline void fuse_folio_descs_length_init(struct fuse_folio_desc *descs, unsigned int index, unsigned int nr_folios) { int i; for (i = index; i < index + nr_folios; i++) descs[i].length = PAGE_SIZE - descs[i].offset; } static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket) { /* Need RCU protection to prevent use after free after the decrement */ rcu_read_lock(); if (atomic_dec_and_test(&bucket->count)) wake_up(&bucket->waitq); rcu_read_unlock(); } /** Device operations */ extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; /** * Get a filled in inode */ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, u64 attr_valid, u64 attr_version, u64 evict_ctr); int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, struct fuse_entry_out *outarg, struct inode **inode); /** * Send FORGET command */ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, u64 nodeid, u64 nlookup); struct fuse_forget_link *fuse_alloc_forget(void); /* * Initialize READ or READDIR request */ struct fuse_io_args { union { struct { struct fuse_read_in in; u64 attr_ver; } read; struct { struct fuse_write_in in; struct fuse_write_out out; bool folio_locked; } write; }; struct fuse_args_pages ap; struct fuse_io_priv *io; struct fuse_file *ff; }; void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos, size_t count, int opcode); struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release); void fuse_file_free(struct fuse_file *ff); int fuse_finish_open(struct inode *inode, struct file *file); void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, unsigned int flags); /** * Send RELEASE or RELEASEDIR request */ void fuse_release_common(struct file *file, bool isdir); /** * Send FSYNC or FSYNCDIR request */ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, int datasync, int opcode); /** * Notify poll wakeup */ int fuse_notify_poll_wakeup(struct fuse_conn *fc, struct fuse_notify_poll_wakeup_out *outarg); /** * Initialize file operations on a regular file */ void fuse_init_file_inode(struct inode *inode, unsigned int flags); /** * Initialize inode operations on regular files and special files */ void fuse_init_common(struct inode *inode); /** * Initialize inode and file operations on a directory */ void fuse_init_dir(struct inode *inode); /** * Initialize inode operations on a symlink */ void fuse_init_symlink(struct inode *inode); /** * Change attributes of an inode */ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, struct fuse_statx *sx, u64 attr_valid, u64 attr_version); void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, struct fuse_statx *sx, u64 attr_valid, u32 cache_mask, u64 evict_ctr); u32 fuse_get_cache_mask(struct inode *inode); /** * Initialize the client device */ int fuse_dev_init(void); /** * Cleanup the client device */ void fuse_dev_cleanup(void); int fuse_ctl_init(void); void __exit fuse_ctl_cleanup(void); /** * Simple request sending that does request allocation and freeing */ ssize_t __fuse_simple_request(struct mnt_idmap *idmap, struct fuse_mount *fm, struct fuse_args *args); static inline ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args) { return __fuse_simple_request(&invalid_mnt_idmap, fm, args); } static inline ssize_t fuse_simple_idmap_request(struct mnt_idmap *idmap, struct fuse_mount *fm, struct fuse_args *args) { return __fuse_simple_request(idmap, fm, args); } int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, gfp_t gfp_flags); /** * Assign a unique id to a fuse request */ void fuse_request_assign_unique(struct fuse_iqueue *fiq, struct fuse_req *req); /** * End a finished request */ void fuse_request_end(struct fuse_req *req); /* Abort all requests */ void fuse_abort_conn(struct fuse_conn *fc); void fuse_wait_aborted(struct fuse_conn *fc); /* Check if any requests timed out */ void fuse_check_timeout(struct work_struct *work); void fuse_dentry_tree_init(void); void fuse_dentry_tree_cleanup(void); void fuse_epoch_work(struct work_struct *work); /** * Invalidate inode attributes */ /* Attributes possibly changed on data modification */ #define FUSE_STATX_MODIFY (STATX_MTIME | STATX_CTIME | STATX_BLOCKS) /* Attributes possibly changed on data and/or size modification */ #define FUSE_STATX_MODSIZE (FUSE_STATX_MODIFY | STATX_SIZE) void fuse_invalidate_attr(struct inode *inode); void fuse_invalidate_attr_mask(struct inode *inode, u32 mask); void fuse_invalidate_entry_cache(struct dentry *entry); void fuse_invalidate_atime(struct inode *inode); u64 fuse_time_to_jiffies(u64 sec, u32 nsec); #define ATTR_TIMEOUT(o) \ fuse_time_to_jiffies((o)->attr_valid, (o)->attr_valid_nsec) void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o); /** * Acquire reference to fuse_conn */ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); /** * Initialize the fuse processing queue */ void fuse_pqueue_init(struct fuse_pqueue *fpq); /** * Initialize fuse_conn */ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, struct user_namespace *user_ns, const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv); /** * Release reference to fuse_conn */ void fuse_conn_put(struct fuse_conn *fc); struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc); struct fuse_dev *fuse_dev_alloc(void); void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc); void fuse_dev_free(struct fuse_dev *fud); int fuse_send_init(struct fuse_mount *fm); /** * Fill in superblock and initialize fuse connection * @sb: partially-initialized superblock to fill in * @ctx: mount context */ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx); /* * Remove the mount from the connection * * Returns whether this was the last mount */ bool fuse_mount_remove(struct fuse_mount *fm); /* * Setup context ops for submounts */ int fuse_init_fs_context_submount(struct fs_context *fsc); /* * Shut down the connection (possibly sending DESTROY request). */ void fuse_conn_destroy(struct fuse_mount *fm); /* Drop the connection and free the fuse mount */ void fuse_mount_destroy(struct fuse_mount *fm); /** * Add connection to control filesystem */ int fuse_ctl_add_conn(struct fuse_conn *fc); /** * Remove connection from control filesystem */ void fuse_ctl_remove_conn(struct fuse_conn *fc); /** * Is file type valid? */ int fuse_valid_type(int m); bool fuse_invalid_attr(struct fuse_attr *attr); /** * Is current process allowed to perform filesystem operation? */ bool fuse_allow_current_process(struct fuse_conn *fc); u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); void fuse_flush_time_update(struct inode *inode); void fuse_update_ctime(struct inode *inode); int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask); void fuse_flush_writepages(struct inode *inode); void fuse_set_nowrite(struct inode *inode); void fuse_release_nowrite(struct inode *inode); /** * Scan all fuse_mounts belonging to fc to find the first where * ilookup5() returns a result. Return that result and the * respective fuse_mount in *fm (unless fm is NULL). * * The caller must hold fc->killsb. */ struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, struct fuse_mount **fm); /** * File-system tells the kernel to invalidate cache for the given node id. */ int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, loff_t offset, loff_t len); /** * File-system tells the kernel to invalidate parent attributes and * the dentry matching parent/name. * * If the child_nodeid is non-zero and: * - matches the inode number for the dentry matching parent/name, * - is not a mount point * - is a file or oan empty directory * then the dentry is unhashed (d_delete()). */ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, u64 child_nodeid, struct qstr *name, u32 flags); /* * Try to prune this inode. If neither the inode itself nor dentries associated * with this inode have any external reference, then the inode can be freed. */ void fuse_try_prune_one_inode(struct fuse_conn *fc, u64 nodeid); int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, bool isdir); /** * fuse_direct_io() flags */ /** If set, it is WRITE; otherwise - READ */ #define FUSE_DIO_WRITE (1 << 0) /** CUSE pass fuse_direct_io() a file which f_mapping->host is not from FUSE */ #define FUSE_DIO_CUSE (1 << 1) ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, loff_t *ppos, int flags); long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, unsigned int flags); long fuse_ioctl_common(struct file *file, unsigned int cmd, unsigned long arg, unsigned int flags); __poll_t fuse_file_poll(struct file *file, poll_table *wait); int fuse_dev_release(struct inode *inode, struct file *file); bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written); int fuse_flush_times(struct inode *inode, struct fuse_file *ff); int fuse_write_inode(struct inode *inode, struct writeback_control *wbc); int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr, struct file *file); void fuse_set_initialized(struct fuse_conn *fc); void fuse_unlock_inode(struct inode *inode, bool locked); bool fuse_lock_inode(struct inode *inode); int fuse_setxattr(struct inode *inode, const char *name, const void *value, size_t size, int flags, unsigned int extra_flags); ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, size_t size); ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size); int fuse_removexattr(struct inode *inode, const char *name); extern const struct xattr_handler * const fuse_xattr_handlers[]; struct posix_acl; struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu); struct posix_acl *fuse_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type); int fuse_set_acl(struct mnt_idmap *, struct dentry *dentry, struct posix_acl *acl, int type); /* readdir.c */ int fuse_readdir(struct file *file, struct dir_context *ctx); /** * Return the number of bytes in an arguments list */ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args); /** * Get the next unique ID for a request */ u64 fuse_get_unique(struct fuse_iqueue *fiq); void fuse_free_conn(struct fuse_conn *fc); /* dax.c */ #define FUSE_IS_DAX(inode) (IS_ENABLED(CONFIG_FUSE_DAX) && IS_DAX(inode)) ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to); ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from); int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma); int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end); int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode mode, struct dax_device *dax_dev); void fuse_dax_conn_free(struct fuse_conn *fc); bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi); void fuse_dax_inode_init(struct inode *inode, unsigned int flags); void fuse_dax_inode_cleanup(struct inode *inode); void fuse_dax_dontcache(struct inode *inode, unsigned int flags); bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment); void fuse_dax_cancel_work(struct fuse_conn *fc); /* ioctl.c */ long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int fuse_fileattr_get(struct dentry *dentry, struct file_kattr *fa); int fuse_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct file_kattr *fa); /* iomode.c */ int fuse_file_cached_io_open(struct inode *inode, struct fuse_file *ff); int fuse_inode_uncached_io_start(struct fuse_inode *fi, struct fuse_backing *fb); void fuse_inode_uncached_io_end(struct fuse_inode *fi); int fuse_file_io_open(struct file *file, struct inode *inode); void fuse_file_io_release(struct fuse_file *ff, struct inode *inode); /* file.c */ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, unsigned int open_flags, bool isdir); void fuse_file_release(struct inode *inode, struct fuse_file *ff, unsigned int open_flags, fl_owner_t id, bool isdir); /* backing.c */ #ifdef CONFIG_FUSE_PASSTHROUGH struct fuse_backing *fuse_backing_get(struct fuse_backing *fb); void fuse_backing_put(struct fuse_backing *fb); struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, int backing_id); #else static inline struct fuse_backing *fuse_backing_get(struct fuse_backing *fb) { return NULL; } static inline void fuse_backing_put(struct fuse_backing *fb) { } static inline struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, int backing_id) { return NULL; } #endif void fuse_backing_files_init(struct fuse_conn *fc); void fuse_backing_files_free(struct fuse_conn *fc); int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map); int fuse_backing_close(struct fuse_conn *fc, int backing_id); /* passthrough.c */ static inline struct fuse_backing *fuse_inode_backing(struct fuse_inode *fi) { #ifdef CONFIG_FUSE_PASSTHROUGH return READ_ONCE(fi->fb); #else return NULL; #endif } static inline struct fuse_backing *fuse_inode_backing_set(struct fuse_inode *fi, struct fuse_backing *fb) { #ifdef CONFIG_FUSE_PASSTHROUGH return xchg(&fi->fb, fb); #else return NULL; #endif } struct fuse_backing *fuse_passthrough_open(struct file *file, int backing_id); void fuse_passthrough_release(struct fuse_file *ff, struct fuse_backing *fb); static inline struct file *fuse_file_passthrough(struct fuse_file *ff) { #ifdef CONFIG_FUSE_PASSTHROUGH return ff->passthrough; #else return NULL; #endif } ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *iter); ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *iter); ssize_t fuse_passthrough_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); ssize_t fuse_passthrough_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags); ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma); #ifdef CONFIG_SYSCTL extern int fuse_sysctl_register(void); extern void fuse_sysctl_unregister(void); #else #define fuse_sysctl_register() (0) #define fuse_sysctl_unregister() do { } while (0) #endif /* CONFIG_SYSCTL */ #endif /* _FS_FUSE_I_H */ |
| 98 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 | // SPDX-License-Identifier: GPL-2.0-or-later /* * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/netfilter.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/moduleparam.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_timestamp.h> static bool nf_ct_tstamp __read_mostly; module_param_named(tstamp, nf_ct_tstamp, bool, 0644); MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping."); void nf_conntrack_tstamp_pernet_init(struct net *net) { net->ct.sysctl_tstamp = nf_ct_tstamp; } |
| 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 | // SPDX-License-Identifier: GPL-2.0+ /* * rti802.c * Comedi driver for Analog Devices RTI-802 board * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 1999 Anders Blomdell <anders.blomdell@control.lth.se> */ /* * Driver: rti802 * Description: Analog Devices RTI-802 * Author: Anders Blomdell <anders.blomdell@control.lth.se> * Devices: [Analog Devices] RTI-802 (rti802) * Status: works * * Configuration Options: * [0] - i/o base * [1] - unused * [2,4,6,8,10,12,14,16] - dac#[0-7] 0=two's comp, 1=straight * [3,5,7,9,11,13,15,17] - dac#[0-7] 0=bipolar, 1=unipolar */ #include <linux/module.h> #include <linux/comedi/comedidev.h> /* * Register I/O map */ #define RTI802_SELECT 0x00 #define RTI802_DATALOW 0x01 #define RTI802_DATAHIGH 0x02 struct rti802_private { enum { dac_2comp, dac_straight } dac_coding[8]; const struct comedi_lrange *range_type_list[8]; }; static int rti802_ao_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct rti802_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); int i; outb(chan, dev->iobase + RTI802_SELECT); for (i = 0; i < insn->n; i++) { unsigned int val = data[i]; s->readback[chan] = val; /* munge offset binary to two's complement if needed */ if (devpriv->dac_coding[chan] == dac_2comp) val = comedi_offset_munge(s, val); outb(val & 0xff, dev->iobase + RTI802_DATALOW); outb((val >> 8) & 0xff, dev->iobase + RTI802_DATAHIGH); } return insn->n; } static int rti802_attach(struct comedi_device *dev, struct comedi_devconfig *it) { struct rti802_private *devpriv; struct comedi_subdevice *s; int i; int ret; ret = comedi_request_region(dev, it->options[0], 0x04); if (ret) return ret; devpriv = comedi_alloc_devpriv(dev, sizeof(*devpriv)); if (!devpriv) return -ENOMEM; ret = comedi_alloc_subdevices(dev, 1); if (ret) return ret; /* Analog Output subdevice */ s = &dev->subdevices[0]; s->type = COMEDI_SUBD_AO; s->subdev_flags = SDF_WRITABLE; s->maxdata = 0xfff; s->n_chan = 8; s->insn_write = rti802_ao_insn_write; ret = comedi_alloc_subdev_readback(s); if (ret) return ret; s->range_table_list = devpriv->range_type_list; for (i = 0; i < 8; i++) { devpriv->dac_coding[i] = (it->options[3 + 2 * i]) ? (dac_straight) : (dac_2comp); devpriv->range_type_list[i] = (it->options[2 + 2 * i]) ? &range_unipolar10 : &range_bipolar10; } return 0; } static struct comedi_driver rti802_driver = { .driver_name = "rti802", .module = THIS_MODULE, .attach = rti802_attach, .detach = comedi_legacy_detach, }; module_comedi_driver(rti802_driver); MODULE_AUTHOR("Comedi https://www.comedi.org"); MODULE_DESCRIPTION("Comedi driver for Analog Devices RTI-802 board"); MODULE_LICENSE("GPL"); |
| 77 77 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/types.h> #include <linux/netfilter.h> #include <net/tcp.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_seqadj.h> int nf_ct_seqadj_init(struct nf_conn *ct, enum ip_conntrack_info ctinfo, s32 off) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct nf_conn_seqadj *seqadj; struct nf_ct_seqadj *this_way; if (off == 0) return 0; set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); seqadj = nfct_seqadj(ct); this_way = &seqadj->seq[dir]; this_way->offset_before = off; this_way->offset_after = off; return 0; } EXPORT_SYMBOL_GPL(nf_ct_seqadj_init); int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, __be32 seq, s32 off) { struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct nf_ct_seqadj *this_way; if (off == 0) return 0; if (unlikely(!seqadj)) { WARN_ONCE(1, "Missing nfct_seqadj_ext_add() setup call\n"); return 0; } set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); spin_lock_bh(&ct->lock); this_way = &seqadj->seq[dir]; if (this_way->offset_before == this_way->offset_after || before(this_way->correction_pos, ntohl(seq))) { this_way->correction_pos = ntohl(seq); this_way->offset_before = this_way->offset_after; this_way->offset_after += off; } spin_unlock_bh(&ct->lock); return 0; } EXPORT_SYMBOL_GPL(nf_ct_seqadj_set); void nf_ct_tcp_seqadj_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, s32 off) { const struct tcphdr *th; if (nf_ct_protonum(ct) != IPPROTO_TCP) return; th = (struct tcphdr *)(skb_network_header(skb) + ip_hdrlen(skb)); nf_ct_seqadj_set(ct, ctinfo, th->seq, off); } EXPORT_SYMBOL_GPL(nf_ct_tcp_seqadj_set); /* Adjust one found SACK option including checksum correction */ static void nf_ct_sack_block_adjust(struct sk_buff *skb, struct tcphdr *tcph, unsigned int sackoff, unsigned int sackend, struct nf_ct_seqadj *seq) { while (sackoff < sackend) { struct tcp_sack_block_wire *sack; __be32 new_start_seq, new_end_seq; sack = (void *)skb->data + sackoff; if (after(ntohl(sack->start_seq) - seq->offset_before, seq->correction_pos)) new_start_seq = htonl(ntohl(sack->start_seq) - seq->offset_after); else new_start_seq = htonl(ntohl(sack->start_seq) - seq->offset_before); if (after(ntohl(sack->end_seq) - seq->offset_before, seq->correction_pos)) new_end_seq = htonl(ntohl(sack->end_seq) - seq->offset_after); else new_end_seq = htonl(ntohl(sack->end_seq) - seq->offset_before); pr_debug("sack_adjust: start_seq: %u->%u, end_seq: %u->%u\n", ntohl(sack->start_seq), ntohl(new_start_seq), ntohl(sack->end_seq), ntohl(new_end_seq)); inet_proto_csum_replace4(&tcph->check, skb, sack->start_seq, new_start_seq, false); inet_proto_csum_replace4(&tcph->check, skb, sack->end_seq, new_end_seq, false); sack->start_seq = new_start_seq; sack->end_seq = new_end_seq; sackoff += sizeof(*sack); } } /* TCP SACK sequence number adjustment */ static unsigned int nf_ct_sack_adjust(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { struct tcphdr *tcph = (void *)skb->data + protoff; struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); unsigned int dir, optoff, optend; optoff = protoff + sizeof(struct tcphdr); optend = protoff + tcph->doff * 4; if (skb_ensure_writable(skb, optend)) return 0; tcph = (void *)skb->data + protoff; dir = CTINFO2DIR(ctinfo); while (optoff < optend) { /* Usually: option, length. */ unsigned char *op = skb->data + optoff; switch (op[0]) { case TCPOPT_EOL: return 1; case TCPOPT_NOP: optoff++; continue; default: /* no partial options */ if (optoff + 1 == optend || optoff + op[1] > optend || op[1] < 2) return 0; if (op[0] == TCPOPT_SACK && op[1] >= 2+TCPOLEN_SACK_PERBLOCK && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) nf_ct_sack_block_adjust(skb, tcph, optoff + 2, optoff+op[1], &seqadj->seq[!dir]); optoff += op[1]; } } return 1; } /* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ int nf_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct tcphdr *tcph; __be32 newseq, newack; s32 seqoff, ackoff; struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); struct nf_ct_seqadj *this_way, *other_way; int res = 1; this_way = &seqadj->seq[dir]; other_way = &seqadj->seq[!dir]; if (skb_ensure_writable(skb, protoff + sizeof(*tcph))) return 0; tcph = (void *)skb->data + protoff; spin_lock_bh(&ct->lock); if (after(ntohl(tcph->seq), this_way->correction_pos)) seqoff = this_way->offset_after; else seqoff = this_way->offset_before; newseq = htonl(ntohl(tcph->seq) + seqoff); inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false); pr_debug("Adjusting sequence number from %u->%u\n", ntohl(tcph->seq), ntohl(newseq)); tcph->seq = newseq; if (!tcph->ack) goto out; if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) ackoff = other_way->offset_after; else ackoff = other_way->offset_before; newack = htonl(ntohl(tcph->ack_seq) - ackoff); inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, false); pr_debug("Adjusting ack number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), ntohl(newack)); tcph->ack_seq = newack; res = nf_ct_sack_adjust(skb, protoff, ct, ctinfo); out: spin_unlock_bh(&ct->lock); return res; } EXPORT_SYMBOL_GPL(nf_ct_seq_adjust); s32 nf_ct_seq_offset(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq) { struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); struct nf_ct_seqadj *this_way; if (!seqadj) return 0; this_way = &seqadj->seq[dir]; return after(seq, this_way->correction_pos) ? this_way->offset_after : this_way->offset_before; } EXPORT_SYMBOL_GPL(nf_ct_seq_offset); |
| 1 1 17 17 17 17 1 17 1 17 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 | // SPDX-License-Identifier: GPL-2.0-only /* * User address space access functions. * * Copyright 1997 Andi Kleen <ak@muc.de> * Copyright 1997 Linus Torvalds * Copyright 2002 Andi Kleen <ak@suse.de> */ #include <linux/export.h> #include <linux/uaccess.h> #include <linux/highmem.h> #include <linux/libnvdimm.h> /* * Zero Userspace */ #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE /** * clean_cache_range - write back a cache range with CLWB * @addr: virtual start address * @size: number of bytes to write back * * Write back a cache range using the CLWB (cache line write back) * instruction. Note that @size is internally rounded up to be cache * line size aligned. */ static void clean_cache_range(void *addr, size_t size) { u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; unsigned long clflush_mask = x86_clflush_size - 1; void *vend = addr + size; void *p; for (p = (void *)((unsigned long)addr & ~clflush_mask); p < vend; p += x86_clflush_size) clwb(p); } void arch_wb_cache_pmem(void *addr, size_t size) { clean_cache_range(addr, size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); long __copy_user_flushcache(void *dst, const void __user *src, unsigned size) { unsigned long flushed, dest = (unsigned long) dst; long rc; stac(); rc = __copy_user_nocache(dst, src, size); clac(); /* * __copy_user_nocache() uses non-temporal stores for the bulk * of the transfer, but we need to manually flush if the * transfer is unaligned. A cached memory copy is used when * destination or size is not naturally aligned. That is: * - Require 8-byte alignment when size is 8 bytes or larger. * - Require 4-byte alignment when size is 4 bytes. */ if (size < 8) { if (!IS_ALIGNED(dest, 4) || size != 4) clean_cache_range(dst, size); } else { if (!IS_ALIGNED(dest, 8)) { dest = ALIGN(dest, boot_cpu_data.x86_clflush_size); clean_cache_range(dst, 1); } flushed = dest - (unsigned long) dst; if (size > flushed && !IS_ALIGNED(size - flushed, 8)) clean_cache_range(dst + size - 1, 1); } return rc; } void __memcpy_flushcache(void *_dst, const void *_src, size_t size) { unsigned long dest = (unsigned long) _dst; unsigned long source = (unsigned long) _src; /* cache copy and flush to align dest */ if (!IS_ALIGNED(dest, 8)) { size_t len = min_t(size_t, size, ALIGN(dest, 8) - dest); memcpy((void *) dest, (void *) source, len); clean_cache_range((void *) dest, len); dest += len; source += len; size -= len; if (!size) return; } /* 4x8 movnti loop */ while (size >= 32) { asm("movq (%0), %%r8\n" "movq 8(%0), %%r9\n" "movq 16(%0), %%r10\n" "movq 24(%0), %%r11\n" "movnti %%r8, (%1)\n" "movnti %%r9, 8(%1)\n" "movnti %%r10, 16(%1)\n" "movnti %%r11, 24(%1)\n" :: "r" (source), "r" (dest) : "memory", "r8", "r9", "r10", "r11"); dest += 32; source += 32; size -= 32; } /* 1x8 movnti loop */ while (size >= 8) { asm("movq (%0), %%r8\n" "movnti %%r8, (%1)\n" :: "r" (source), "r" (dest) : "memory", "r8"); dest += 8; source += 8; size -= 8; } /* 1x4 movnti loop */ while (size >= 4) { asm("movl (%0), %%r8d\n" "movnti %%r8d, (%1)\n" :: "r" (source), "r" (dest) : "memory", "r8"); dest += 4; source += 4; size -= 4; } /* cache copy for remaining bytes */ if (size) { memcpy((void *) dest, (void *) source, size); clean_cache_range((void *) dest, size); } } EXPORT_SYMBOL_GPL(__memcpy_flushcache); #endif |
| 2017 105 53 4 29 5 2 1513 102 102 102 102 102 1769 110 1212 1211 1213 649 101 26 38 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_INETDEVICE_H #define _LINUX_INETDEVICE_H #ifdef __KERNEL__ #include <linux/bitmap.h> #include <linux/if.h> #include <linux/ip.h> #include <linux/netdevice.h> #include <linux/rcupdate.h> #include <linux/timer.h> #include <linux/sysctl.h> #include <linux/rtnetlink.h> #include <linux/refcount.h> struct ipv4_devconf { void *sysctl; int data[IPV4_DEVCONF_MAX]; DECLARE_BITMAP(state, IPV4_DEVCONF_MAX); }; #define MC_HASH_SZ_LOG 9 struct in_device { struct net_device *dev; netdevice_tracker dev_tracker; refcount_t refcnt; int dead; struct in_ifaddr __rcu *ifa_list;/* IP ifaddr chain */ struct ip_mc_list __rcu *mc_list; /* IP multicast filter chain */ struct ip_mc_list __rcu * __rcu *mc_hash; int mc_count; /* Number of installed mcasts */ spinlock_t mc_tomb_lock; struct ip_mc_list *mc_tomb; unsigned long mr_v1_seen; unsigned long mr_v2_seen; unsigned long mr_maxdelay; unsigned long mr_qi; /* Query Interval */ unsigned long mr_qri; /* Query Response Interval */ unsigned char mr_qrv; /* Query Robustness Variable */ unsigned char mr_gq_running; u32 mr_ifc_count; struct timer_list mr_gq_timer; /* general query timer */ struct timer_list mr_ifc_timer; /* interface change timer */ struct neigh_parms *arp_parms; struct ipv4_devconf cnf; struct rcu_head rcu_head; }; #define IPV4_DEVCONF(cnf, attr) ((cnf).data[IPV4_DEVCONF_ ## attr - 1]) #define IPV4_DEVCONF_RO(cnf, attr) READ_ONCE(IPV4_DEVCONF(cnf, attr)) #define IPV4_DEVCONF_ALL(net, attr) \ IPV4_DEVCONF((*(net)->ipv4.devconf_all), attr) #define IPV4_DEVCONF_ALL_RO(net, attr) READ_ONCE(IPV4_DEVCONF_ALL(net, attr)) static inline int ipv4_devconf_get(const struct in_device *in_dev, int index) { index--; return READ_ONCE(in_dev->cnf.data[index]); } static inline void ipv4_devconf_set(struct in_device *in_dev, int index, int val) { index--; set_bit(index, in_dev->cnf.state); WRITE_ONCE(in_dev->cnf.data[index], val); } static inline void ipv4_devconf_setall(struct in_device *in_dev) { bitmap_fill(in_dev->cnf.state, IPV4_DEVCONF_MAX); } #define IN_DEV_CONF_GET(in_dev, attr) \ ipv4_devconf_get((in_dev), IPV4_DEVCONF_ ## attr) #define IN_DEV_CONF_SET(in_dev, attr, val) \ ipv4_devconf_set((in_dev), IPV4_DEVCONF_ ## attr, (val)) #define IN_DEV_ANDCONF(in_dev, attr) \ (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr) && \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_NET_ORCONF(in_dev, net, attr) \ (IPV4_DEVCONF_ALL_RO(net, attr) || \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_ORCONF(in_dev, attr) \ IN_DEV_NET_ORCONF(in_dev, dev_net(in_dev->dev), attr) #define IN_DEV_MAXCONF(in_dev, attr) \ (max(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr), \ IN_DEV_CONF_GET((in_dev), attr))) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) #define IN_DEV_MFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), MC_FORWARDING) #define IN_DEV_BFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), BC_FORWARDING) #define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER) #define IN_DEV_SRC_VMARK(in_dev) IN_DEV_ORCONF((in_dev), SRC_VMARK) #define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ ACCEPT_SOURCE_ROUTE) #define IN_DEV_ACCEPT_LOCAL(in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL) #define IN_DEV_BOOTP_RELAY(in_dev) IN_DEV_ANDCONF((in_dev), BOOTP_RELAY) #define IN_DEV_LOG_MARTIANS(in_dev) IN_DEV_ORCONF((in_dev), LOG_MARTIANS) #define IN_DEV_PROXY_ARP(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP) #define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP_PVLAN) #define IN_DEV_SHARED_MEDIA(in_dev) IN_DEV_ORCONF((in_dev), SHARED_MEDIA) #define IN_DEV_TX_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), SEND_REDIRECTS) #define IN_DEV_SEC_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), \ SECURE_REDIRECTS) #define IN_DEV_IDTAG(in_dev) IN_DEV_CONF_GET(in_dev, TAG) #define IN_DEV_MEDIUM_ID(in_dev) IN_DEV_CONF_GET(in_dev, MEDIUM_ID) #define IN_DEV_PROMOTE_SECONDARIES(in_dev) \ IN_DEV_ORCONF((in_dev), \ PROMOTE_SECONDARIES) #define IN_DEV_ROUTE_LOCALNET(in_dev) IN_DEV_ORCONF(in_dev, ROUTE_LOCALNET) #define IN_DEV_NET_ROUTE_LOCALNET(in_dev, net) \ IN_DEV_NET_ORCONF(in_dev, net, ROUTE_LOCALNET) #define IN_DEV_RX_REDIRECTS(in_dev) \ ((IN_DEV_FORWARD(in_dev) && \ IN_DEV_ANDCONF((in_dev), ACCEPT_REDIRECTS)) \ || (!IN_DEV_FORWARD(in_dev) && \ IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) #define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \ IN_DEV_ORCONF((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) #define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ACCEPT) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) #define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE) #define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY) #define IN_DEV_ARP_EVICT_NOCARRIER(in_dev) IN_DEV_ANDCONF((in_dev), \ ARP_EVICT_NOCARRIER) struct in_ifaddr { struct hlist_node addr_lst; struct in_ifaddr __rcu *ifa_next; struct in_device *ifa_dev; struct rcu_head rcu_head; __be32 ifa_local; __be32 ifa_address; __be32 ifa_mask; __u32 ifa_rt_priority; __be32 ifa_broadcast; unsigned char ifa_scope; unsigned char ifa_prefixlen; unsigned char ifa_proto; __u32 ifa_flags; char ifa_label[IFNAMSIZ]; /* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */ __u32 ifa_valid_lft; __u32 ifa_preferred_lft; unsigned long ifa_cstamp; /* created timestamp */ unsigned long ifa_tstamp; /* updated timestamp */ }; struct in_validator_info { __be32 ivi_addr; struct in_device *ivi_dev; struct netlink_ext_ack *extack; }; int register_inetaddr_notifier(struct notifier_block *nb); int unregister_inetaddr_notifier(struct notifier_block *nb); int register_inetaddr_validator_notifier(struct notifier_block *nb); int unregister_inetaddr_validator_notifier(struct notifier_block *nb); void inet_netconf_notify_devconf(struct net *net, int event, int type, int ifindex, struct ipv4_devconf *devconf); struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref); static inline struct net_device *ip_dev_find(struct net *net, __be32 addr) { return __ip_dev_find(net, addr, true); } int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *); #ifdef CONFIG_INET int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size); #else static inline int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) { return 0; } #endif void devinet_init(void); struct in_device *inetdev_by_index(struct net *, int); __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope); struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr); static inline bool inet_ifa_match(__be32 addr, const struct in_ifaddr *ifa) { return !((addr^ifa->ifa_address)&ifa->ifa_mask); } /* * Check if a mask is acceptable. */ static __inline__ bool bad_mask(__be32 mask, __be32 addr) { __u32 hmask; if (addr & (mask = ~mask)) return true; hmask = ntohl(mask); if (hmask & (hmask+1)) return true; return false; } #define in_dev_for_each_ifa_rtnl(ifa, in_dev) \ for (ifa = rtnl_dereference((in_dev)->ifa_list); ifa; \ ifa = rtnl_dereference(ifa->ifa_next)) #define in_dev_for_each_ifa_rtnl_net(net, ifa, in_dev) \ for (ifa = rtnl_net_dereference(net, (in_dev)->ifa_list); ifa; \ ifa = rtnl_net_dereference(net, ifa->ifa_next)) #define in_dev_for_each_ifa_rcu(ifa, in_dev) \ for (ifa = rcu_dereference((in_dev)->ifa_list); ifa; \ ifa = rcu_dereference(ifa->ifa_next)) static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) { return rcu_dereference(dev->ip_ptr); } static inline struct in_device *in_dev_get(const struct net_device *dev) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) refcount_inc(&in_dev->refcnt); rcu_read_unlock(); return in_dev; } static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev) { return rtnl_dereference(dev->ip_ptr); } static inline struct in_device *__in_dev_get_rtnl_net(const struct net_device *dev) { return rtnl_net_dereference(dev_net(dev), dev->ip_ptr); } /* called with rcu_read_lock or rtnl held */ static inline bool ip_ignore_linkdown(const struct net_device *dev) { struct in_device *in_dev; bool rc = false; in_dev = rcu_dereference_rtnl(dev->ip_ptr); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) rc = true; return rc; } static inline struct neigh_parms *__in_dev_arp_parms_get_rcu(const struct net_device *dev) { struct in_device *in_dev = __in_dev_get_rcu(dev); return in_dev ? in_dev->arp_parms : NULL; } void in_dev_finish_destroy(struct in_device *idev); static inline void in_dev_put(struct in_device *idev) { if (refcount_dec_and_test(&idev->refcnt)) in_dev_finish_destroy(idev); } #define __in_dev_put(idev) refcount_dec(&(idev)->refcnt) #define in_dev_hold(idev) refcount_inc(&(idev)->refcnt) #endif /* __KERNEL__ */ static __inline__ __be32 inet_make_mask(int logmask) { if (logmask) return htonl(~((1U<<(32-logmask))-1)); return 0; } static __inline__ int inet_mask_len(__be32 mask) { __u32 hmask = ntohl(mask); if (!hmask) return 0; return 32 - ffz(~hmask); } #endif /* _LINUX_INETDEVICE_H */ |
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | /* SPDX-License-Identifier: GPL-2.0 */ /* * PHY device list allow maintaining a list of PHY devices that are * part of a netdevice's link topology. PHYs can for example be chained, * as is the case when using a PHY that exposes an SFP module, on which an * SFP transceiver that embeds a PHY is connected. * * This list can then be used by userspace to leverage individual PHY * capabilities. */ #ifndef __PHY_LINK_TOPOLOGY_H #define __PHY_LINK_TOPOLOGY_H #include <linux/ethtool.h> #include <linux/netdevice.h> struct xarray; struct phy_device; struct sfp_bus; struct phy_link_topology { struct xarray phys; u32 next_phy_index; }; struct phy_device_node { enum phy_upstream upstream_type; union { struct net_device *netdev; struct phy_device *phydev; } upstream; struct sfp_bus *parent_sfp_bus; struct phy_device *phy; }; #if IS_ENABLED(CONFIG_PHYLIB) int phy_link_topo_add_phy(struct net_device *dev, struct phy_device *phy, enum phy_upstream upt, void *upstream); void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy); static inline struct phy_device * phy_link_topo_get_phy(struct net_device *dev, u32 phyindex) { struct phy_link_topology *topo = dev->link_topo; struct phy_device_node *pdn; if (!topo) return NULL; pdn = xa_load(&topo->phys, phyindex); if (pdn) return pdn->phy; return NULL; } #else static inline int phy_link_topo_add_phy(struct net_device *dev, struct phy_device *phy, enum phy_upstream upt, void *upstream) { return 0; } static inline void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy) { } static inline struct phy_device * phy_link_topo_get_phy(struct net_device *dev, u32 phyindex) { return NULL; } #endif #endif /* __PHY_LINK_TOPOLOGY_H */ |
| 292 291 2 1 2 1 1 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 | // SPDX-License-Identifier: GPL-2.0-only /* * AppArmor security module * * This file contains AppArmor policy manipulation functions * * Copyright (C) 1998-2008 Novell/SUSE * Copyright 2009-2017 Canonical Ltd. * * AppArmor policy namespaces, allow for different sets of policies * to be loaded for tasks within the namespace. */ #include <linux/list.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/string.h> #include "include/apparmor.h" #include "include/cred.h" #include "include/policy_ns.h" #include "include/label.h" #include "include/policy.h" /* kernel label */ struct aa_label *kernel_t; /* root profile namespace */ struct aa_ns *root_ns; const char *aa_hidden_ns_name = "---"; /** * aa_ns_visible - test if @view is visible from @curr * @curr: namespace to treat as the parent (NOT NULL) * @view: namespace to test if visible from @curr (NOT NULL) * @subns: whether view of a subns is allowed * * Returns: true if @view is visible from @curr else false */ bool aa_ns_visible(struct aa_ns *curr, struct aa_ns *view, bool subns) { if (curr == view) return true; if (!subns) return false; for ( ; view; view = view->parent) { if (view->parent == curr) return true; } return false; } /** * aa_ns_name - Find the ns name to display for @view from @curr * @curr: current namespace (NOT NULL) * @view: namespace attempting to view (NOT NULL) * @subns: are subns visible * * Returns: name of @view visible from @curr */ const char *aa_ns_name(struct aa_ns *curr, struct aa_ns *view, bool subns) { /* if view == curr then the namespace name isn't displayed */ if (curr == view) return ""; if (aa_ns_visible(curr, view, subns)) { /* at this point if a ns is visible it is in a view ns * thus the curr ns.hname is a prefix of its name. * Only output the virtualized portion of the name * Add + 2 to skip over // separating curr hname prefix * from the visible tail of the views hname */ return view->base.hname + strlen(curr->base.hname) + 2; } return aa_hidden_ns_name; } static struct aa_profile *alloc_unconfined(const char *name) { struct aa_profile *profile; profile = aa_alloc_null(NULL, name, GFP_KERNEL); if (!profile) return NULL; profile->label.flags |= FLAG_IX_ON_NAME_ERROR | FLAG_IMMUTIBLE | FLAG_NS_COUNT | FLAG_UNCONFINED; profile->mode = APPARMOR_UNCONFINED; return profile; } /** * alloc_ns - allocate, initialize and return a new namespace * @prefix: parent namespace name (MAYBE NULL) * @name: a preallocated name (NOT NULL) * * Returns: refcounted namespace or NULL on failure. */ static struct aa_ns *alloc_ns(const char *prefix, const char *name) { struct aa_ns *ns; ns = kzalloc(sizeof(*ns), GFP_KERNEL); AA_DEBUG(DEBUG_POLICY, "%s(%p)\n", __func__, ns); if (!ns) return NULL; if (!aa_policy_init(&ns->base, prefix, name, GFP_KERNEL)) goto fail_ns; INIT_LIST_HEAD(&ns->sub_ns); INIT_LIST_HEAD(&ns->rawdata_list); mutex_init(&ns->lock); init_waitqueue_head(&ns->wait); /* released by aa_free_ns() */ ns->unconfined = alloc_unconfined("unconfined"); if (!ns->unconfined) goto fail_unconfined; /* ns and ns->unconfined share ns->unconfined refcount */ ns->unconfined->ns = ns; atomic_set(&ns->uniq_null, 0); aa_labelset_init(&ns->labels); return ns; fail_unconfined: aa_policy_destroy(&ns->base); fail_ns: kfree_sensitive(ns); return NULL; } /** * aa_free_ns - free a profile namespace * @ns: the namespace to free (MAYBE NULL) * * Requires: All references to the namespace must have been put, if the * namespace was referenced by a profile confining a task, */ void aa_free_ns(struct aa_ns *ns) { if (!ns) return; aa_policy_destroy(&ns->base); aa_labelset_destroy(&ns->labels); aa_put_ns(ns->parent); ns->unconfined->ns = NULL; aa_free_profile(ns->unconfined); kfree_sensitive(ns); } /** * __aa_lookupn_ns - lookup the namespace matching @hname * @view: namespace to search in (NOT NULL) * @hname: hierarchical ns name (NOT NULL) * @n: length of @hname * * Requires: rcu_read_lock be held * * Returns: unrefcounted ns pointer or NULL if not found * * Do a relative name lookup, recursing through profile tree. */ struct aa_ns *__aa_lookupn_ns(struct aa_ns *view, const char *hname, size_t n) { struct aa_ns *ns = view; const char *split; for (split = strnstr(hname, "//", n); split; split = strnstr(hname, "//", n)) { ns = __aa_findn_ns(&ns->sub_ns, hname, split - hname); if (!ns) return NULL; n -= split + 2 - hname; hname = split + 2; } if (n) return __aa_findn_ns(&ns->sub_ns, hname, n); return NULL; } /** * aa_lookupn_ns - look up a policy namespace relative to @view * @view: namespace to search in (NOT NULL) * @name: name of namespace to find (NOT NULL) * @n: length of @name * * Returns: a refcounted namespace on the list, or NULL if no namespace * called @name exists. * * refcount released by caller */ struct aa_ns *aa_lookupn_ns(struct aa_ns *view, const char *name, size_t n) { struct aa_ns *ns = NULL; rcu_read_lock(); ns = aa_get_ns(__aa_lookupn_ns(view, name, n)); rcu_read_unlock(); return ns; } static struct aa_ns *__aa_create_ns(struct aa_ns *parent, const char *name, struct dentry *dir) { struct aa_ns *ns; int error; AA_BUG(!parent); AA_BUG(!name); AA_BUG(!mutex_is_locked(&parent->lock)); ns = alloc_ns(parent->base.hname, name); if (!ns) return ERR_PTR(-ENOMEM); ns->level = parent->level + 1; mutex_lock_nested(&ns->lock, ns->level); error = __aafs_ns_mkdir(ns, ns_subns_dir(parent), name, dir); if (error) { AA_ERROR("Failed to create interface for ns %s\n", ns->base.name); mutex_unlock(&ns->lock); aa_free_ns(ns); return ERR_PTR(error); } ns->parent = aa_get_ns(parent); list_add_rcu(&ns->base.list, &parent->sub_ns); /* add list ref */ aa_get_ns(ns); mutex_unlock(&ns->lock); return ns; } /** * __aa_find_or_create_ns - create an ns, fail if it already exists * @parent: the parent of the namespace being created * @name: the name of the namespace * @dir: if not null the dir to put the ns entries in * * Returns: the a refcounted ns that has been add or an ERR_PTR */ struct aa_ns *__aa_find_or_create_ns(struct aa_ns *parent, const char *name, struct dentry *dir) { struct aa_ns *ns; AA_BUG(!mutex_is_locked(&parent->lock)); /* try and find the specified ns */ /* released by caller */ ns = aa_get_ns(__aa_find_ns(&parent->sub_ns, name)); if (!ns) ns = __aa_create_ns(parent, name, dir); else ns = ERR_PTR(-EEXIST); /* return ref */ return ns; } /** * aa_prepare_ns - find an existing or create a new namespace of @name * @parent: ns to treat as parent * @name: the namespace to find or add (NOT NULL) * * Returns: refcounted namespace or PTR_ERR if failed to create one */ struct aa_ns *aa_prepare_ns(struct aa_ns *parent, const char *name) { struct aa_ns *ns; mutex_lock_nested(&parent->lock, parent->level); /* try and find the specified ns and if it doesn't exist create it */ /* released by caller */ ns = aa_get_ns(__aa_find_ns(&parent->sub_ns, name)); if (!ns) ns = __aa_create_ns(parent, name, NULL); mutex_unlock(&parent->lock); /* return ref */ return ns; } static void __ns_list_release(struct list_head *head); /** * destroy_ns - remove everything contained by @ns * @ns: namespace to have it contents removed (NOT NULL) */ static void destroy_ns(struct aa_ns *ns) { if (!ns) return; mutex_lock_nested(&ns->lock, ns->level); /* release all profiles in this namespace */ __aa_profile_list_release(&ns->base.profiles); /* release all sub namespaces */ __ns_list_release(&ns->sub_ns); if (ns->parent) { unsigned long flags; write_lock_irqsave(&ns->labels.lock, flags); __aa_proxy_redirect(ns_unconfined(ns), ns_unconfined(ns->parent)); write_unlock_irqrestore(&ns->labels.lock, flags); } __aafs_ns_rmdir(ns); mutex_unlock(&ns->lock); } /** * __aa_remove_ns - remove a namespace and all its children * @ns: namespace to be removed (NOT NULL) * * Requires: ns->parent->lock be held and ns removed from parent. */ void __aa_remove_ns(struct aa_ns *ns) { /* remove ns from namespace list */ list_del_rcu(&ns->base.list); destroy_ns(ns); aa_put_ns(ns); } /** * __ns_list_release - remove all profile namespaces on the list put refs * @head: list of profile namespaces (NOT NULL) * * Requires: namespace lock be held */ static void __ns_list_release(struct list_head *head) { struct aa_ns *ns, *tmp; list_for_each_entry_safe(ns, tmp, head, base.list) __aa_remove_ns(ns); } /** * aa_alloc_root_ns - allocate the root profile namespace * * Returns: %0 on success else error * */ int __init aa_alloc_root_ns(void) { struct aa_profile *kernel_p; /* released by aa_free_root_ns - used as list ref*/ root_ns = alloc_ns(NULL, "root"); if (!root_ns) return -ENOMEM; kernel_p = alloc_unconfined("kernel_t"); if (!kernel_p) { destroy_ns(root_ns); aa_free_ns(root_ns); return -ENOMEM; } kernel_t = &kernel_p->label; root_ns->unconfined->ns = aa_get_ns(root_ns); return 0; } /** * aa_free_root_ns - free the root profile namespace */ void __init aa_free_root_ns(void) { struct aa_ns *ns = root_ns; root_ns = NULL; aa_label_free(kernel_t); destroy_ns(ns); aa_put_ns(ns); } |
| 282 77 5 5 10 14 8 9 9 14 52 21 51 21 21 21 21 21 21 21 21 21 51 17 26 19 26 2 26 1 26 4 26 4 4 4 4 21 22 22 22 22 22 22 22 11 1 10 10 1 1 10 1 38 38 38 38 38 38 37 36 27 1 1 1 26 26 1 26 26 26 1 26 4 26 26 26 4 4 22 26 26 26 21 9 14 14 21 33 25 25 25 25 25 25 7 25 21 21 21 10 10 283 283 7 7 83 83 1 1 175 12 3 3 3 166 9 179 3 179 20 21 161 16 163 11 11 11 17 5 3 1203 29 17 1202 17 17 17 17 1568 16 13 13 16 17 17 14 17 16 28 28 28 13 11 4 9 17 17 17 17 17 4 4 16 16 10 10 14 14 14 7 17 17 14 17 19 19 19 3 17 20 14 12 10 8 10 2 10 58 57 51 58 58 10 203 235 228 224 219 212 237 53 53 53 52 49 51 44 5 51 50 50 50 49 11 48 48 44 43 10 10 43 43 43 43 50 43 43 27 43 33 43 53 19 19 19 18 12 12 12 12 3 11 11 10 8 8 8 8 8 6 6 6 6 5 5 6 1 7 12 19 7 7 2 2 55 1 54 54 50 49 48 48 48 28 12 13 11 11 13 24 1 22 23 20 19 19 15 7 7 6 6 20 19 19 13 13 8 12 11 10 4 5 21 22 22 18 17 17 17 17 5 5 3 3 28 1 27 27 24 23 23 23 16 16 14 23 28 74 35 10 57 1 56 57 57 52 52 51 51 44 20 20 17 17 20 15 13 14 1 12 12 10 10 5 261 255 253 15 53 19 7 55 24 57 21 22 193 247 4 1 4 2 4 9 2 23 23 22 22 22 1 1 23 1 22 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 | // SPDX-License-Identifier: GPL-2.0-only /* * fs/userfaultfd.c * * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> * Copyright (C) 2008-2009 Red Hat, Inc. * Copyright (C) 2015 Red Hat, Inc. * * Some part derived from fs/eventfd.c (anon inode setup) and * mm/ksm.c (mm hashing). */ #include <linux/list.h> #include <linux/hashtable.h> #include <linux/sched/signal.h> #include <linux/sched/mm.h> #include <linux/mm.h> #include <linux/mm_inline.h> #include <linux/mmu_notifier.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/seq_file.h> #include <linux/file.h> #include <linux/bug.h> #include <linux/anon_inodes.h> #include <linux/syscalls.h> #include <linux/userfaultfd_k.h> #include <linux/mempolicy.h> #include <linux/ioctl.h> #include <linux/security.h> #include <linux/hugetlb.h> #include <linux/leafops.h> #include <linux/miscdevice.h> #include <linux/uio.h> static int sysctl_unprivileged_userfaultfd __read_mostly; #ifdef CONFIG_SYSCTL static const struct ctl_table vm_userfaultfd_table[] = { { .procname = "unprivileged_userfaultfd", .data = &sysctl_unprivileged_userfaultfd, .maxlen = sizeof(sysctl_unprivileged_userfaultfd), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, }; #endif static struct kmem_cache *userfaultfd_ctx_cachep __ro_after_init; struct userfaultfd_fork_ctx { struct userfaultfd_ctx *orig; struct userfaultfd_ctx *new; struct list_head list; }; struct userfaultfd_unmap_ctx { struct userfaultfd_ctx *ctx; unsigned long start; unsigned long end; struct list_head list; }; struct userfaultfd_wait_queue { struct uffd_msg msg; wait_queue_entry_t wq; struct userfaultfd_ctx *ctx; bool waken; }; struct userfaultfd_wake_range { unsigned long start; unsigned long len; }; /* internal indication that UFFD_API ioctl was successfully executed */ #define UFFD_FEATURE_INITIALIZED (1u << 31) static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx) { return ctx->features & UFFD_FEATURE_INITIALIZED; } static bool userfaultfd_wp_async_ctx(struct userfaultfd_ctx *ctx) { return ctx && (ctx->features & UFFD_FEATURE_WP_ASYNC); } /* * Whether WP_UNPOPULATED is enabled on the uffd context. It is only * meaningful when userfaultfd_wp()==true on the vma and when it's * anonymous. */ bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma) { struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; if (!ctx) return false; return ctx->features & UFFD_FEATURE_WP_UNPOPULATED; } static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, int wake_flags, void *key) { struct userfaultfd_wake_range *range = key; int ret; struct userfaultfd_wait_queue *uwq; unsigned long start, len; uwq = container_of(wq, struct userfaultfd_wait_queue, wq); ret = 0; /* len == 0 means wake all */ start = range->start; len = range->len; if (len && (start > uwq->msg.arg.pagefault.address || start + len <= uwq->msg.arg.pagefault.address)) goto out; WRITE_ONCE(uwq->waken, true); /* * The Program-Order guarantees provided by the scheduler * ensure uwq->waken is visible before the task is woken. */ ret = wake_up_state(wq->private, mode); if (ret) { /* * Wake only once, autoremove behavior. * * After the effect of list_del_init is visible to the other * CPUs, the waitqueue may disappear from under us, see the * !list_empty_careful() in handle_userfault(). * * try_to_wake_up() has an implicit smp_mb(), and the * wq->private is read before calling the extern function * "wake_up_state" (which in turns calls try_to_wake_up). */ list_del_init(&wq->entry); } out: return ret; } /** * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd * context. * @ctx: [in] Pointer to the userfaultfd context. */ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx) { refcount_inc(&ctx->refcount); } /** * userfaultfd_ctx_put - Releases a reference to the internal userfaultfd * context. * @ctx: [in] Pointer to userfaultfd context. * * The userfaultfd context reference must have been previously acquired either * with userfaultfd_ctx_get() or userfaultfd_ctx_fdget(). */ static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx) { if (refcount_dec_and_test(&ctx->refcount)) { VM_WARN_ON_ONCE(spin_is_locked(&ctx->fault_pending_wqh.lock)); VM_WARN_ON_ONCE(waitqueue_active(&ctx->fault_pending_wqh)); VM_WARN_ON_ONCE(spin_is_locked(&ctx->fault_wqh.lock)); VM_WARN_ON_ONCE(waitqueue_active(&ctx->fault_wqh)); VM_WARN_ON_ONCE(spin_is_locked(&ctx->event_wqh.lock)); VM_WARN_ON_ONCE(waitqueue_active(&ctx->event_wqh)); VM_WARN_ON_ONCE(spin_is_locked(&ctx->fd_wqh.lock)); VM_WARN_ON_ONCE(waitqueue_active(&ctx->fd_wqh)); mmdrop(ctx->mm); kmem_cache_free(userfaultfd_ctx_cachep, ctx); } } static inline void msg_init(struct uffd_msg *msg) { BUILD_BUG_ON(sizeof(struct uffd_msg) != 32); /* * Must use memset to zero out the paddings or kernel data is * leaked to userland. */ memset(msg, 0, sizeof(struct uffd_msg)); } static inline struct uffd_msg userfault_msg(unsigned long address, unsigned long real_address, unsigned int flags, unsigned long reason, unsigned int features) { struct uffd_msg msg; msg_init(&msg); msg.event = UFFD_EVENT_PAGEFAULT; msg.arg.pagefault.address = (features & UFFD_FEATURE_EXACT_ADDRESS) ? real_address : address; /* * These flags indicate why the userfault occurred: * - UFFD_PAGEFAULT_FLAG_WP indicates a write protect fault. * - UFFD_PAGEFAULT_FLAG_MINOR indicates a minor fault. * - Neither of these flags being set indicates a MISSING fault. * * Separately, UFFD_PAGEFAULT_FLAG_WRITE indicates it was a write * fault. Otherwise, it was a read fault. */ if (flags & FAULT_FLAG_WRITE) msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WRITE; if (reason & VM_UFFD_WP) msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP; if (reason & VM_UFFD_MINOR) msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_MINOR; if (features & UFFD_FEATURE_THREAD_ID) msg.arg.pagefault.feat.ptid = task_pid_vnr(current); return msg; } #ifdef CONFIG_HUGETLB_PAGE /* * Same functionality as userfaultfd_must_wait below with modifications for * hugepmd ranges. */ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, struct vm_fault *vmf, unsigned long reason) { struct vm_area_struct *vma = vmf->vma; pte_t *ptep, pte; assert_fault_locked(vmf); ptep = hugetlb_walk(vma, vmf->address, vma_mmu_pagesize(vma)); if (!ptep) return true; pte = huge_ptep_get(vma->vm_mm, vmf->address, ptep); /* * Lockless access: we're in a wait_event so it's ok if it * changes under us. */ /* Entry is still missing, wait for userspace to resolve the fault. */ if (huge_pte_none(pte)) return true; /* UFFD PTE markers require userspace to resolve the fault. */ if (pte_is_uffd_marker(pte)) return true; /* * If VMA has UFFD WP faults enabled and WP fault, wait for userspace to * resolve the fault. */ if (!huge_pte_write(pte) && (reason & VM_UFFD_WP)) return true; return false; } #else static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, struct vm_fault *vmf, unsigned long reason) { /* Should never get here. */ VM_WARN_ON_ONCE(1); return false; } #endif /* CONFIG_HUGETLB_PAGE */ /* * Verify the pagetables are still not ok after having registered into * the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any * userfault that has already been resolved, if userfaultfd_read_iter and * UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different * threads. */ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, struct vm_fault *vmf, unsigned long reason) { struct mm_struct *mm = ctx->mm; unsigned long address = vmf->address; pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd, _pmd; pte_t *pte; pte_t ptent; bool ret; assert_fault_locked(vmf); pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) return true; p4d = p4d_offset(pgd, address); if (!p4d_present(*p4d)) return true; pud = pud_offset(p4d, address); if (!pud_present(*pud)) return true; pmd = pmd_offset(pud, address); again: _pmd = pmdp_get_lockless(pmd); if (pmd_none(_pmd)) return true; /* * A race could arise which would result in a softleaf entry such as * migration entry unexpectedly being present in the PMD, so explicitly * check for this and bail out if so. */ if (!pmd_present(_pmd)) return false; if (pmd_trans_huge(_pmd)) return !pmd_write(_pmd) && (reason & VM_UFFD_WP); pte = pte_offset_map(pmd, address); if (!pte) goto again; /* * Lockless access: we're in a wait_event so it's ok if it * changes under us. */ ptent = ptep_get(pte); ret = true; /* Entry is still missing, wait for userspace to resolve the fault. */ if (pte_none(ptent)) goto out; /* UFFD PTE markers require userspace to resolve the fault. */ if (pte_is_uffd_marker(ptent)) goto out; /* * If VMA has UFFD WP faults enabled and WP fault, wait for userspace to * resolve the fault. */ if (!pte_write(ptent) && (reason & VM_UFFD_WP)) goto out; ret = false; out: pte_unmap(pte); return ret; } static inline unsigned int userfaultfd_get_blocking_state(unsigned int flags) { if (flags & FAULT_FLAG_INTERRUPTIBLE) return TASK_INTERRUPTIBLE; if (flags & FAULT_FLAG_KILLABLE) return TASK_KILLABLE; return TASK_UNINTERRUPTIBLE; } /* * The locking rules involved in returning VM_FAULT_RETRY depending on * FAULT_FLAG_ALLOW_RETRY, FAULT_FLAG_RETRY_NOWAIT and * FAULT_FLAG_KILLABLE are not straightforward. The "Caution" * recommendation in __lock_page_or_retry is not an understatement. * * If FAULT_FLAG_ALLOW_RETRY is set, the mmap_lock must be released * before returning VM_FAULT_RETRY only if FAULT_FLAG_RETRY_NOWAIT is * not set. * * If FAULT_FLAG_ALLOW_RETRY is set but FAULT_FLAG_KILLABLE is not * set, VM_FAULT_RETRY can still be returned if and only if there are * fatal_signal_pending()s, and the mmap_lock must be released before * returning it. */ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) { struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; struct userfaultfd_ctx *ctx; struct userfaultfd_wait_queue uwq; vm_fault_t ret = VM_FAULT_SIGBUS; bool must_wait; unsigned int blocking_state; /* * We don't do userfault handling for the final child pid update * and when coredumping (faults triggered by get_dump_page()). */ if (current->flags & (PF_EXITING|PF_DUMPCORE)) goto out; assert_fault_locked(vmf); ctx = vma->vm_userfaultfd_ctx.ctx; if (!ctx) goto out; VM_WARN_ON_ONCE(ctx->mm != mm); /* Any unrecognized flag is a bug. */ VM_WARN_ON_ONCE(reason & ~__VM_UFFD_FLAGS); /* 0 or > 1 flags set is a bug; we expect exactly 1. */ VM_WARN_ON_ONCE(!reason || (reason & (reason - 1))); if (ctx->features & UFFD_FEATURE_SIGBUS) goto out; if (!(vmf->flags & FAULT_FLAG_USER) && (ctx->flags & UFFD_USER_MODE_ONLY)) goto out; /* * Check that we can return VM_FAULT_RETRY. * * NOTE: it should become possible to return VM_FAULT_RETRY * even if FAULT_FLAG_TRIED is set without leading to gup() * -EBUSY failures, if the userfaultfd is to be extended for * VM_UFFD_WP tracking and we intend to arm the userfault * without first stopping userland access to the memory. For * VM_UFFD_MISSING userfaults this is enough for now. */ if (unlikely(!(vmf->flags & FAULT_FLAG_ALLOW_RETRY))) { /* * Validate the invariant that nowait must allow retry * to be sure not to return SIGBUS erroneously on * nowait invocations. */ VM_WARN_ON_ONCE(vmf->flags & FAULT_FLAG_RETRY_NOWAIT); #ifdef CONFIG_DEBUG_VM if (printk_ratelimit()) { pr_warn("FAULT_FLAG_ALLOW_RETRY missing %x\n", vmf->flags); dump_stack(); } #endif goto out; } /* * Handle nowait, not much to do other than tell it to retry * and wait. */ ret = VM_FAULT_RETRY; if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) goto out; if (unlikely(READ_ONCE(ctx->released))) { /* * If a concurrent release is detected, do not return * VM_FAULT_SIGBUS or VM_FAULT_NOPAGE, but instead always * return VM_FAULT_RETRY with lock released proactively. * * If we were to return VM_FAULT_SIGBUS here, the non * cooperative manager would be instead forced to * always call UFFDIO_UNREGISTER before it can safely * close the uffd, to avoid involuntary SIGBUS triggered. * * If we were to return VM_FAULT_NOPAGE, it would work for * the fault path, in which the lock will be released * later. However for GUP, faultin_page() does nothing * special on NOPAGE, so GUP would spin retrying without * releasing the mmap read lock, causing possible livelock. * * Here only VM_FAULT_RETRY would make sure the mmap lock * be released immediately, so that the thread concurrently * releasing the userfault would always make progress. */ release_fault_lock(vmf); goto out; } /* take the reference before dropping the mmap_lock */ userfaultfd_ctx_get(ctx); init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function); uwq.wq.private = current; uwq.msg = userfault_msg(vmf->address, vmf->real_address, vmf->flags, reason, ctx->features); uwq.ctx = ctx; uwq.waken = false; blocking_state = userfaultfd_get_blocking_state(vmf->flags); /* * Take the vma lock now, in order to safely call * userfaultfd_huge_must_wait() later. Since acquiring the * (sleepable) vma lock can modify the current task state, that * must be before explicitly calling set_current_state(). */ if (is_vm_hugetlb_page(vma)) hugetlb_vma_lock_read(vma); spin_lock_irq(&ctx->fault_pending_wqh.lock); /* * After the __add_wait_queue the uwq is visible to userland * through poll/read(). */ __add_wait_queue(&ctx->fault_pending_wqh, &uwq.wq); /* * The smp_mb() after __set_current_state prevents the reads * following the spin_unlock to happen before the list_add in * __add_wait_queue. */ set_current_state(blocking_state); spin_unlock_irq(&ctx->fault_pending_wqh.lock); if (is_vm_hugetlb_page(vma)) { must_wait = userfaultfd_huge_must_wait(ctx, vmf, reason); hugetlb_vma_unlock_read(vma); } else { must_wait = userfaultfd_must_wait(ctx, vmf, reason); } release_fault_lock(vmf); if (likely(must_wait && !READ_ONCE(ctx->released))) { wake_up_poll(&ctx->fd_wqh, EPOLLIN); schedule(); } __set_current_state(TASK_RUNNING); /* * Here we race with the list_del; list_add in * userfaultfd_ctx_read(), however because we don't ever run * list_del_init() to refile across the two lists, the prev * and next pointers will never point to self. list_add also * would never let any of the two pointers to point to * self. So list_empty_careful won't risk to see both pointers * pointing to self at any time during the list refile. The * only case where list_del_init() is called is the full * removal in the wake function and there we don't re-list_add * and it's fine not to block on the spinlock. The uwq on this * kernel stack can be released after the list_del_init. */ if (!list_empty_careful(&uwq.wq.entry)) { spin_lock_irq(&ctx->fault_pending_wqh.lock); /* * No need of list_del_init(), the uwq on the stack * will be freed shortly anyway. */ list_del(&uwq.wq.entry); spin_unlock_irq(&ctx->fault_pending_wqh.lock); } /* * ctx may go away after this if the userfault pseudo fd is * already released. */ userfaultfd_ctx_put(ctx); out: return ret; } static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, struct userfaultfd_wait_queue *ewq) { struct userfaultfd_ctx *release_new_ctx; if (WARN_ON_ONCE(current->flags & PF_EXITING)) goto out; ewq->ctx = ctx; init_waitqueue_entry(&ewq->wq, current); release_new_ctx = NULL; spin_lock_irq(&ctx->event_wqh.lock); /* * After the __add_wait_queue the uwq is visible to userland * through poll/read(). */ __add_wait_queue(&ctx->event_wqh, &ewq->wq); for (;;) { set_current_state(TASK_KILLABLE); if (ewq->msg.event == 0) break; if (READ_ONCE(ctx->released) || fatal_signal_pending(current)) { /* * &ewq->wq may be queued in fork_event, but * __remove_wait_queue ignores the head * parameter. It would be a problem if it * didn't. */ __remove_wait_queue(&ctx->event_wqh, &ewq->wq); if (ewq->msg.event == UFFD_EVENT_FORK) { struct userfaultfd_ctx *new; new = (struct userfaultfd_ctx *) (unsigned long) ewq->msg.arg.reserved.reserved1; release_new_ctx = new; } break; } spin_unlock_irq(&ctx->event_wqh.lock); wake_up_poll(&ctx->fd_wqh, EPOLLIN); schedule(); spin_lock_irq(&ctx->event_wqh.lock); } __set_current_state(TASK_RUNNING); spin_unlock_irq(&ctx->event_wqh.lock); if (release_new_ctx) { userfaultfd_release_new(release_new_ctx); userfaultfd_ctx_put(release_new_ctx); } /* * ctx may go away after this if the userfault pseudo fd is * already released. */ out: atomic_dec(&ctx->mmap_changing); VM_WARN_ON_ONCE(atomic_read(&ctx->mmap_changing) < 0); userfaultfd_ctx_put(ctx); } static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx, struct userfaultfd_wait_queue *ewq) { ewq->msg.event = 0; wake_up_locked(&ctx->event_wqh); __remove_wait_queue(&ctx->event_wqh, &ewq->wq); } int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) { struct userfaultfd_ctx *ctx = NULL, *octx; struct userfaultfd_fork_ctx *fctx; octx = vma->vm_userfaultfd_ctx.ctx; if (!octx) return 0; if (!(octx->features & UFFD_FEATURE_EVENT_FORK)) { userfaultfd_reset_ctx(vma); return 0; } list_for_each_entry(fctx, fcs, list) if (fctx->orig == octx) { ctx = fctx->new; break; } if (!ctx) { fctx = kmalloc(sizeof(*fctx), GFP_KERNEL); if (!fctx) return -ENOMEM; ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL); if (!ctx) { kfree(fctx); return -ENOMEM; } refcount_set(&ctx->refcount, 1); ctx->flags = octx->flags; ctx->features = octx->features; ctx->released = false; init_rwsem(&ctx->map_changing_lock); atomic_set(&ctx->mmap_changing, 0); ctx->mm = vma->vm_mm; mmgrab(ctx->mm); userfaultfd_ctx_get(octx); down_write(&octx->map_changing_lock); atomic_inc(&octx->mmap_changing); up_write(&octx->map_changing_lock); fctx->orig = octx; fctx->new = ctx; list_add_tail(&fctx->list, fcs); } vma->vm_userfaultfd_ctx.ctx = ctx; return 0; } static void dup_fctx(struct userfaultfd_fork_ctx *fctx) { struct userfaultfd_ctx *ctx = fctx->orig; struct userfaultfd_wait_queue ewq; msg_init(&ewq.msg); ewq.msg.event = UFFD_EVENT_FORK; ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new; userfaultfd_event_wait_completion(ctx, &ewq); } void dup_userfaultfd_complete(struct list_head *fcs) { struct userfaultfd_fork_ctx *fctx, *n; list_for_each_entry_safe(fctx, n, fcs, list) { dup_fctx(fctx); list_del(&fctx->list); kfree(fctx); } } void dup_userfaultfd_fail(struct list_head *fcs) { struct userfaultfd_fork_ctx *fctx, *n; /* * An error has occurred on fork, we will tear memory down, but have * allocated memory for fctx's and raised reference counts for both the * original and child contexts (and on the mm for each as a result). * * These would ordinarily be taken care of by a user handling the event, * but we are no longer doing so, so manually clean up here. * * mm tear down will take care of cleaning up VMA contexts. */ list_for_each_entry_safe(fctx, n, fcs, list) { struct userfaultfd_ctx *octx = fctx->orig; struct userfaultfd_ctx *ctx = fctx->new; atomic_dec(&octx->mmap_changing); VM_WARN_ON_ONCE(atomic_read(&octx->mmap_changing) < 0); userfaultfd_ctx_put(octx); userfaultfd_ctx_put(ctx); list_del(&fctx->list); kfree(fctx); } } void mremap_userfaultfd_prep(struct vm_area_struct *vma, struct vm_userfaultfd_ctx *vm_ctx) { struct userfaultfd_ctx *ctx; ctx = vma->vm_userfaultfd_ctx.ctx; if (!ctx) return; if (ctx->features & UFFD_FEATURE_EVENT_REMAP) { vm_ctx->ctx = ctx; userfaultfd_ctx_get(ctx); down_write(&ctx->map_changing_lock); atomic_inc(&ctx->mmap_changing); up_write(&ctx->map_changing_lock); } else { /* Drop uffd context if remap feature not enabled */ userfaultfd_reset_ctx(vma); } } void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx, unsigned long from, unsigned long to, unsigned long len) { struct userfaultfd_ctx *ctx = vm_ctx->ctx; struct userfaultfd_wait_queue ewq; if (!ctx) return; msg_init(&ewq.msg); ewq.msg.event = UFFD_EVENT_REMAP; ewq.msg.arg.remap.from = from; ewq.msg.arg.remap.to = to; ewq.msg.arg.remap.len = len; userfaultfd_event_wait_completion(ctx, &ewq); } void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *vm_ctx) { struct userfaultfd_ctx *ctx = vm_ctx->ctx; if (!ctx) return; userfaultfd_ctx_put(ctx); } bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; struct userfaultfd_ctx *ctx; struct userfaultfd_wait_queue ewq; ctx = vma->vm_userfaultfd_ctx.ctx; if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE)) return true; userfaultfd_ctx_get(ctx); down_write(&ctx->map_changing_lock); atomic_inc(&ctx->mmap_changing); up_write(&ctx->map_changing_lock); mmap_read_unlock(mm); msg_init(&ewq.msg); ewq.msg.event = UFFD_EVENT_REMOVE; ewq.msg.arg.remove.start = start; ewq.msg.arg.remove.end = end; userfaultfd_event_wait_completion(ctx, &ewq); return false; } static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps, unsigned long start, unsigned long end) { struct userfaultfd_unmap_ctx *unmap_ctx; list_for_each_entry(unmap_ctx, unmaps, list) if (unmap_ctx->ctx == ctx && unmap_ctx->start == start && unmap_ctx->end == end) return true; return false; } int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *unmaps) { struct userfaultfd_unmap_ctx *unmap_ctx; struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) || has_unmap_ctx(ctx, unmaps, start, end)) return 0; unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL); if (!unmap_ctx) return -ENOMEM; userfaultfd_ctx_get(ctx); down_write(&ctx->map_changing_lock); atomic_inc(&ctx->mmap_changing); up_write(&ctx->map_changing_lock); unmap_ctx->ctx = ctx; unmap_ctx->start = start; unmap_ctx->end = end; list_add_tail(&unmap_ctx->list, unmaps); return 0; } void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf) { struct userfaultfd_unmap_ctx *ctx, *n; struct userfaultfd_wait_queue ewq; list_for_each_entry_safe(ctx, n, uf, list) { msg_init(&ewq.msg); ewq.msg.event = UFFD_EVENT_UNMAP; ewq.msg.arg.remove.start = ctx->start; ewq.msg.arg.remove.end = ctx->end; userfaultfd_event_wait_completion(ctx->ctx, &ewq); list_del(&ctx->list); kfree(ctx); } } static int userfaultfd_release(struct inode *inode, struct file *file) { struct userfaultfd_ctx *ctx = file->private_data; struct mm_struct *mm = ctx->mm; /* len == 0 means wake all */ struct userfaultfd_wake_range range = { .len = 0, }; WRITE_ONCE(ctx->released, true); userfaultfd_release_all(mm, ctx); /* * After no new page faults can wait on this fault_*wqh, flush * the last page faults that may have been already waiting on * the fault_*wqh. */ spin_lock_irq(&ctx->fault_pending_wqh.lock); __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range); __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range); spin_unlock_irq(&ctx->fault_pending_wqh.lock); /* Flush pending events that may still wait on event_wqh */ wake_up_all(&ctx->event_wqh); wake_up_poll(&ctx->fd_wqh, EPOLLHUP); userfaultfd_ctx_put(ctx); return 0; } /* fault_pending_wqh.lock must be hold by the caller */ static inline struct userfaultfd_wait_queue *find_userfault_in( wait_queue_head_t *wqh) { wait_queue_entry_t *wq; struct userfaultfd_wait_queue *uwq; lockdep_assert_held(&wqh->lock); uwq = NULL; if (!waitqueue_active(wqh)) goto out; /* walk in reverse to provide FIFO behavior to read userfaults */ wq = list_last_entry(&wqh->head, typeof(*wq), entry); uwq = container_of(wq, struct userfaultfd_wait_queue, wq); out: return uwq; } static inline struct userfaultfd_wait_queue *find_userfault( struct userfaultfd_ctx *ctx) { return find_userfault_in(&ctx->fault_pending_wqh); } static inline struct userfaultfd_wait_queue *find_userfault_evt( struct userfaultfd_ctx *ctx) { return find_userfault_in(&ctx->event_wqh); } static __poll_t userfaultfd_poll(struct file *file, poll_table *wait) { struct userfaultfd_ctx *ctx = file->private_data; __poll_t ret; poll_wait(file, &ctx->fd_wqh, wait); if (!userfaultfd_is_initialized(ctx)) return EPOLLERR; /* * poll() never guarantees that read won't block. * userfaults can be waken before they're read(). */ if (unlikely(!(file->f_flags & O_NONBLOCK))) return EPOLLERR; /* * lockless access to see if there are pending faults * __pollwait last action is the add_wait_queue but * the spin_unlock would allow the waitqueue_active to * pass above the actual list_add inside * add_wait_queue critical section. So use a full * memory barrier to serialize the list_add write of * add_wait_queue() with the waitqueue_active read * below. */ ret = 0; smp_mb(); if (waitqueue_active(&ctx->fault_pending_wqh)) ret = EPOLLIN; else if (waitqueue_active(&ctx->event_wqh)) ret = EPOLLIN; return ret; } static const struct file_operations userfaultfd_fops; static int resolve_userfault_fork(struct userfaultfd_ctx *new, struct inode *inode, struct uffd_msg *msg) { int fd; fd = anon_inode_create_getfd("[userfaultfd]", &userfaultfd_fops, new, O_RDONLY | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode); if (fd < 0) return fd; msg->arg.reserved.reserved1 = 0; msg->arg.fork.ufd = fd; return 0; } static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, struct uffd_msg *msg, struct inode *inode) { ssize_t ret; DECLARE_WAITQUEUE(wait, current); struct userfaultfd_wait_queue *uwq; /* * Handling fork event requires sleeping operations, so * we drop the event_wqh lock, then do these ops, then * lock it back and wake up the waiter. While the lock is * dropped the ewq may go away so we keep track of it * carefully. */ LIST_HEAD(fork_event); struct userfaultfd_ctx *fork_nctx = NULL; /* always take the fd_wqh lock before the fault_pending_wqh lock */ spin_lock_irq(&ctx->fd_wqh.lock); __add_wait_queue(&ctx->fd_wqh, &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); spin_lock(&ctx->fault_pending_wqh.lock); uwq = find_userfault(ctx); if (uwq) { /* * Use a seqcount to repeat the lockless check * in wake_userfault() to avoid missing * wakeups because during the refile both * waitqueue could become empty if this is the * only userfault. */ write_seqcount_begin(&ctx->refile_seq); /* * The fault_pending_wqh.lock prevents the uwq * to disappear from under us. * * Refile this userfault from * fault_pending_wqh to fault_wqh, it's not * pending anymore after we read it. * * Use list_del() by hand (as * userfaultfd_wake_function also uses * list_del_init() by hand) to be sure nobody * changes __remove_wait_queue() to use * list_del_init() in turn breaking the * !list_empty_careful() check in * handle_userfault(). The uwq->wq.head list * must never be empty at any time during the * refile, or the waitqueue could disappear * from under us. The "wait_queue_head_t" * parameter of __remove_wait_queue() is unused * anyway. */ list_del(&uwq->wq.entry); add_wait_queue(&ctx->fault_wqh, &uwq->wq); write_seqcount_end(&ctx->refile_seq); /* careful to always initialize msg if ret == 0 */ *msg = uwq->msg; spin_unlock(&ctx->fault_pending_wqh.lock); ret = 0; break; } spin_unlock(&ctx->fault_pending_wqh.lock); spin_lock(&ctx->event_wqh.lock); uwq = find_userfault_evt(ctx); if (uwq) { *msg = uwq->msg; if (uwq->msg.event == UFFD_EVENT_FORK) { fork_nctx = (struct userfaultfd_ctx *) (unsigned long) uwq->msg.arg.reserved.reserved1; list_move(&uwq->wq.entry, &fork_event); /* * fork_nctx can be freed as soon as * we drop the lock, unless we take a * reference on it. */ userfaultfd_ctx_get(fork_nctx); spin_unlock(&ctx->event_wqh.lock); ret = 0; break; } userfaultfd_event_complete(ctx, uwq); spin_unlock(&ctx->event_wqh.lock); ret = 0; break; } spin_unlock(&ctx->event_wqh.lock); if (signal_pending(current)) { ret = -ERESTARTSYS; break; } if (no_wait) { ret = -EAGAIN; break; } spin_unlock_irq(&ctx->fd_wqh.lock); schedule(); spin_lock_irq(&ctx->fd_wqh.lock); } __remove_wait_queue(&ctx->fd_wqh, &wait); __set_current_state(TASK_RUNNING); spin_unlock_irq(&ctx->fd_wqh.lock); if (!ret && msg->event == UFFD_EVENT_FORK) { ret = resolve_userfault_fork(fork_nctx, inode, msg); spin_lock_irq(&ctx->event_wqh.lock); if (!list_empty(&fork_event)) { /* * The fork thread didn't abort, so we can * drop the temporary refcount. */ userfaultfd_ctx_put(fork_nctx); uwq = list_first_entry(&fork_event, typeof(*uwq), wq.entry); /* * If fork_event list wasn't empty and in turn * the event wasn't already released by fork * (the event is allocated on fork kernel * stack), put the event back to its place in * the event_wq. fork_event head will be freed * as soon as we return so the event cannot * stay queued there no matter the current * "ret" value. */ list_del(&uwq->wq.entry); __add_wait_queue(&ctx->event_wqh, &uwq->wq); /* * Leave the event in the waitqueue and report * error to userland if we failed to resolve * the userfault fork. */ if (likely(!ret)) userfaultfd_event_complete(ctx, uwq); } else { /* * Here the fork thread aborted and the * refcount from the fork thread on fork_nctx * has already been released. We still hold * the reference we took before releasing the * lock above. If resolve_userfault_fork * failed we've to drop it because the * fork_nctx has to be freed in such case. If * it succeeded we'll hold it because the new * uffd references it. */ if (ret) userfaultfd_ctx_put(fork_nctx); } spin_unlock_irq(&ctx->event_wqh.lock); } return ret; } static ssize_t userfaultfd_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct userfaultfd_ctx *ctx = file->private_data; ssize_t _ret, ret = 0; struct uffd_msg msg; struct inode *inode = file_inode(file); bool no_wait; if (!userfaultfd_is_initialized(ctx)) return -EINVAL; no_wait = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT; for (;;) { if (iov_iter_count(to) < sizeof(msg)) return ret ? ret : -EINVAL; _ret = userfaultfd_ctx_read(ctx, no_wait, &msg, inode); if (_ret < 0) return ret ? ret : _ret; _ret = !copy_to_iter_full(&msg, sizeof(msg), to); if (_ret) return ret ? ret : -EFAULT; ret += sizeof(msg); /* * Allow to read more than one fault at time but only * block if waiting for the very first one. */ no_wait = true; } } static void __wake_userfault(struct userfaultfd_ctx *ctx, struct userfaultfd_wake_range *range) { spin_lock_irq(&ctx->fault_pending_wqh.lock); /* wake all in the range and autoremove */ if (waitqueue_active(&ctx->fault_pending_wqh)) __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, range); if (waitqueue_active(&ctx->fault_wqh)) __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range); spin_unlock_irq(&ctx->fault_pending_wqh.lock); } static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, struct userfaultfd_wake_range *range) { unsigned seq; bool need_wakeup; /* * To be sure waitqueue_active() is not reordered by the CPU * before the pagetable update, use an explicit SMP memory * barrier here. PT lock release or mmap_read_unlock(mm) still * have release semantics that can allow the * waitqueue_active() to be reordered before the pte update. */ smp_mb(); /* * Use waitqueue_active because it's very frequent to * change the address space atomically even if there are no * userfaults yet. So we take the spinlock only when we're * sure we've userfaults to wake. */ do { seq = read_seqcount_begin(&ctx->refile_seq); need_wakeup = waitqueue_active(&ctx->fault_pending_wqh) || waitqueue_active(&ctx->fault_wqh); cond_resched(); } while (read_seqcount_retry(&ctx->refile_seq, seq)); if (need_wakeup) __wake_userfault(ctx, range); } static __always_inline int validate_unaligned_range( struct mm_struct *mm, __u64 start, __u64 len) { __u64 task_size = mm->task_size; if (len & ~PAGE_MASK) return -EINVAL; if (!len) return -EINVAL; if (start < mmap_min_addr) return -EINVAL; if (start >= task_size) return -EINVAL; if (len > task_size - start) return -EINVAL; if (start + len <= start) return -EINVAL; return 0; } static __always_inline int validate_range(struct mm_struct *mm, __u64 start, __u64 len) { if (start & ~PAGE_MASK) return -EINVAL; return validate_unaligned_range(mm, start, len); } static int userfaultfd_register(struct userfaultfd_ctx *ctx, unsigned long arg) { struct mm_struct *mm = ctx->mm; struct vm_area_struct *vma, *cur; int ret; struct uffdio_register uffdio_register; struct uffdio_register __user *user_uffdio_register; vm_flags_t vm_flags; bool found; bool basic_ioctls; unsigned long start, end; struct vma_iterator vmi; bool wp_async = userfaultfd_wp_async_ctx(ctx); user_uffdio_register = (struct uffdio_register __user *) arg; ret = -EFAULT; if (copy_from_user(&uffdio_register, user_uffdio_register, sizeof(uffdio_register)-sizeof(__u64))) goto out; ret = -EINVAL; if (!uffdio_register.mode) goto out; if (uffdio_register.mode & ~UFFD_API_REGISTER_MODES) goto out; vm_flags = 0; if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MISSING) vm_flags |= VM_UFFD_MISSING; if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP) { if (!pgtable_supports_uffd_wp()) goto out; vm_flags |= VM_UFFD_WP; } if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR) { #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR goto out; #endif vm_flags |= VM_UFFD_MINOR; } ret = validate_range(mm, uffdio_register.range.start, uffdio_register.range.len); if (ret) goto out; start = uffdio_register.range.start; end = start + uffdio_register.range.len; ret = -ENOMEM; if (!mmget_not_zero(mm)) goto out; ret = -EINVAL; mmap_write_lock(mm); vma_iter_init(&vmi, mm, start); vma = vma_find(&vmi, end); if (!vma) goto out_unlock; /* * If the first vma contains huge pages, make sure start address * is aligned to huge page size. */ if (is_vm_hugetlb_page(vma)) { unsigned long vma_hpagesize = vma_kernel_pagesize(vma); if (start & (vma_hpagesize - 1)) goto out_unlock; } /* * Search for not compatible vmas. */ found = false; basic_ioctls = false; cur = vma; do { cond_resched(); VM_WARN_ON_ONCE(!!cur->vm_userfaultfd_ctx.ctx ^ !!(cur->vm_flags & __VM_UFFD_FLAGS)); /* check not compatible vmas */ ret = -EINVAL; if (!vma_can_userfault(cur, vm_flags, wp_async)) goto out_unlock; /* * UFFDIO_COPY will fill file holes even without * PROT_WRITE. This check enforces that if this is a * MAP_SHARED, the process has write permission to the backing * file. If VM_MAYWRITE is set it also enforces that on a * MAP_SHARED vma: there is no F_WRITE_SEAL and no further * F_WRITE_SEAL can be taken until the vma is destroyed. */ ret = -EPERM; if (unlikely(!(cur->vm_flags & VM_MAYWRITE))) goto out_unlock; /* * If this vma contains ending address, and huge pages * check alignment. */ if (is_vm_hugetlb_page(cur) && end <= cur->vm_end && end > cur->vm_start) { unsigned long vma_hpagesize = vma_kernel_pagesize(cur); ret = -EINVAL; if (end & (vma_hpagesize - 1)) goto out_unlock; } if ((vm_flags & VM_UFFD_WP) && !(cur->vm_flags & VM_MAYWRITE)) goto out_unlock; /* * Check that this vma isn't already owned by a * different userfaultfd. We can't allow more than one * userfaultfd to own a single vma simultaneously or we * wouldn't know which one to deliver the userfaults to. */ ret = -EBUSY; if (cur->vm_userfaultfd_ctx.ctx && cur->vm_userfaultfd_ctx.ctx != ctx) goto out_unlock; /* * Note vmas containing huge pages */ if (is_vm_hugetlb_page(cur)) basic_ioctls = true; found = true; } for_each_vma_range(vmi, cur, end); VM_WARN_ON_ONCE(!found); ret = userfaultfd_register_range(ctx, vma, vm_flags, start, end, wp_async); out_unlock: mmap_write_unlock(mm); mmput(mm); if (!ret) { __u64 ioctls_out; ioctls_out = basic_ioctls ? UFFD_API_RANGE_IOCTLS_BASIC : UFFD_API_RANGE_IOCTLS; /* * Declare the WP ioctl only if the WP mode is * specified and all checks passed with the range */ if (!(uffdio_register.mode & UFFDIO_REGISTER_MODE_WP)) ioctls_out &= ~((__u64)1 << _UFFDIO_WRITEPROTECT); /* CONTINUE ioctl is only supported for MINOR ranges. */ if (!(uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR)) ioctls_out &= ~((__u64)1 << _UFFDIO_CONTINUE); /* * Now that we scanned all vmas we can already tell * userland which ioctls methods are guaranteed to * succeed on this range. */ if (put_user(ioctls_out, &user_uffdio_register->ioctls)) ret = -EFAULT; } out: return ret; } static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, unsigned long arg) { struct mm_struct *mm = ctx->mm; struct vm_area_struct *vma, *prev, *cur; int ret; struct uffdio_range uffdio_unregister; bool found; unsigned long start, end, vma_end; const void __user *buf = (void __user *)arg; struct vma_iterator vmi; bool wp_async = userfaultfd_wp_async_ctx(ctx); ret = -EFAULT; if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister))) goto out; ret = validate_range(mm, uffdio_unregister.start, uffdio_unregister.len); if (ret) goto out; start = uffdio_unregister.start; end = start + uffdio_unregister.len; ret = -ENOMEM; if (!mmget_not_zero(mm)) goto out; mmap_write_lock(mm); ret = -EINVAL; vma_iter_init(&vmi, mm, start); vma = vma_find(&vmi, end); if (!vma) goto out_unlock; /* * If the first vma contains huge pages, make sure start address * is aligned to huge page size. */ if (is_vm_hugetlb_page(vma)) { unsigned long vma_hpagesize = vma_kernel_pagesize(vma); if (start & (vma_hpagesize - 1)) goto out_unlock; } /* * Search for not compatible vmas. */ found = false; cur = vma; do { cond_resched(); VM_WARN_ON_ONCE(!!cur->vm_userfaultfd_ctx.ctx ^ !!(cur->vm_flags & __VM_UFFD_FLAGS)); /* * Prevent unregistering through a different userfaultfd than * the one used for registration. */ if (cur->vm_userfaultfd_ctx.ctx && cur->vm_userfaultfd_ctx.ctx != ctx) goto out_unlock; /* * Check not compatible vmas, not strictly required * here as not compatible vmas cannot have an * userfaultfd_ctx registered on them, but this * provides for more strict behavior to notice * unregistration errors. */ if (!vma_can_userfault(cur, cur->vm_flags, wp_async)) goto out_unlock; found = true; } for_each_vma_range(vmi, cur, end); VM_WARN_ON_ONCE(!found); vma_iter_set(&vmi, start); prev = vma_prev(&vmi); if (vma->vm_start < start) prev = vma; ret = 0; for_each_vma_range(vmi, vma, end) { cond_resched(); /* VMA not registered with userfaultfd. */ if (!vma->vm_userfaultfd_ctx.ctx) goto skip; VM_WARN_ON_ONCE(vma->vm_userfaultfd_ctx.ctx != ctx); VM_WARN_ON_ONCE(!vma_can_userfault(vma, vma->vm_flags, wp_async)); VM_WARN_ON_ONCE(!(vma->vm_flags & VM_MAYWRITE)); if (vma->vm_start > start) start = vma->vm_start; vma_end = min(end, vma->vm_end); if (userfaultfd_missing(vma)) { /* * Wake any concurrent pending userfault while * we unregister, so they will not hang * permanently and it avoids userland to call * UFFDIO_WAKE explicitly. */ struct userfaultfd_wake_range range; range.start = start; range.len = vma_end - start; wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range); } vma = userfaultfd_clear_vma(&vmi, prev, vma, start, vma_end); if (IS_ERR(vma)) { ret = PTR_ERR(vma); break; } skip: prev = vma; start = vma->vm_end; } out_unlock: mmap_write_unlock(mm); mmput(mm); out: return ret; } /* * userfaultfd_wake may be used in combination with the * UFFDIO_*_MODE_DONTWAKE to wakeup userfaults in batches. */ static int userfaultfd_wake(struct userfaultfd_ctx *ctx, unsigned long arg) { int ret; struct uffdio_range uffdio_wake; struct userfaultfd_wake_range range; const void __user *buf = (void __user *)arg; ret = -EFAULT; if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake))) goto out; ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len); if (ret) goto out; range.start = uffdio_wake.start; range.len = uffdio_wake.len; /* * len == 0 means wake all and we don't want to wake all here, * so check it again to be sure. */ VM_WARN_ON_ONCE(!range.len); wake_userfault(ctx, &range); ret = 0; out: return ret; } static int userfaultfd_copy(struct userfaultfd_ctx *ctx, unsigned long arg) { __s64 ret; struct uffdio_copy uffdio_copy; struct uffdio_copy __user *user_uffdio_copy; struct userfaultfd_wake_range range; uffd_flags_t flags = 0; user_uffdio_copy = (struct uffdio_copy __user *) arg; ret = -EAGAIN; if (unlikely(atomic_read(&ctx->mmap_changing))) { if (unlikely(put_user(ret, &user_uffdio_copy->copy))) return -EFAULT; goto out; } ret = -EFAULT; if (copy_from_user(&uffdio_copy, user_uffdio_copy, /* don't copy "copy" last field */ sizeof(uffdio_copy)-sizeof(__s64))) goto out; ret = validate_unaligned_range(ctx->mm, uffdio_copy.src, uffdio_copy.len); if (ret) goto out; ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len); if (ret) goto out; ret = -EINVAL; if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP)) goto out; if (uffdio_copy.mode & UFFDIO_COPY_MODE_WP) flags |= MFILL_ATOMIC_WP; if (mmget_not_zero(ctx->mm)) { ret = mfill_atomic_copy(ctx, uffdio_copy.dst, uffdio_copy.src, uffdio_copy.len, flags); mmput(ctx->mm); } else { return -ESRCH; } if (unlikely(put_user(ret, &user_uffdio_copy->copy))) return -EFAULT; if (ret < 0) goto out; VM_WARN_ON_ONCE(!ret); /* len == 0 would wake all */ range.len = ret; if (!(uffdio_copy.mode & UFFDIO_COPY_MODE_DONTWAKE)) { range.start = uffdio_copy.dst; wake_userfault(ctx, &range); } ret = range.len == uffdio_copy.len ? 0 : -EAGAIN; out: return ret; } static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, unsigned long arg) { __s64 ret; struct uffdio_zeropage uffdio_zeropage; struct uffdio_zeropage __user *user_uffdio_zeropage; struct userfaultfd_wake_range range; user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg; ret = -EAGAIN; if (unlikely(atomic_read(&ctx->mmap_changing))) { if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage))) return -EFAULT; goto out; } ret = -EFAULT; if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage, /* don't copy "zeropage" last field */ sizeof(uffdio_zeropage)-sizeof(__s64))) goto out; ret = validate_range(ctx->mm, uffdio_zeropage.range.start, uffdio_zeropage.range.len); if (ret) goto out; ret = -EINVAL; if (uffdio_zeropage.mode & ~UFFDIO_ZEROPAGE_MODE_DONTWAKE) goto out; if (mmget_not_zero(ctx->mm)) { ret = mfill_atomic_zeropage(ctx, uffdio_zeropage.range.start, uffdio_zeropage.range.len); mmput(ctx->mm); } else { return -ESRCH; } if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage))) return -EFAULT; if (ret < 0) goto out; /* len == 0 would wake all */ VM_WARN_ON_ONCE(!ret); range.len = ret; if (!(uffdio_zeropage.mode & UFFDIO_ZEROPAGE_MODE_DONTWAKE)) { range.start = uffdio_zeropage.range.start; wake_userfault(ctx, &range); } ret = range.len == uffdio_zeropage.range.len ? 0 : -EAGAIN; out: return ret; } static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx, unsigned long arg) { int ret; struct uffdio_writeprotect uffdio_wp; struct uffdio_writeprotect __user *user_uffdio_wp; struct userfaultfd_wake_range range; bool mode_wp, mode_dontwake; if (atomic_read(&ctx->mmap_changing)) return -EAGAIN; user_uffdio_wp = (struct uffdio_writeprotect __user *) arg; if (copy_from_user(&uffdio_wp, user_uffdio_wp, sizeof(struct uffdio_writeprotect))) return -EFAULT; ret = validate_range(ctx->mm, uffdio_wp.range.start, uffdio_wp.range.len); if (ret) return ret; if (uffdio_wp.mode & ~(UFFDIO_WRITEPROTECT_MODE_DONTWAKE | UFFDIO_WRITEPROTECT_MODE_WP)) return -EINVAL; mode_wp = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WP; mode_dontwake = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_DONTWAKE; if (mode_wp && mode_dontwake) return -EINVAL; if (mmget_not_zero(ctx->mm)) { ret = mwriteprotect_range(ctx, uffdio_wp.range.start, uffdio_wp.range.len, mode_wp); mmput(ctx->mm); } else { return -ESRCH; } if (ret) return ret; if (!mode_wp && !mode_dontwake) { range.start = uffdio_wp.range.start; range.len = uffdio_wp.range.len; wake_userfault(ctx, &range); } return ret; } static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) { __s64 ret; struct uffdio_continue uffdio_continue; struct uffdio_continue __user *user_uffdio_continue; struct userfaultfd_wake_range range; uffd_flags_t flags = 0; user_uffdio_continue = (struct uffdio_continue __user *)arg; ret = -EAGAIN; if (unlikely(atomic_read(&ctx->mmap_changing))) { if (unlikely(put_user(ret, &user_uffdio_continue->mapped))) return -EFAULT; goto out; } ret = -EFAULT; if (copy_from_user(&uffdio_continue, user_uffdio_continue, /* don't copy the output fields */ sizeof(uffdio_continue) - (sizeof(__s64)))) goto out; ret = validate_range(ctx->mm, uffdio_continue.range.start, uffdio_continue.range.len); if (ret) goto out; ret = -EINVAL; if (uffdio_continue.mode & ~(UFFDIO_CONTINUE_MODE_DONTWAKE | UFFDIO_CONTINUE_MODE_WP)) goto out; if (uffdio_continue.mode & UFFDIO_CONTINUE_MODE_WP) flags |= MFILL_ATOMIC_WP; if (mmget_not_zero(ctx->mm)) { ret = mfill_atomic_continue(ctx, uffdio_continue.range.start, uffdio_continue.range.len, flags); mmput(ctx->mm); } else { return -ESRCH; } if (unlikely(put_user(ret, &user_uffdio_continue->mapped))) return -EFAULT; if (ret < 0) goto out; /* len == 0 would wake all */ VM_WARN_ON_ONCE(!ret); range.len = ret; if (!(uffdio_continue.mode & UFFDIO_CONTINUE_MODE_DONTWAKE)) { range.start = uffdio_continue.range.start; wake_userfault(ctx, &range); } ret = range.len == uffdio_continue.range.len ? 0 : -EAGAIN; out: return ret; } static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long arg) { __s64 ret; struct uffdio_poison uffdio_poison; struct uffdio_poison __user *user_uffdio_poison; struct userfaultfd_wake_range range; user_uffdio_poison = (struct uffdio_poison __user *)arg; ret = -EAGAIN; if (unlikely(atomic_read(&ctx->mmap_changing))) { if (unlikely(put_user(ret, &user_uffdio_poison->updated))) return -EFAULT; goto out; } ret = -EFAULT; if (copy_from_user(&uffdio_poison, user_uffdio_poison, /* don't copy the output fields */ sizeof(uffdio_poison) - (sizeof(__s64)))) goto out; ret = validate_range(ctx->mm, uffdio_poison.range.start, uffdio_poison.range.len); if (ret) goto out; ret = -EINVAL; if (uffdio_poison.mode & ~UFFDIO_POISON_MODE_DONTWAKE) goto out; if (mmget_not_zero(ctx->mm)) { ret = mfill_atomic_poison(ctx, uffdio_poison.range.start, uffdio_poison.range.len, 0); mmput(ctx->mm); } else { return -ESRCH; } if (unlikely(put_user(ret, &user_uffdio_poison->updated))) return -EFAULT; if (ret < 0) goto out; /* len == 0 would wake all */ VM_WARN_ON_ONCE(!ret); range.len = ret; if (!(uffdio_poison.mode & UFFDIO_POISON_MODE_DONTWAKE)) { range.start = uffdio_poison.range.start; wake_userfault(ctx, &range); } ret = range.len == uffdio_poison.range.len ? 0 : -EAGAIN; out: return ret; } bool userfaultfd_wp_async(struct vm_area_struct *vma) { return userfaultfd_wp_async_ctx(vma->vm_userfaultfd_ctx.ctx); } static inline unsigned int uffd_ctx_features(__u64 user_features) { /* * For the current set of features the bits just coincide. Set * UFFD_FEATURE_INITIALIZED to mark the features as enabled. */ return (unsigned int)user_features | UFFD_FEATURE_INITIALIZED; } static int userfaultfd_move(struct userfaultfd_ctx *ctx, unsigned long arg) { __s64 ret; struct uffdio_move uffdio_move; struct uffdio_move __user *user_uffdio_move; struct userfaultfd_wake_range range; struct mm_struct *mm = ctx->mm; user_uffdio_move = (struct uffdio_move __user *) arg; ret = -EAGAIN; if (unlikely(atomic_read(&ctx->mmap_changing))) { if (unlikely(put_user(ret, &user_uffdio_move->move))) return -EFAULT; goto out; } if (copy_from_user(&uffdio_move, user_uffdio_move, /* don't copy "move" last field */ sizeof(uffdio_move)-sizeof(__s64))) return -EFAULT; /* Do not allow cross-mm moves. */ if (mm != current->mm) return -EINVAL; ret = validate_range(mm, uffdio_move.dst, uffdio_move.len); if (ret) return ret; ret = validate_range(mm, uffdio_move.src, uffdio_move.len); if (ret) return ret; if (uffdio_move.mode & ~(UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES| UFFDIO_MOVE_MODE_DONTWAKE)) return -EINVAL; if (mmget_not_zero(mm)) { ret = move_pages(ctx, uffdio_move.dst, uffdio_move.src, uffdio_move.len, uffdio_move.mode); mmput(mm); } else { return -ESRCH; } if (unlikely(put_user(ret, &user_uffdio_move->move))) return -EFAULT; if (ret < 0) goto out; /* len == 0 would wake all */ VM_WARN_ON(!ret); range.len = ret; if (!(uffdio_move.mode & UFFDIO_MOVE_MODE_DONTWAKE)) { range.start = uffdio_move.dst; wake_userfault(ctx, &range); } ret = range.len == uffdio_move.len ? 0 : -EAGAIN; out: return ret; } /* * userland asks for a certain API version and we return which bits * and ioctl commands are implemented in this kernel for such API * version or -EINVAL if unknown. */ static int userfaultfd_api(struct userfaultfd_ctx *ctx, unsigned long arg) { struct uffdio_api uffdio_api; void __user *buf = (void __user *)arg; unsigned int ctx_features; int ret; __u64 features; ret = -EFAULT; if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api))) goto out; features = uffdio_api.features; ret = -EINVAL; if (uffdio_api.api != UFFD_API) goto err_out; ret = -EPERM; if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE)) goto err_out; /* WP_ASYNC relies on WP_UNPOPULATED, choose it unconditionally */ if (features & UFFD_FEATURE_WP_ASYNC) features |= UFFD_FEATURE_WP_UNPOPULATED; /* report all available features and ioctls to userland */ uffdio_api.features = UFFD_API_FEATURES; #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR uffdio_api.features &= ~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM); #endif if (!pgtable_supports_uffd_wp()) uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP; if (!uffd_supports_wp_marker()) { uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM; uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED; uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC; } ret = -EINVAL; if (features & ~uffdio_api.features) goto err_out; uffdio_api.ioctls = UFFD_API_IOCTLS; ret = -EFAULT; if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) goto out; /* only enable the requested features for this uffd context */ ctx_features = uffd_ctx_features(features); ret = -EINVAL; if (cmpxchg(&ctx->features, 0, ctx_features) != 0) goto err_out; ret = 0; out: return ret; err_out: memset(&uffdio_api, 0, sizeof(uffdio_api)); if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) ret = -EFAULT; goto out; } static long userfaultfd_ioctl(struct file *file, unsigned cmd, unsigned long arg) { int ret = -EINVAL; struct userfaultfd_ctx *ctx = file->private_data; if (cmd != UFFDIO_API && !userfaultfd_is_initialized(ctx)) return -EINVAL; switch(cmd) { case UFFDIO_API: ret = userfaultfd_api(ctx, arg); break; case UFFDIO_REGISTER: ret = userfaultfd_register(ctx, arg); break; case UFFDIO_UNREGISTER: ret = userfaultfd_unregister(ctx, arg); break; case UFFDIO_WAKE: ret = userfaultfd_wake(ctx, arg); break; case UFFDIO_COPY: ret = userfaultfd_copy(ctx, arg); break; case UFFDIO_ZEROPAGE: ret = userfaultfd_zeropage(ctx, arg); break; case UFFDIO_MOVE: ret = userfaultfd_move(ctx, arg); break; case UFFDIO_WRITEPROTECT: ret = userfaultfd_writeprotect(ctx, arg); break; case UFFDIO_CONTINUE: ret = userfaultfd_continue(ctx, arg); break; case UFFDIO_POISON: ret = userfaultfd_poison(ctx, arg); break; } return ret; } #ifdef CONFIG_PROC_FS static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) { struct userfaultfd_ctx *ctx = f->private_data; wait_queue_entry_t *wq; unsigned long pending = 0, total = 0; spin_lock_irq(&ctx->fault_pending_wqh.lock); list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) { pending++; total++; } list_for_each_entry(wq, &ctx->fault_wqh.head, entry) { total++; } spin_unlock_irq(&ctx->fault_pending_wqh.lock); /* * If more protocols will be added, there will be all shown * separated by a space. Like this: * protocols: aa:... bb:... */ seq_printf(m, "pending:\t%lu\ntotal:\t%lu\nAPI:\t%Lx:%x:%Lx\n", pending, total, UFFD_API, ctx->features, UFFD_API_IOCTLS|UFFD_API_RANGE_IOCTLS); } #endif static const struct file_operations userfaultfd_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = userfaultfd_show_fdinfo, #endif .release = userfaultfd_release, .poll = userfaultfd_poll, .read_iter = userfaultfd_read_iter, .unlocked_ioctl = userfaultfd_ioctl, .compat_ioctl = compat_ptr_ioctl, .llseek = noop_llseek, }; static void init_once_userfaultfd_ctx(void *mem) { struct userfaultfd_ctx *ctx = (struct userfaultfd_ctx *) mem; init_waitqueue_head(&ctx->fault_pending_wqh); init_waitqueue_head(&ctx->fault_wqh); init_waitqueue_head(&ctx->event_wqh); init_waitqueue_head(&ctx->fd_wqh); seqcount_spinlock_init(&ctx->refile_seq, &ctx->fault_pending_wqh.lock); } static int new_userfaultfd(int flags) { struct userfaultfd_ctx *ctx __free(kfree) = NULL; VM_WARN_ON_ONCE(!current->mm); /* Check the UFFD_* constants for consistency. */ BUILD_BUG_ON(UFFD_USER_MODE_ONLY & UFFD_SHARED_FCNTL_FLAGS); if (flags & ~(UFFD_SHARED_FCNTL_FLAGS | UFFD_USER_MODE_ONLY)) return -EINVAL; ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL); if (!ctx) return -ENOMEM; refcount_set(&ctx->refcount, 1); ctx->flags = flags; ctx->features = 0; ctx->released = false; init_rwsem(&ctx->map_changing_lock); atomic_set(&ctx->mmap_changing, 0); ctx->mm = current->mm; FD_PREPARE(fdf, flags & UFFD_SHARED_FCNTL_FLAGS, anon_inode_create_getfile("[userfaultfd]", &userfaultfd_fops, ctx, O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL)); if (fdf.err) return fdf.err; /* prevent the mm struct to be freed */ mmgrab(ctx->mm); fd_prepare_file(fdf)->f_mode |= FMODE_NOWAIT; retain_and_null_ptr(ctx); return fd_publish(fdf); } static inline bool userfaultfd_syscall_allowed(int flags) { /* Userspace-only page faults are always allowed */ if (flags & UFFD_USER_MODE_ONLY) return true; /* * The user is requesting a userfaultfd which can handle kernel faults. * Privileged users are always allowed to do this. */ if (capable(CAP_SYS_PTRACE)) return true; /* Otherwise, access to kernel fault handling is sysctl controlled. */ return sysctl_unprivileged_userfaultfd; } SYSCALL_DEFINE1(userfaultfd, int, flags) { if (!userfaultfd_syscall_allowed(flags)) return -EPERM; return new_userfaultfd(flags); } static long userfaultfd_dev_ioctl(struct file *file, unsigned int cmd, unsigned long flags) { if (cmd != USERFAULTFD_IOC_NEW) return -EINVAL; return new_userfaultfd(flags); } static const struct file_operations userfaultfd_dev_fops = { .unlocked_ioctl = userfaultfd_dev_ioctl, .compat_ioctl = userfaultfd_dev_ioctl, .owner = THIS_MODULE, .llseek = noop_llseek, }; static struct miscdevice userfaultfd_misc = { .minor = MISC_DYNAMIC_MINOR, .name = "userfaultfd", .fops = &userfaultfd_dev_fops }; static int __init userfaultfd_init(void) { int ret; ret = misc_register(&userfaultfd_misc); if (ret) return ret; userfaultfd_ctx_cachep = kmem_cache_create("userfaultfd_ctx_cache", sizeof(struct userfaultfd_ctx), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, init_once_userfaultfd_ctx); #ifdef CONFIG_SYSCTL register_sysctl_init("vm", vm_userfaultfd_table); #endif return 0; } __initcall(userfaultfd_init); |
| 13 1 11 9 8 13 3 2 1 1 5 5 5 5 5 5 5 5 5 5 5 5 5 2 2 2 2 2 2 2 1 2 1 1 1 1 1 13 13 9 4 9 7 2 1 1 4 5 7 3 1 4 2 2 2 2 2 2 2 2 1 2 1 2 3 13 3 7 5 4 4 2 2 1 2 4 31 32 1 2 1 1 5 1 4 4 3 2 3 2 2 1 3 1 1 1 13 12 13 3 3 2 2 1 1 1 2 32 12 10 4 4 4 4 9 9 9 10 7 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2001 Paul Stewart * Copyright (c) 2001 Vojtech Pavlik * * HID char devices, giving access to raw HID device events. */ /* * * Should you need to contact me, the author, you can do so either by * e-mail - mail your message to Paul Stewart <stewart@wetlogic.net> */ #include <linux/poll.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/module.h> #include <linux/init.h> #include <linux/input.h> #include <linux/usb.h> #include <linux/hid.h> #include <linux/hiddev.h> #include <linux/compat.h> #include <linux/vmalloc.h> #include <linux/nospec.h> #include "usbhid.h" #ifdef CONFIG_USB_DYNAMIC_MINORS #define HIDDEV_MINOR_BASE 0 #define HIDDEV_MINORS 256 #else #define HIDDEV_MINOR_BASE 96 #define HIDDEV_MINORS 16 #endif #define HIDDEV_BUFFER_SIZE 2048 struct hiddev_list { struct hiddev_usage_ref buffer[HIDDEV_BUFFER_SIZE]; int head; int tail; unsigned flags; struct fasync_struct *fasync; struct hiddev *hiddev; struct list_head node; struct mutex thread_lock; }; /* * Find a report, given the report's type and ID. The ID can be specified * indirectly by REPORT_ID_FIRST (which returns the first report of the given * type) or by (REPORT_ID_NEXT | old_id), which returns the next report of the * given type which follows old_id. */ static struct hid_report * hiddev_lookup_report(struct hid_device *hid, struct hiddev_report_info *rinfo) { unsigned int flags = rinfo->report_id & ~HID_REPORT_ID_MASK; unsigned int rid = rinfo->report_id & HID_REPORT_ID_MASK; struct hid_report_enum *report_enum; struct hid_report *report; struct list_head *list; if (rinfo->report_type < HID_REPORT_TYPE_MIN || rinfo->report_type > HID_REPORT_TYPE_MAX) return NULL; report_enum = hid->report_enum + (rinfo->report_type - HID_REPORT_TYPE_MIN); switch (flags) { case 0: /* Nothing to do -- report_id is already set correctly */ break; case HID_REPORT_ID_FIRST: if (list_empty(&report_enum->report_list)) return NULL; list = report_enum->report_list.next; report = list_entry(list, struct hid_report, list); rinfo->report_id = report->id; break; case HID_REPORT_ID_NEXT: report = report_enum->report_id_hash[rid]; if (!report) return NULL; list = report->list.next; if (list == &report_enum->report_list) return NULL; report = list_entry(list, struct hid_report, list); rinfo->report_id = report->id; break; default: return NULL; } return report_enum->report_id_hash[rinfo->report_id]; } /* * Perform an exhaustive search of the report table for a usage, given its * type and usage id. */ static struct hid_field * hiddev_lookup_usage(struct hid_device *hid, struct hiddev_usage_ref *uref) { int i, j; struct hid_report *report; struct hid_report_enum *report_enum; struct hid_field *field; if (uref->report_type < HID_REPORT_TYPE_MIN || uref->report_type > HID_REPORT_TYPE_MAX) return NULL; report_enum = hid->report_enum + (uref->report_type - HID_REPORT_TYPE_MIN); list_for_each_entry(report, &report_enum->report_list, list) { for (i = 0; i < report->maxfield; i++) { field = report->field[i]; for (j = 0; j < field->maxusage; j++) { if (field->usage[j].hid == uref->usage_code) { uref->report_id = report->id; uref->field_index = i; uref->usage_index = j; return field; } } } } return NULL; } static void hiddev_send_event(struct hid_device *hid, struct hiddev_usage_ref *uref) { struct hiddev *hiddev = hid->hiddev; struct hiddev_list *list; unsigned long flags; spin_lock_irqsave(&hiddev->list_lock, flags); list_for_each_entry(list, &hiddev->list, node) { if (uref->field_index != HID_FIELD_INDEX_NONE || (list->flags & HIDDEV_FLAG_REPORT) != 0) { list->buffer[list->head] = *uref; list->head = (list->head + 1) & (HIDDEV_BUFFER_SIZE - 1); kill_fasync(&list->fasync, SIGIO, POLL_IN); } } spin_unlock_irqrestore(&hiddev->list_lock, flags); wake_up_interruptible(&hiddev->wait); } /* * This is where hid.c calls into hiddev to pass an event that occurred over * the interrupt pipe */ void hiddev_hid_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { unsigned type = field->report_type; struct hiddev_usage_ref uref; uref.report_type = (type == HID_INPUT_REPORT) ? HID_REPORT_TYPE_INPUT : ((type == HID_OUTPUT_REPORT) ? HID_REPORT_TYPE_OUTPUT : ((type == HID_FEATURE_REPORT) ? HID_REPORT_TYPE_FEATURE : 0)); uref.report_id = field->report->id; uref.field_index = field->index; uref.usage_index = (usage - field->usage); uref.usage_code = usage->hid; uref.value = value; hiddev_send_event(hid, &uref); } EXPORT_SYMBOL_GPL(hiddev_hid_event); void hiddev_report_event(struct hid_device *hid, struct hid_report *report) { unsigned type = report->type; struct hiddev_usage_ref uref; memset(&uref, 0, sizeof(uref)); uref.report_type = (type == HID_INPUT_REPORT) ? HID_REPORT_TYPE_INPUT : ((type == HID_OUTPUT_REPORT) ? HID_REPORT_TYPE_OUTPUT : ((type == HID_FEATURE_REPORT) ? HID_REPORT_TYPE_FEATURE : 0)); uref.report_id = report->id; uref.field_index = HID_FIELD_INDEX_NONE; hiddev_send_event(hid, &uref); } /* * fasync file op */ static int hiddev_fasync(int fd, struct file *file, int on) { struct hiddev_list *list = file->private_data; return fasync_helper(fd, file, on, &list->fasync); } /* * release file op */ static int hiddev_release(struct inode * inode, struct file * file) { struct hiddev_list *list = file->private_data; unsigned long flags; spin_lock_irqsave(&list->hiddev->list_lock, flags); list_del(&list->node); spin_unlock_irqrestore(&list->hiddev->list_lock, flags); mutex_lock(&list->hiddev->existancelock); if (!--list->hiddev->open) { if (list->hiddev->exist) { hid_hw_close(list->hiddev->hid); hid_hw_power(list->hiddev->hid, PM_HINT_NORMAL); } else { mutex_unlock(&list->hiddev->existancelock); kfree(list->hiddev); vfree(list); return 0; } } mutex_unlock(&list->hiddev->existancelock); vfree(list); return 0; } static int __hiddev_open(struct hiddev *hiddev, struct file *file) { struct hiddev_list *list; int error; lockdep_assert_held(&hiddev->existancelock); list = vzalloc(sizeof(*list)); if (!list) return -ENOMEM; mutex_init(&list->thread_lock); list->hiddev = hiddev; if (!hiddev->open++) { error = hid_hw_power(hiddev->hid, PM_HINT_FULLON); if (error < 0) goto err_drop_count; error = hid_hw_open(hiddev->hid); if (error < 0) goto err_normal_power; } spin_lock_irq(&hiddev->list_lock); list_add_tail(&list->node, &hiddev->list); spin_unlock_irq(&hiddev->list_lock); file->private_data = list; return 0; err_normal_power: hid_hw_power(hiddev->hid, PM_HINT_NORMAL); err_drop_count: hiddev->open--; vfree(list); return error; } /* * open file op */ static int hiddev_open(struct inode *inode, struct file *file) { struct usb_interface *intf; struct hid_device *hid; struct hiddev *hiddev; int res; intf = usbhid_find_interface(iminor(inode)); if (!intf) return -ENODEV; hid = usb_get_intfdata(intf); hiddev = hid->hiddev; mutex_lock(&hiddev->existancelock); res = hiddev->exist ? __hiddev_open(hiddev, file) : -ENODEV; mutex_unlock(&hiddev->existancelock); return res; } /* * "write" file op */ static ssize_t hiddev_write(struct file * file, const char __user * buffer, size_t count, loff_t *ppos) { return -EINVAL; } /* * "read" file op */ static ssize_t hiddev_read(struct file * file, char __user * buffer, size_t count, loff_t *ppos) { DEFINE_WAIT(wait); struct hiddev_list *list = file->private_data; int event_size; int retval; event_size = ((list->flags & HIDDEV_FLAG_UREF) != 0) ? sizeof(struct hiddev_usage_ref) : sizeof(struct hiddev_event); if (count < event_size) return 0; /* lock against other threads */ retval = mutex_lock_interruptible(&list->thread_lock); if (retval) return -ERESTARTSYS; while (retval == 0) { if (list->head == list->tail) { prepare_to_wait(&list->hiddev->wait, &wait, TASK_INTERRUPTIBLE); while (list->head == list->tail) { if (signal_pending(current)) { retval = -ERESTARTSYS; break; } if (!list->hiddev->exist) { retval = -EIO; break; } if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; break; } /* let O_NONBLOCK tasks run */ mutex_unlock(&list->thread_lock); schedule(); if (mutex_lock_interruptible(&list->thread_lock)) { finish_wait(&list->hiddev->wait, &wait); return -EINTR; } set_current_state(TASK_INTERRUPTIBLE); } finish_wait(&list->hiddev->wait, &wait); } if (retval) { mutex_unlock(&list->thread_lock); return retval; } while (list->head != list->tail && retval + event_size <= count) { if ((list->flags & HIDDEV_FLAG_UREF) == 0) { if (list->buffer[list->tail].field_index != HID_FIELD_INDEX_NONE) { struct hiddev_event event; event.hid = list->buffer[list->tail].usage_code; event.value = list->buffer[list->tail].value; if (copy_to_user(buffer + retval, &event, sizeof(struct hiddev_event))) { mutex_unlock(&list->thread_lock); return -EFAULT; } retval += sizeof(struct hiddev_event); } } else { if (list->buffer[list->tail].field_index != HID_FIELD_INDEX_NONE || (list->flags & HIDDEV_FLAG_REPORT) != 0) { if (copy_to_user(buffer + retval, list->buffer + list->tail, sizeof(struct hiddev_usage_ref))) { mutex_unlock(&list->thread_lock); return -EFAULT; } retval += sizeof(struct hiddev_usage_ref); } } list->tail = (list->tail + 1) & (HIDDEV_BUFFER_SIZE - 1); } } mutex_unlock(&list->thread_lock); return retval; } /* * "poll" file op * No kernel lock - fine */ static __poll_t hiddev_poll(struct file *file, poll_table *wait) { struct hiddev_list *list = file->private_data; poll_wait(file, &list->hiddev->wait, wait); if (list->head != list->tail) return EPOLLIN | EPOLLRDNORM | EPOLLOUT; if (!list->hiddev->exist) return EPOLLERR | EPOLLHUP; return 0; } /* * "ioctl" file op */ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, void __user *user_arg) { struct hid_device *hid = hiddev->hid; struct hiddev_report_info rinfo; struct hiddev_usage_ref_multi *uref_multi = NULL; struct hiddev_usage_ref *uref; struct hid_report *report; struct hid_field *field; int i; uref_multi = kmalloc(sizeof(struct hiddev_usage_ref_multi), GFP_KERNEL); if (!uref_multi) return -ENOMEM; uref = &uref_multi->uref; if (cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) { if (copy_from_user(uref_multi, user_arg, sizeof(*uref_multi))) goto fault; } else { if (copy_from_user(uref, user_arg, sizeof(*uref))) goto fault; } switch (cmd) { case HIDIOCGUCODE: rinfo.report_type = uref->report_type; rinfo.report_id = uref->report_id; if ((report = hiddev_lookup_report(hid, &rinfo)) == NULL) goto inval; if (uref->field_index >= report->maxfield) goto inval; uref->field_index = array_index_nospec(uref->field_index, report->maxfield); field = report->field[uref->field_index]; if (uref->usage_index >= field->maxusage) goto inval; uref->usage_index = array_index_nospec(uref->usage_index, field->maxusage); uref->usage_code = field->usage[uref->usage_index].hid; if (copy_to_user(user_arg, uref, sizeof(*uref))) goto fault; goto goodreturn; default: if (cmd != HIDIOCGUSAGE && cmd != HIDIOCGUSAGES && uref->report_type == HID_REPORT_TYPE_INPUT) goto inval; if (uref->report_id == HID_REPORT_ID_UNKNOWN) { field = hiddev_lookup_usage(hid, uref); if (field == NULL) goto inval; } else { rinfo.report_type = uref->report_type; rinfo.report_id = uref->report_id; if ((report = hiddev_lookup_report(hid, &rinfo)) == NULL) goto inval; if (uref->field_index >= report->maxfield) goto inval; uref->field_index = array_index_nospec(uref->field_index, report->maxfield); field = report->field[uref->field_index]; if (cmd == HIDIOCGCOLLECTIONINDEX) { if (uref->usage_index >= field->maxusage) goto inval; uref->usage_index = array_index_nospec(uref->usage_index, field->maxusage); } else if (uref->usage_index >= field->report_count) goto inval; } if (cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) { if (uref_multi->num_values > HID_MAX_MULTI_USAGES || uref->usage_index + uref_multi->num_values > field->report_count) goto inval; uref->usage_index = array_index_nospec(uref->usage_index, field->report_count - uref_multi->num_values); } switch (cmd) { case HIDIOCGUSAGE: if (uref->usage_index >= field->report_count) goto inval; uref->value = field->value[uref->usage_index]; if (copy_to_user(user_arg, uref, sizeof(*uref))) goto fault; goto goodreturn; case HIDIOCSUSAGE: if (uref->usage_index >= field->report_count) goto inval; field->value[uref->usage_index] = uref->value; goto goodreturn; case HIDIOCGCOLLECTIONINDEX: i = field->usage[uref->usage_index].collection_index; kfree(uref_multi); return i; case HIDIOCGUSAGES: for (i = 0; i < uref_multi->num_values; i++) uref_multi->values[i] = field->value[uref->usage_index + i]; if (copy_to_user(user_arg, uref_multi, sizeof(*uref_multi))) goto fault; goto goodreturn; case HIDIOCSUSAGES: for (i = 0; i < uref_multi->num_values; i++) field->value[uref->usage_index + i] = uref_multi->values[i]; goto goodreturn; } goodreturn: kfree(uref_multi); return 0; fault: kfree(uref_multi); return -EFAULT; inval: kfree(uref_multi); return -EINVAL; } } static noinline int hiddev_ioctl_string(struct hiddev *hiddev, unsigned int cmd, void __user *user_arg) { struct hid_device *hid = hiddev->hid; struct usb_device *dev = hid_to_usb_dev(hid); int idx, len; char *buf; if (get_user(idx, (int __user *)user_arg)) return -EFAULT; if ((buf = kmalloc(HID_STRING_SIZE, GFP_KERNEL)) == NULL) return -ENOMEM; if ((len = usb_string(dev, idx, buf, HID_STRING_SIZE-1)) < 0) { kfree(buf); return -EINVAL; } if (copy_to_user(user_arg+sizeof(int), buf, len+1)) { kfree(buf); return -EFAULT; } kfree(buf); return len; } static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct hiddev_list *list = file->private_data; struct hiddev *hiddev = list->hiddev; struct hid_device *hid; struct hiddev_collection_info cinfo; struct hiddev_report_info rinfo; struct hiddev_field_info finfo; struct hiddev_devinfo dinfo; struct hid_report *report; struct hid_field *field; void __user *user_arg = (void __user *)arg; int i, r = -EINVAL; /* Called without BKL by compat methods so no BKL taken */ mutex_lock(&hiddev->existancelock); if (!hiddev->exist) { r = -ENODEV; goto ret_unlock; } hid = hiddev->hid; switch (cmd) { case HIDIOCGVERSION: r = put_user(HID_VERSION, (int __user *)arg) ? -EFAULT : 0; break; case HIDIOCAPPLICATION: if (arg >= hid->maxapplication) break; for (i = 0; i < hid->maxcollection; i++) if (hid->collection[i].type == HID_COLLECTION_APPLICATION && arg-- == 0) break; if (i < hid->maxcollection) r = hid->collection[i].usage; break; case HIDIOCGDEVINFO: { struct usb_device *dev = hid_to_usb_dev(hid); struct usbhid_device *usbhid = hid->driver_data; memset(&dinfo, 0, sizeof(dinfo)); dinfo.bustype = BUS_USB; dinfo.busnum = dev->bus->busnum; dinfo.devnum = dev->devnum; dinfo.ifnum = usbhid->ifnum; dinfo.vendor = le16_to_cpu(dev->descriptor.idVendor); dinfo.product = le16_to_cpu(dev->descriptor.idProduct); dinfo.version = le16_to_cpu(dev->descriptor.bcdDevice); dinfo.num_applications = hid->maxapplication; r = copy_to_user(user_arg, &dinfo, sizeof(dinfo)) ? -EFAULT : 0; break; } case HIDIOCGFLAG: r = put_user(list->flags, (int __user *)arg) ? -EFAULT : 0; break; case HIDIOCSFLAG: { int newflags; if (get_user(newflags, (int __user *)arg)) { r = -EFAULT; break; } if ((newflags & ~HIDDEV_FLAGS) != 0 || ((newflags & HIDDEV_FLAG_REPORT) != 0 && (newflags & HIDDEV_FLAG_UREF) == 0)) break; list->flags = newflags; r = 0; break; } case HIDIOCGSTRING: r = hiddev_ioctl_string(hiddev, cmd, user_arg); break; case HIDIOCINITREPORT: usbhid_init_reports(hid); hiddev->initialized = true; r = 0; break; case HIDIOCGREPORT: if (copy_from_user(&rinfo, user_arg, sizeof(rinfo))) { r = -EFAULT; break; } if (rinfo.report_type == HID_REPORT_TYPE_OUTPUT) break; report = hiddev_lookup_report(hid, &rinfo); if (report == NULL) break; hid_hw_request(hid, report, HID_REQ_GET_REPORT); hid_hw_wait(hid); r = 0; break; case HIDIOCSREPORT: if (copy_from_user(&rinfo, user_arg, sizeof(rinfo))) { r = -EFAULT; break; } if (rinfo.report_type == HID_REPORT_TYPE_INPUT) break; report = hiddev_lookup_report(hid, &rinfo); if (report == NULL) break; hid_hw_request(hid, report, HID_REQ_SET_REPORT); hid_hw_wait(hid); r = 0; break; case HIDIOCGREPORTINFO: if (copy_from_user(&rinfo, user_arg, sizeof(rinfo))) { r = -EFAULT; break; } report = hiddev_lookup_report(hid, &rinfo); if (report == NULL) break; rinfo.num_fields = report->maxfield; r = copy_to_user(user_arg, &rinfo, sizeof(rinfo)) ? -EFAULT : 0; break; case HIDIOCGFIELDINFO: if (copy_from_user(&finfo, user_arg, sizeof(finfo))) { r = -EFAULT; break; } rinfo.report_type = finfo.report_type; rinfo.report_id = finfo.report_id; report = hiddev_lookup_report(hid, &rinfo); if (report == NULL) break; if (finfo.field_index >= report->maxfield) break; finfo.field_index = array_index_nospec(finfo.field_index, report->maxfield); field = report->field[finfo.field_index]; memset(&finfo, 0, sizeof(finfo)); finfo.report_type = rinfo.report_type; finfo.report_id = rinfo.report_id; finfo.field_index = field->report_count - 1; finfo.maxusage = field->maxusage; finfo.flags = field->flags; finfo.physical = field->physical; finfo.logical = field->logical; finfo.application = field->application; finfo.logical_minimum = field->logical_minimum; finfo.logical_maximum = field->logical_maximum; finfo.physical_minimum = field->physical_minimum; finfo.physical_maximum = field->physical_maximum; finfo.unit_exponent = field->unit_exponent; finfo.unit = field->unit; r = copy_to_user(user_arg, &finfo, sizeof(finfo)) ? -EFAULT : 0; break; case HIDIOCGUCODE: case HIDIOCGUSAGE: case HIDIOCSUSAGE: case HIDIOCGUSAGES: case HIDIOCSUSAGES: case HIDIOCGCOLLECTIONINDEX: if (!hiddev->initialized) { usbhid_init_reports(hid); hiddev->initialized = true; } r = hiddev_ioctl_usage(hiddev, cmd, user_arg); break; case HIDIOCGCOLLECTIONINFO: if (copy_from_user(&cinfo, user_arg, sizeof(cinfo))) { r = -EFAULT; break; } if (cinfo.index >= hid->maxcollection) break; cinfo.index = array_index_nospec(cinfo.index, hid->maxcollection); cinfo.type = hid->collection[cinfo.index].type; cinfo.usage = hid->collection[cinfo.index].usage; cinfo.level = hid->collection[cinfo.index].level; r = copy_to_user(user_arg, &cinfo, sizeof(cinfo)) ? -EFAULT : 0; break; default: if (_IOC_TYPE(cmd) != 'H' || _IOC_DIR(cmd) != _IOC_READ) break; if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGNAME(0))) { int len = strlen(hid->name) + 1; if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); r = copy_to_user(user_arg, hid->name, len) ? -EFAULT : len; break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGPHYS(0))) { int len = strlen(hid->phys) + 1; if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); r = copy_to_user(user_arg, hid->phys, len) ? -EFAULT : len; break; } } ret_unlock: mutex_unlock(&hiddev->existancelock); return r; } static const struct file_operations hiddev_fops = { .owner = THIS_MODULE, .read = hiddev_read, .write = hiddev_write, .poll = hiddev_poll, .open = hiddev_open, .release = hiddev_release, .unlocked_ioctl = hiddev_ioctl, .fasync = hiddev_fasync, .compat_ioctl = compat_ptr_ioctl, .llseek = noop_llseek, }; static char *hiddev_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } static struct usb_class_driver hiddev_class = { .name = "hiddev%d", .devnode = hiddev_devnode, .fops = &hiddev_fops, .minor_base = HIDDEV_MINOR_BASE, }; /* * This is where hid.c calls us to connect a hid device to the hiddev driver */ int hiddev_connect(struct hid_device *hid, unsigned int force) { struct hiddev *hiddev; struct usbhid_device *usbhid = hid->driver_data; int retval; if (!force) { unsigned int i; for (i = 0; i < hid->maxcollection; i++) if (hid->collection[i].type == HID_COLLECTION_APPLICATION && !IS_INPUT_APPLICATION(hid->collection[i].usage)) break; if (i == hid->maxcollection) return -EINVAL; } if (!(hiddev = kzalloc(sizeof(struct hiddev), GFP_KERNEL))) return -ENOMEM; init_waitqueue_head(&hiddev->wait); INIT_LIST_HEAD(&hiddev->list); spin_lock_init(&hiddev->list_lock); mutex_init(&hiddev->existancelock); hid->hiddev = hiddev; hiddev->hid = hid; hiddev->exist = 1; retval = usb_register_dev(usbhid->intf, &hiddev_class); if (retval) { hid_err(hid, "Not able to get a minor for this device\n"); hid->hiddev = NULL; kfree(hiddev); return retval; } /* * If HID_QUIRK_NO_INIT_REPORTS is set, make sure we don't initialize * the reports. */ hiddev->initialized = hid->quirks & HID_QUIRK_NO_INIT_REPORTS; hiddev->minor = usbhid->intf->minor; return 0; } /* * This is where hid.c calls us to disconnect a hiddev device from the * corresponding hid device (usually because the usb device has disconnected) */ static struct usb_class_driver hiddev_class; void hiddev_disconnect(struct hid_device *hid) { struct hiddev *hiddev = hid->hiddev; struct usbhid_device *usbhid = hid->driver_data; usb_deregister_dev(usbhid->intf, &hiddev_class); mutex_lock(&hiddev->existancelock); hiddev->exist = 0; if (hiddev->open) { hid_hw_close(hiddev->hid); wake_up_interruptible(&hiddev->wait); mutex_unlock(&hiddev->existancelock); } else { mutex_unlock(&hiddev->existancelock); kfree(hiddev); } } |
| 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 | // SPDX-License-Identifier: GPL-2.0 #include <linux/build_bug.h> #include <linux/errno.h> #include <linux/errname.h> #include <linux/kernel.h> #include <linux/math.h> /* * Ensure these tables do not accidentally become gigantic if some * huge errno makes it in. On most architectures, the first table will * only have about 140 entries, but mips and parisc have more sparsely * allocated errnos (with EHWPOISON = 257 on parisc, and EDQUOT = 1133 * on mips), so this wastes a bit of space on those - though we * special case the EDQUOT case. */ #define E(err) [err + BUILD_BUG_ON_ZERO(err <= 0 || err > 300)] = "-" #err static const char *names_0[] = { E(E2BIG), E(EACCES), E(EADDRINUSE), E(EADDRNOTAVAIL), E(EADV), E(EAFNOSUPPORT), E(EAGAIN), /* EWOULDBLOCK */ E(EALREADY), E(EBADE), E(EBADF), E(EBADFD), E(EBADMSG), E(EBADR), E(EBADRQC), E(EBADSLT), E(EBFONT), E(EBUSY), E(ECANCELED), /* ECANCELLED */ E(ECHILD), E(ECHRNG), E(ECOMM), E(ECONNABORTED), E(ECONNREFUSED), /* EREFUSED */ E(ECONNRESET), E(EDEADLK), /* EDEADLOCK */ #if EDEADLK != EDEADLOCK /* mips, sparc, powerpc */ E(EDEADLOCK), #endif E(EDESTADDRREQ), E(EDOM), E(EDOTDOT), #ifndef CONFIG_MIPS E(EDQUOT), #endif E(EEXIST), E(EFAULT), E(EFBIG), E(EHOSTDOWN), E(EHOSTUNREACH), E(EHWPOISON), E(EIDRM), E(EILSEQ), #ifdef EINIT E(EINIT), #endif E(EINPROGRESS), E(EINTR), E(EINVAL), E(EIO), E(EISCONN), E(EISDIR), E(EISNAM), E(EKEYEXPIRED), E(EKEYREJECTED), E(EKEYREVOKED), E(EL2HLT), E(EL2NSYNC), E(EL3HLT), E(EL3RST), E(ELIBACC), E(ELIBBAD), E(ELIBEXEC), E(ELIBMAX), E(ELIBSCN), E(ELNRNG), E(ELOOP), E(EMEDIUMTYPE), E(EMFILE), E(EMLINK), E(EMSGSIZE), E(EMULTIHOP), E(ENAMETOOLONG), E(ENAVAIL), E(ENETDOWN), E(ENETRESET), E(ENETUNREACH), E(ENFILE), E(ENOANO), E(ENOBUFS), E(ENOCSI), E(ENODATA), E(ENODEV), E(ENOENT), E(ENOEXEC), E(ENOKEY), E(ENOLCK), E(ENOLINK), E(ENOMEDIUM), E(ENOMEM), E(ENOMSG), E(ENONET), E(ENOPKG), E(ENOPROTOOPT), E(ENOSPC), E(ENOSR), E(ENOSTR), E(ENOSYS), E(ENOTBLK), E(ENOTCONN), E(ENOTDIR), E(ENOTEMPTY), E(ENOTNAM), E(ENOTRECOVERABLE), E(ENOTSOCK), E(ENOTTY), E(ENOTUNIQ), E(ENXIO), E(EOPNOTSUPP), E(EOVERFLOW), E(EOWNERDEAD), E(EPERM), E(EPFNOSUPPORT), E(EPIPE), #ifdef EPROCLIM E(EPROCLIM), #endif E(EPROTO), E(EPROTONOSUPPORT), E(EPROTOTYPE), E(ERANGE), E(EREMCHG), #ifdef EREMDEV E(EREMDEV), #endif E(EREMOTE), E(EREMOTEIO), E(ERESTART), E(ERFKILL), E(EROFS), #ifdef ERREMOTE E(ERREMOTE), #endif E(ESHUTDOWN), E(ESOCKTNOSUPPORT), E(ESPIPE), E(ESRCH), E(ESRMNT), E(ESTALE), E(ESTRPIPE), E(ETIME), E(ETIMEDOUT), E(ETOOMANYREFS), E(ETXTBSY), E(EUCLEAN), E(EUNATCH), E(EUSERS), E(EXDEV), E(EXFULL), }; #undef E #ifdef EREFUSED /* parisc */ static_assert(EREFUSED == ECONNREFUSED); #endif #ifdef ECANCELLED /* parisc */ static_assert(ECANCELLED == ECANCELED); #endif static_assert(EAGAIN == EWOULDBLOCK); /* everywhere */ #define E(err) [err - 512 + BUILD_BUG_ON_ZERO(err < 512 || err > 550)] = "-" #err static const char *names_512[] = { E(ERESTARTSYS), E(ERESTARTNOINTR), E(ERESTARTNOHAND), E(ENOIOCTLCMD), E(ERESTART_RESTARTBLOCK), E(EPROBE_DEFER), E(EOPENSTALE), E(ENOPARAM), E(EBADHANDLE), E(ENOTSYNC), E(EBADCOOKIE), E(ENOTSUPP), E(ETOOSMALL), E(ESERVERFAULT), E(EBADTYPE), E(EJUKEBOX), E(EIOCBQUEUED), E(ERECALLCONFLICT), }; #undef E static const char *__errname(unsigned err) { if (err < ARRAY_SIZE(names_0)) return names_0[err]; if (err >= 512 && err - 512 < ARRAY_SIZE(names_512)) return names_512[err - 512]; /* But why? */ if (IS_ENABLED(CONFIG_MIPS) && err == EDQUOT) /* 1133 */ return "-EDQUOT"; return NULL; } /* * errname(EIO) -> "EIO" * errname(-EIO) -> "-EIO" */ const char *errname(int err) { const char *name = __errname(abs(err)); if (!name) return NULL; return err > 0 ? name + 1 : name; } EXPORT_SYMBOL(errname); |
| 2 1 1 2 2 3 1 4 7 7 7 1 1 6 6 6 8 2 7 7 7 1 7 8 39 39 39 39 39 39 12 39 39 11 11 11 11 3 3 38 9 9 9 9 10 10 10 41 41 39 39 38 38 11 11 11 27 11 3 38 38 8 8 1 9 2 37 35 36 28 36 35 26 2 1 1 25 38 33 33 38 9 29 733 733 14 735 10 9 421 10 12 12 12 11 12 12 12 12 12 140 140 139 14 14 4 4 14 14 10 4 14 2 1 12 12 2 11 10 8 10 9 10 163 161 163 15 15 15 15 15 15 15 12 3 3 3 3 1 163 3 163 1 1237 2 1231 1 1 33 3 33 2 1 63 1 63 2 79 3 78 1 14 33 107 227 421 41 39 49 14 21 20 12 5 12 1 6235 5 12 2595 2594 22 2597 21 12 12 21 17 17 17 8 21 17 12 161 161 3 9002 9018 5 4 3 2 7 9026 50 51 51 2 51 51 49 49 2 2 51 2 3258 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 | // SPDX-License-Identifier: GPL-2.0-only /* * Landlock - Filesystem management and hooks * * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2018-2020 ANSSI * Copyright © 2021-2025 Microsoft Corporation * Copyright © 2022 Günther Noack <gnoack3000@gmail.com> * Copyright © 2023-2024 Google LLC */ #include <asm/ioctls.h> #include <kunit/test.h> #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/bits.h> #include <linux/compiler_types.h> #include <linux/dcache.h> #include <linux/err.h> #include <linux/falloc.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/limits.h> #include <linux/list.h> #include <linux/lsm_audit.h> #include <linux/lsm_hooks.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/path.h> #include <linux/pid.h> #include <linux/rcupdate.h> #include <linux/sched/signal.h> #include <linux/spinlock.h> #include <linux/stat.h> #include <linux/types.h> #include <linux/wait_bit.h> #include <linux/workqueue.h> #include <uapi/linux/fiemap.h> #include <uapi/linux/landlock.h> #include "access.h" #include "audit.h" #include "common.h" #include "cred.h" #include "domain.h" #include "fs.h" #include "limits.h" #include "object.h" #include "ruleset.h" #include "setup.h" /* Underlying object management */ static void release_inode(struct landlock_object *const object) __releases(object->lock) { struct inode *const inode = object->underobj; struct super_block *sb; if (!inode) { spin_unlock(&object->lock); return; } /* * Protects against concurrent use by hook_sb_delete() of the reference * to the underlying inode. */ object->underobj = NULL; /* * Makes sure that if the filesystem is concurrently unmounted, * hook_sb_delete() will wait for us to finish iput(). */ sb = inode->i_sb; atomic_long_inc(&landlock_superblock(sb)->inode_refs); spin_unlock(&object->lock); /* * Because object->underobj was not NULL, hook_sb_delete() and * get_inode_object() guarantee that it is safe to reset * landlock_inode(inode)->object while it is not NULL. It is therefore * not necessary to lock inode->i_lock. */ rcu_assign_pointer(landlock_inode(inode)->object, NULL); /* * Now, new rules can safely be tied to @inode with get_inode_object(). */ iput(inode); if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs)) wake_up_var(&landlock_superblock(sb)->inode_refs); } static const struct landlock_object_underops landlock_fs_underops = { .release = release_inode }; /* IOCTL helpers */ /** * is_masked_device_ioctl - Determine whether an IOCTL command is always * permitted with Landlock for device files. These commands can not be * restricted on device files by enforcing a Landlock policy. * * @cmd: The IOCTL command that is supposed to be run. * * By default, any IOCTL on a device file requires the * LANDLOCK_ACCESS_FS_IOCTL_DEV right. However, we blanket-permit some * commands, if: * * 1. The command is implemented in fs/ioctl.c's do_vfs_ioctl(), * not in f_ops->unlocked_ioctl() or f_ops->compat_ioctl(). * * 2. The command is harmless when invoked on devices. * * We also permit commands that do not make sense for devices, but where the * do_vfs_ioctl() implementation returns a more conventional error code. * * Any new IOCTL commands that are implemented in fs/ioctl.c's do_vfs_ioctl() * should be considered for inclusion here. * * Returns: true if the IOCTL @cmd can not be restricted with Landlock for * device files. */ static __attribute_const__ bool is_masked_device_ioctl(const unsigned int cmd) { switch (cmd) { /* * FIOCLEX, FIONCLEX, FIONBIO and FIOASYNC manipulate the FD's * close-on-exec and the file's buffered-IO and async flags. These * operations are also available through fcntl(2), and are * unconditionally permitted in Landlock. */ case FIOCLEX: case FIONCLEX: case FIONBIO: case FIOASYNC: /* * FIOQSIZE queries the size of a regular file, directory, or link. * * We still permit it, because it always returns -ENOTTY for * other file types. */ case FIOQSIZE: /* * FIFREEZE and FITHAW freeze and thaw the file system which the * given file belongs to. Requires CAP_SYS_ADMIN. * * These commands operate on the file system's superblock rather * than on the file itself. The same operations can also be * done through any other file or directory on the same file * system, so it is safe to permit these. */ case FIFREEZE: case FITHAW: /* * FS_IOC_FIEMAP queries information about the allocation of * blocks within a file. * * This IOCTL command only makes sense for regular files and is * not implemented by devices. It is harmless to permit. */ case FS_IOC_FIEMAP: /* * FIGETBSZ queries the file system's block size for a file or * directory. * * This command operates on the file system's superblock rather * than on the file itself. The same operation can also be done * through any other file or directory on the same file system, * so it is safe to permit it. */ case FIGETBSZ: /* * FICLONE, FICLONERANGE and FIDEDUPERANGE make files share * their underlying storage ("reflink") between source and * destination FDs, on file systems which support that. * * These IOCTL commands only apply to regular files * and are harmless to permit for device files. */ case FICLONE: case FICLONERANGE: case FIDEDUPERANGE: /* * FS_IOC_GETFSUUID and FS_IOC_GETFSSYSFSPATH both operate on * the file system superblock, not on the specific file, so * these operations are available through any other file on the * same file system as well. */ case FS_IOC_GETFSUUID: case FS_IOC_GETFSSYSFSPATH: return true; /* * FIONREAD, FS_IOC_GETFLAGS, FS_IOC_SETFLAGS, FS_IOC_FSGETXATTR and * FS_IOC_FSSETXATTR are forwarded to device implementations. */ /* * file_ioctl() commands (FIBMAP, FS_IOC_RESVSP, FS_IOC_RESVSP64, * FS_IOC_UNRESVSP, FS_IOC_UNRESVSP64 and FS_IOC_ZERO_RANGE) are * forwarded to device implementations, so not permitted. */ /* Other commands are guarded by the access right. */ default: return false; } } /* * is_masked_device_ioctl_compat - same as the helper above, but checking the * "compat" IOCTL commands. * * The IOCTL commands with special handling in compat-mode should behave the * same as their non-compat counterparts. */ static __attribute_const__ bool is_masked_device_ioctl_compat(const unsigned int cmd) { switch (cmd) { /* FICLONE is permitted, same as in the non-compat variant. */ case FICLONE: return true; #if defined(CONFIG_X86_64) /* * FS_IOC_RESVSP_32, FS_IOC_RESVSP64_32, FS_IOC_UNRESVSP_32, * FS_IOC_UNRESVSP64_32, FS_IOC_ZERO_RANGE_32: not blanket-permitted, * for consistency with their non-compat variants. */ case FS_IOC_RESVSP_32: case FS_IOC_RESVSP64_32: case FS_IOC_UNRESVSP_32: case FS_IOC_UNRESVSP64_32: case FS_IOC_ZERO_RANGE_32: #endif /* * FS_IOC32_GETFLAGS, FS_IOC32_SETFLAGS are forwarded to their device * implementations. */ case FS_IOC32_GETFLAGS: case FS_IOC32_SETFLAGS: return false; default: return is_masked_device_ioctl(cmd); } } /* Ruleset management */ static struct landlock_object *get_inode_object(struct inode *const inode) { struct landlock_object *object, *new_object; struct landlock_inode_security *inode_sec = landlock_inode(inode); rcu_read_lock(); retry: object = rcu_dereference(inode_sec->object); if (object) { if (likely(refcount_inc_not_zero(&object->usage))) { rcu_read_unlock(); return object; } /* * We are racing with release_inode(), the object is going * away. Wait for release_inode(), then retry. */ spin_lock(&object->lock); spin_unlock(&object->lock); goto retry; } rcu_read_unlock(); /* * If there is no object tied to @inode, then create a new one (without * holding any locks). */ new_object = landlock_create_object(&landlock_fs_underops, inode); if (IS_ERR(new_object)) return new_object; /* * Protects against concurrent calls to get_inode_object() or * hook_sb_delete(). */ spin_lock(&inode->i_lock); if (unlikely(rcu_access_pointer(inode_sec->object))) { /* Someone else just created the object, bail out and retry. */ spin_unlock(&inode->i_lock); kfree(new_object); rcu_read_lock(); goto retry; } /* * @inode will be released by hook_sb_delete() on its superblock * shutdown, or by release_inode() when no more ruleset references the * related object. */ ihold(inode); rcu_assign_pointer(inode_sec->object, new_object); spin_unlock(&inode->i_lock); return new_object; } /* All access rights that can be tied to files. */ /* clang-format off */ #define ACCESS_FILE ( \ LANDLOCK_ACCESS_FS_EXECUTE | \ LANDLOCK_ACCESS_FS_WRITE_FILE | \ LANDLOCK_ACCESS_FS_READ_FILE | \ LANDLOCK_ACCESS_FS_TRUNCATE | \ LANDLOCK_ACCESS_FS_IOCTL_DEV) /* clang-format on */ /* * @path: Should have been checked by get_path_from_fd(). */ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, const struct path *const path, access_mask_t access_rights) { int err; struct landlock_id id = { .type = LANDLOCK_KEY_INODE, }; /* Files only get access rights that make sense. */ if (!d_is_dir(path->dentry) && (access_rights | ACCESS_FILE) != ACCESS_FILE) return -EINVAL; if (WARN_ON_ONCE(ruleset->num_layers != 1)) return -EINVAL; /* Transforms relative access rights to absolute ones. */ access_rights |= LANDLOCK_MASK_ACCESS_FS & ~landlock_get_fs_access_mask(ruleset, 0); id.key.object = get_inode_object(d_backing_inode(path->dentry)); if (IS_ERR(id.key.object)) return PTR_ERR(id.key.object); mutex_lock(&ruleset->lock); err = landlock_insert_rule(ruleset, id, access_rights); mutex_unlock(&ruleset->lock); /* * No need to check for an error because landlock_insert_rule() * increments the refcount for the new object if needed. */ landlock_put_object(id.key.object); return err; } /* Access-control management */ /* * The lifetime of the returned rule is tied to @domain. * * Returns NULL if no rule is found or if @dentry is negative. */ static const struct landlock_rule * find_rule(const struct landlock_ruleset *const domain, const struct dentry *const dentry) { const struct landlock_rule *rule; const struct inode *inode; struct landlock_id id = { .type = LANDLOCK_KEY_INODE, }; /* Ignores nonexistent leafs. */ if (d_is_negative(dentry)) return NULL; inode = d_backing_inode(dentry); rcu_read_lock(); id.key.object = rcu_dereference(landlock_inode(inode)->object); rule = landlock_find_rule(domain, id); rcu_read_unlock(); return rule; } /* * Allows access to pseudo filesystems that will never be mountable (e.g. * sockfs, pipefs), but can still be reachable through * /proc/<pid>/fd/<file-descriptor> */ static bool is_nouser_or_private(const struct dentry *dentry) { return (dentry->d_sb->s_flags & SB_NOUSER) || (d_is_positive(dentry) && unlikely(IS_PRIVATE(d_backing_inode(dentry)))); } static const struct access_masks any_fs = { .fs = ~0, }; /* * Check that a destination file hierarchy has more restrictions than a source * file hierarchy. This is only used for link and rename actions. * * @layer_masks_child2: Optional child masks. */ static bool no_more_access( const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS], const bool child1_is_directory, const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS], const bool child2_is_directory) { unsigned long access_bit; for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2); access_bit++) { /* Ignores accesses that only make sense for directories. */ const bool is_file_access = !!(BIT_ULL(access_bit) & ACCESS_FILE); if (child1_is_directory || is_file_access) { /* * Checks if the destination restrictions are a * superset of the source ones (i.e. inherited access * rights without child exceptions): * restrictions(parent2) >= restrictions(child1) */ if ((((*layer_masks_parent1)[access_bit] & (*layer_masks_child1)[access_bit]) | (*layer_masks_parent2)[access_bit]) != (*layer_masks_parent2)[access_bit]) return false; } if (!layer_masks_child2) continue; if (child2_is_directory || is_file_access) { /* * Checks inverted restrictions for RENAME_EXCHANGE: * restrictions(parent1) >= restrictions(child2) */ if ((((*layer_masks_parent2)[access_bit] & (*layer_masks_child2)[access_bit]) | (*layer_masks_parent1)[access_bit]) != (*layer_masks_parent1)[access_bit]) return false; } } return true; } #define NMA_TRUE(...) KUNIT_EXPECT_TRUE(test, no_more_access(__VA_ARGS__)) #define NMA_FALSE(...) KUNIT_EXPECT_FALSE(test, no_more_access(__VA_ARGS__)) #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST static void test_no_more_access(struct kunit *const test) { const layer_mask_t rx0[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), [BIT_INDEX(LANDLOCK_ACCESS_FS_READ_FILE)] = BIT_ULL(0), }; const layer_mask_t mx0[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), [BIT_INDEX(LANDLOCK_ACCESS_FS_MAKE_REG)] = BIT_ULL(0), }; const layer_mask_t x0[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), }; const layer_mask_t x1[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(1), }; const layer_mask_t x01[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0) | BIT_ULL(1), }; const layer_mask_t allows_all[LANDLOCK_NUM_ACCESS_FS] = {}; /* Checks without restriction. */ NMA_TRUE(&x0, &allows_all, false, &allows_all, NULL, false); NMA_TRUE(&allows_all, &x0, false, &allows_all, NULL, false); NMA_FALSE(&x0, &x0, false, &allows_all, NULL, false); /* * Checks that we can only refer a file if no more access could be * inherited. */ NMA_TRUE(&x0, &x0, false, &rx0, NULL, false); NMA_TRUE(&rx0, &rx0, false, &rx0, NULL, false); NMA_FALSE(&rx0, &rx0, false, &x0, NULL, false); NMA_FALSE(&rx0, &rx0, false, &x1, NULL, false); /* Checks allowed referring with different nested domains. */ NMA_TRUE(&x0, &x1, false, &x0, NULL, false); NMA_TRUE(&x1, &x0, false, &x0, NULL, false); NMA_TRUE(&x0, &x01, false, &x0, NULL, false); NMA_TRUE(&x0, &x01, false, &rx0, NULL, false); NMA_TRUE(&x01, &x0, false, &x0, NULL, false); NMA_TRUE(&x01, &x0, false, &rx0, NULL, false); NMA_FALSE(&x01, &x01, false, &x0, NULL, false); /* Checks that file access rights are also enforced for a directory. */ NMA_FALSE(&rx0, &rx0, true, &x0, NULL, false); /* Checks that directory access rights don't impact file referring... */ NMA_TRUE(&mx0, &mx0, false, &x0, NULL, false); /* ...but only directory referring. */ NMA_FALSE(&mx0, &mx0, true, &x0, NULL, false); /* Checks directory exchange. */ NMA_TRUE(&mx0, &mx0, true, &mx0, &mx0, true); NMA_TRUE(&mx0, &mx0, true, &mx0, &x0, true); NMA_FALSE(&mx0, &mx0, true, &x0, &mx0, true); NMA_FALSE(&mx0, &mx0, true, &x0, &x0, true); NMA_FALSE(&mx0, &mx0, true, &x1, &x1, true); /* Checks file exchange with directory access rights... */ NMA_TRUE(&mx0, &mx0, false, &mx0, &mx0, false); NMA_TRUE(&mx0, &mx0, false, &mx0, &x0, false); NMA_TRUE(&mx0, &mx0, false, &x0, &mx0, false); NMA_TRUE(&mx0, &mx0, false, &x0, &x0, false); /* ...and with file access rights. */ NMA_TRUE(&rx0, &rx0, false, &rx0, &rx0, false); NMA_TRUE(&rx0, &rx0, false, &rx0, &x0, false); NMA_FALSE(&rx0, &rx0, false, &x0, &rx0, false); NMA_FALSE(&rx0, &rx0, false, &x0, &x0, false); NMA_FALSE(&rx0, &rx0, false, &x1, &x1, false); /* * Allowing the following requests should not be a security risk * because domain 0 denies execute access, and domain 1 is always * nested with domain 0. However, adding an exception for this case * would mean to check all nested domains to make sure none can get * more privileges (e.g. processes only sandboxed by domain 0). * Moreover, this behavior (i.e. composition of N domains) could then * be inconsistent compared to domain 1's ruleset alone (e.g. it might * be denied to link/rename with domain 1's ruleset, whereas it would * be allowed if nested on top of domain 0). Another drawback would be * to create a cover channel that could enable sandboxed processes to * infer most of the filesystem restrictions from their domain. To * make it simple, efficient, safe, and more consistent, this case is * always denied. */ NMA_FALSE(&x1, &x1, false, &x0, NULL, false); NMA_FALSE(&x1, &x1, false, &rx0, NULL, false); NMA_FALSE(&x1, &x1, true, &x0, NULL, false); NMA_FALSE(&x1, &x1, true, &rx0, NULL, false); /* Checks the same case of exclusive domains with a file... */ NMA_TRUE(&x1, &x1, false, &x01, NULL, false); NMA_FALSE(&x1, &x1, false, &x01, &x0, false); NMA_FALSE(&x1, &x1, false, &x01, &x01, false); NMA_FALSE(&x1, &x1, false, &x0, &x0, false); /* ...and with a directory. */ NMA_FALSE(&x1, &x1, false, &x0, &x0, true); NMA_FALSE(&x1, &x1, true, &x0, &x0, false); NMA_FALSE(&x1, &x1, true, &x0, &x0, true); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ #undef NMA_TRUE #undef NMA_FALSE static bool is_layer_masks_allowed( layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) { return !memchr_inv(layer_masks, 0, sizeof(*layer_masks)); } /* * Removes @layer_masks accesses that are not requested. * * Returns true if the request is allowed, false otherwise. */ static bool scope_to_request(const access_mask_t access_request, layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) { const unsigned long access_req = access_request; unsigned long access_bit; if (WARN_ON_ONCE(!layer_masks)) return true; for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks)) (*layer_masks)[access_bit] = 0; return is_layer_masks_allowed(layer_masks); } #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST static void test_scope_to_request_with_exec_none(struct kunit *const test) { /* Allows everything. */ layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; /* Checks and scopes with execute. */ KUNIT_EXPECT_TRUE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, &layer_masks)); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); } static void test_scope_to_request_with_exec_some(struct kunit *const test) { /* Denies execute and write. */ layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1), }; /* Checks and scopes with execute. */ KUNIT_EXPECT_FALSE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, &layer_masks)); KUNIT_EXPECT_EQ(test, BIT_ULL(0), layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); } static void test_scope_to_request_without_access(struct kunit *const test) { /* Denies execute and write. */ layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1), }; /* Checks and scopes without access request. */ KUNIT_EXPECT_TRUE(test, scope_to_request(0, &layer_masks)); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ /* * Returns true if there is at least one access right different than * LANDLOCK_ACCESS_FS_REFER. */ static bool is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS], const access_mask_t access_request) { unsigned long access_bit; /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */ const unsigned long access_check = access_request & ~LANDLOCK_ACCESS_FS_REFER; if (!layer_masks) return false; for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) { if ((*layer_masks)[access_bit]) return true; } return false; } #define IE_TRUE(...) KUNIT_EXPECT_TRUE(test, is_eacces(__VA_ARGS__)) #define IE_FALSE(...) KUNIT_EXPECT_FALSE(test, is_eacces(__VA_ARGS__)) #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST static void test_is_eacces_with_none(struct kunit *const test) { const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; IE_FALSE(&layer_masks, 0); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } static void test_is_eacces_with_refer(struct kunit *const test) { const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_REFER)] = BIT_ULL(0), }; IE_FALSE(&layer_masks, 0); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } static void test_is_eacces_with_write(struct kunit *const test) { const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(0), }; IE_FALSE(&layer_masks, 0); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); IE_TRUE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ #undef IE_TRUE #undef IE_FALSE /** * is_access_to_paths_allowed - Check accesses for requests with a common path * * @domain: Domain to check against. * @path: File hierarchy to walk through. For refer checks, this would be * the common mountpoint. * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is * equal to @layer_masks_parent2 (if any). This is tied to the unique * requested path for most actions, or the source in case of a refer action * (i.e. rename or link), or the source and destination in case of * RENAME_EXCHANGE. * @layer_masks_parent1: Pointer to a matrix of layer masks per access * masks, identifying the layers that forbid a specific access. Bits from * this matrix can be unset according to the @path walk. An empty matrix * means that @domain allows all possible Landlock accesses (i.e. not only * those identified by @access_request_parent1). This matrix can * initially refer to domain layer masks and, when the accesses for the * destination and source are the same, to requested layer masks. * @log_request_parent1: Audit request to fill if the related access is denied. * @dentry_child1: Dentry to the initial child of the parent1 path. This * pointer must be NULL for non-refer actions (i.e. not link nor rename). * @access_request_parent2: Similar to @access_request_parent1 but for a * request involving a source and a destination. This refers to the * destination, except in case of RENAME_EXCHANGE where it also refers to * the source. Must be set to 0 when using a simple path request. * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer * action. This must be NULL otherwise. * @log_request_parent2: Audit request to fill if the related access is denied. * @dentry_child2: Dentry to the initial child of the parent2 path. This * pointer is only set for RENAME_EXCHANGE actions and must be NULL * otherwise. * * This helper first checks that the destination has a superset of restrictions * compared to the source (if any) for a common path. Because of * RENAME_EXCHANGE actions, source and destinations may be swapped. It then * checks that the collected accesses and the remaining ones are enough to * allow the request. * * Returns: * - true if the access request is granted; * - false otherwise. */ static bool is_access_to_paths_allowed( const struct landlock_ruleset *const domain, const struct path *const path, const access_mask_t access_request_parent1, layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], struct landlock_request *const log_request_parent1, struct dentry *const dentry_child1, const access_mask_t access_request_parent2, layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], struct landlock_request *const log_request_parent2, struct dentry *const dentry_child2) { bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check, child1_is_directory = true, child2_is_directory = true; struct path walker_path; access_mask_t access_masked_parent1, access_masked_parent2; layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS], _layer_masks_child2[LANDLOCK_NUM_ACCESS_FS]; layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL, (*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL; if (!access_request_parent1 && !access_request_parent2) return true; if (WARN_ON_ONCE(!path)) return true; if (is_nouser_or_private(path->dentry)) return true; if (WARN_ON_ONCE(!layer_masks_parent1)) return false; allowed_parent1 = is_layer_masks_allowed(layer_masks_parent1); if (unlikely(layer_masks_parent2)) { if (WARN_ON_ONCE(!dentry_child1)) return false; allowed_parent2 = is_layer_masks_allowed(layer_masks_parent2); /* * For a double request, first check for potential privilege * escalation by looking at domain handled accesses (which are * a superset of the meaningful requested accesses). */ access_masked_parent1 = access_masked_parent2 = landlock_union_access_masks(domain).fs; is_dom_check = true; } else { if (WARN_ON_ONCE(dentry_child1 || dentry_child2)) return false; /* For a simple request, only check for requested accesses. */ access_masked_parent1 = access_request_parent1; access_masked_parent2 = access_request_parent2; is_dom_check = false; } if (unlikely(dentry_child1)) { landlock_unmask_layers( find_rule(domain, dentry_child1), landlock_init_layer_masks( domain, LANDLOCK_MASK_ACCESS_FS, &_layer_masks_child1, LANDLOCK_KEY_INODE), &_layer_masks_child1, ARRAY_SIZE(_layer_masks_child1)); layer_masks_child1 = &_layer_masks_child1; child1_is_directory = d_is_dir(dentry_child1); } if (unlikely(dentry_child2)) { landlock_unmask_layers( find_rule(domain, dentry_child2), landlock_init_layer_masks( domain, LANDLOCK_MASK_ACCESS_FS, &_layer_masks_child2, LANDLOCK_KEY_INODE), &_layer_masks_child2, ARRAY_SIZE(_layer_masks_child2)); layer_masks_child2 = &_layer_masks_child2; child2_is_directory = d_is_dir(dentry_child2); } walker_path = *path; path_get(&walker_path); /* * We need to walk through all the hierarchy to not miss any relevant * restriction. */ while (true) { const struct landlock_rule *rule; /* * If at least all accesses allowed on the destination are * already allowed on the source, respectively if there is at * least as much as restrictions on the destination than on the * source, then we can safely refer files from the source to * the destination without risking a privilege escalation. * This also applies in the case of RENAME_EXCHANGE, which * implies checks on both direction. This is crucial for * standalone multilayered security policies. Furthermore, * this helps avoid policy writers to shoot themselves in the * foot. */ if (unlikely(is_dom_check && no_more_access( layer_masks_parent1, layer_masks_child1, child1_is_directory, layer_masks_parent2, layer_masks_child2, child2_is_directory))) { /* * Now, downgrades the remaining checks from domain * handled accesses to requested accesses. */ is_dom_check = false; access_masked_parent1 = access_request_parent1; access_masked_parent2 = access_request_parent2; allowed_parent1 = allowed_parent1 || scope_to_request(access_masked_parent1, layer_masks_parent1); allowed_parent2 = allowed_parent2 || scope_to_request(access_masked_parent2, layer_masks_parent2); /* Stops when all accesses are granted. */ if (allowed_parent1 && allowed_parent2) break; } rule = find_rule(domain, walker_path.dentry); allowed_parent1 = allowed_parent1 || landlock_unmask_layers( rule, access_masked_parent1, layer_masks_parent1, ARRAY_SIZE(*layer_masks_parent1)); allowed_parent2 = allowed_parent2 || landlock_unmask_layers( rule, access_masked_parent2, layer_masks_parent2, ARRAY_SIZE(*layer_masks_parent2)); /* Stops when a rule from each layer grants access. */ if (allowed_parent1 && allowed_parent2) break; jump_up: if (walker_path.dentry == walker_path.mnt->mnt_root) { if (follow_up(&walker_path)) { /* Ignores hidden mount points. */ goto jump_up; } else { /* * Stops at the real root. Denies access * because not all layers have granted access. */ break; } } if (unlikely(IS_ROOT(walker_path.dentry))) { if (likely(walker_path.mnt->mnt_flags & MNT_INTERNAL)) { /* * Stops and allows access when reaching disconnected root * directories that are part of internal filesystems (e.g. nsfs, * which is reachable through /proc/<pid>/ns/<namespace>). */ allowed_parent1 = true; allowed_parent2 = true; break; } /* * We reached a disconnected root directory from a bind mount. * Let's continue the walk with the mount point we missed. */ dput(walker_path.dentry); walker_path.dentry = walker_path.mnt->mnt_root; dget(walker_path.dentry); } else { struct dentry *const parent_dentry = dget_parent(walker_path.dentry); dput(walker_path.dentry); walker_path.dentry = parent_dentry; } } path_put(&walker_path); /* * Check CONFIG_AUDIT to enable elision of log_request_parent* and * associated caller's stack variables thanks to dead code elimination. */ #ifdef CONFIG_AUDIT if (!allowed_parent1 && log_request_parent1) { log_request_parent1->type = LANDLOCK_REQUEST_FS_ACCESS; log_request_parent1->audit.type = LSM_AUDIT_DATA_PATH; log_request_parent1->audit.u.path = *path; log_request_parent1->access = access_masked_parent1; log_request_parent1->layer_masks = layer_masks_parent1; log_request_parent1->layer_masks_size = ARRAY_SIZE(*layer_masks_parent1); } if (!allowed_parent2 && log_request_parent2) { log_request_parent2->type = LANDLOCK_REQUEST_FS_ACCESS; log_request_parent2->audit.type = LSM_AUDIT_DATA_PATH; log_request_parent2->audit.u.path = *path; log_request_parent2->access = access_masked_parent2; log_request_parent2->layer_masks = layer_masks_parent2; log_request_parent2->layer_masks_size = ARRAY_SIZE(*layer_masks_parent2); } #endif /* CONFIG_AUDIT */ return allowed_parent1 && allowed_parent2; } static int current_check_access_path(const struct path *const path, access_mask_t access_request) { const struct access_masks masks = { .fs = access_request, }; const struct landlock_cred_security *const subject = landlock_get_applicable_subject(current_cred(), masks, NULL); layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; struct landlock_request request = {}; if (!subject) return 0; access_request = landlock_init_layer_masks(subject->domain, access_request, &layer_masks, LANDLOCK_KEY_INODE); if (is_access_to_paths_allowed(subject->domain, path, access_request, &layer_masks, &request, NULL, 0, NULL, NULL, NULL)) return 0; landlock_log_denial(subject, &request); return -EACCES; } static __attribute_const__ access_mask_t get_mode_access(const umode_t mode) { switch (mode & S_IFMT) { case S_IFLNK: return LANDLOCK_ACCESS_FS_MAKE_SYM; case S_IFDIR: return LANDLOCK_ACCESS_FS_MAKE_DIR; case S_IFCHR: return LANDLOCK_ACCESS_FS_MAKE_CHAR; case S_IFBLK: return LANDLOCK_ACCESS_FS_MAKE_BLOCK; case S_IFIFO: return LANDLOCK_ACCESS_FS_MAKE_FIFO; case S_IFSOCK: return LANDLOCK_ACCESS_FS_MAKE_SOCK; case S_IFREG: case 0: /* A zero mode translates to S_IFREG. */ default: /* Treats weird files as regular files. */ return LANDLOCK_ACCESS_FS_MAKE_REG; } } static access_mask_t maybe_remove(const struct dentry *const dentry) { if (d_is_negative(dentry)) return 0; return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR : LANDLOCK_ACCESS_FS_REMOVE_FILE; } /** * collect_domain_accesses - Walk through a file path and collect accesses * * @domain: Domain to check against. * @mnt_root: Last directory to check. * @dir: Directory to start the walk from. * @layer_masks_dom: Where to store the collected accesses. * * This helper is useful to begin a path walk from the @dir directory to a * @mnt_root directory used as a mount point. This mount point is the common * ancestor between the source and the destination of a renamed and linked * file. While walking from @dir to @mnt_root, we record all the domain's * allowed accesses in @layer_masks_dom. * * Because of disconnected directories, this walk may not reach @mnt_dir. In * this case, the walk will continue to @mnt_dir after this call. * * This is similar to is_access_to_paths_allowed() but much simpler because it * only handles walking on the same mount point and only checks one set of * accesses. * * Returns: * - true if all the domain access rights are allowed for @dir; * - false if the walk reached @mnt_root. */ static bool collect_domain_accesses( const struct landlock_ruleset *const domain, const struct dentry *const mnt_root, struct dentry *dir, layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS]) { unsigned long access_dom; bool ret = false; if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom)) return true; if (is_nouser_or_private(dir)) return true; access_dom = landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, layer_masks_dom, LANDLOCK_KEY_INODE); dget(dir); while (true) { struct dentry *parent_dentry; /* Gets all layers allowing all domain accesses. */ if (landlock_unmask_layers(find_rule(domain, dir), access_dom, layer_masks_dom, ARRAY_SIZE(*layer_masks_dom))) { /* * Stops when all handled accesses are allowed by at * least one rule in each layer. */ ret = true; break; } /* * Stops at the mount point or the filesystem root for a disconnected * directory. */ if (dir == mnt_root || unlikely(IS_ROOT(dir))) break; parent_dentry = dget_parent(dir); dput(dir); dir = parent_dentry; } dput(dir); return ret; } /** * current_check_refer_path - Check if a rename or link action is allowed * * @old_dentry: File or directory requested to be moved or linked. * @new_dir: Destination parent directory. * @new_dentry: Destination file or directory. * @removable: Sets to true if it is a rename operation. * @exchange: Sets to true if it is a rename operation with RENAME_EXCHANGE. * * Because of its unprivileged constraints, Landlock relies on file hierarchies * (and not only inodes) to tie access rights to files. Being able to link or * rename a file hierarchy brings some challenges. Indeed, moving or linking a * file (i.e. creating a new reference to an inode) can have an impact on the * actions allowed for a set of files if it would change its parent directory * (i.e. reparenting). * * To avoid trivial access right bypasses, Landlock first checks if the file or * directory requested to be moved would gain new access rights inherited from * its new hierarchy. Before returning any error, Landlock then checks that * the parent source hierarchy and the destination hierarchy would allow the * link or rename action. If it is not the case, an error with EACCES is * returned to inform user space that there is no way to remove or create the * requested source file type. If it should be allowed but the new inherited * access rights would be greater than the source access rights, then the * kernel returns an error with EXDEV. Prioritizing EACCES over EXDEV enables * user space to abort the whole operation if there is no way to do it, or to * manually copy the source to the destination if this remains allowed, e.g. * because file creation is allowed on the destination directory but not direct * linking. * * To achieve this goal, the kernel needs to compare two file hierarchies: the * one identifying the source file or directory (including itself), and the * destination one. This can be seen as a multilayer partial ordering problem. * The kernel walks through these paths and collects in a matrix the access * rights that are denied per layer. These matrices are then compared to see * if the destination one has more (or the same) restrictions as the source * one. If this is the case, the requested action will not return EXDEV, which * doesn't mean the action is allowed. The parent hierarchy of the source * (i.e. parent directory), and the destination hierarchy must also be checked * to verify that they explicitly allow such action (i.e. referencing, * creation and potentially removal rights). The kernel implementation is then * required to rely on potentially four matrices of access rights: one for the * source file or directory (i.e. the child), a potentially other one for the * other source/destination (in case of RENAME_EXCHANGE), one for the source * parent hierarchy and a last one for the destination hierarchy. These * ephemeral matrices take some space on the stack, which limits the number of * layers to a deemed reasonable number: 16. * * Returns: * - 0 if access is allowed; * - -EXDEV if @old_dentry would inherit new access rights from @new_dir; * - -EACCES if file removal or creation is denied. */ static int current_check_refer_path(struct dentry *const old_dentry, const struct path *const new_dir, struct dentry *const new_dentry, const bool removable, const bool exchange) { const struct landlock_cred_security *const subject = landlock_get_applicable_subject(current_cred(), any_fs, NULL); bool allow_parent1, allow_parent2; access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; struct dentry *old_parent; layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; struct landlock_request request1 = {}, request2 = {}; if (!subject) return 0; if (unlikely(d_is_negative(old_dentry))) return -ENOENT; if (exchange) { if (unlikely(d_is_negative(new_dentry))) return -ENOENT; access_request_parent1 = get_mode_access(d_backing_inode(new_dentry)->i_mode); } else { access_request_parent1 = 0; } access_request_parent2 = get_mode_access(d_backing_inode(old_dentry)->i_mode); if (removable) { access_request_parent1 |= maybe_remove(old_dentry); access_request_parent2 |= maybe_remove(new_dentry); } /* The mount points are the same for old and new paths, cf. EXDEV. */ if (old_dentry->d_parent == new_dir->dentry) { /* * The LANDLOCK_ACCESS_FS_REFER access right is not required * for same-directory referer (i.e. no reparenting). */ access_request_parent1 = landlock_init_layer_masks( subject->domain, access_request_parent1 | access_request_parent2, &layer_masks_parent1, LANDLOCK_KEY_INODE); if (is_access_to_paths_allowed(subject->domain, new_dir, access_request_parent1, &layer_masks_parent1, &request1, NULL, 0, NULL, NULL, NULL)) return 0; landlock_log_denial(subject, &request1); return -EACCES; } access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER; access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER; /* Saves the common mount point. */ mnt_dir.mnt = new_dir->mnt; mnt_dir.dentry = new_dir->mnt->mnt_root; /* * old_dentry may be the root of the common mount point and * !IS_ROOT(old_dentry) at the same time (e.g. with open_tree() and * OPEN_TREE_CLONE). We do not need to call dget(old_parent) because * we keep a reference to old_dentry. */ old_parent = (old_dentry == mnt_dir.dentry) ? old_dentry : old_dentry->d_parent; /* new_dir->dentry is equal to new_dentry->d_parent */ allow_parent1 = collect_domain_accesses(subject->domain, mnt_dir.dentry, old_parent, &layer_masks_parent1); allow_parent2 = collect_domain_accesses(subject->domain, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2); if (allow_parent1 && allow_parent2) return 0; /* * To be able to compare source and destination domain access rights, * take into account the @old_dentry access rights aggregated with its * parent access rights. This will be useful to compare with the * destination parent access rights. */ if (is_access_to_paths_allowed( subject->domain, &mnt_dir, access_request_parent1, &layer_masks_parent1, &request1, old_dentry, access_request_parent2, &layer_masks_parent2, &request2, exchange ? new_dentry : NULL)) return 0; if (request1.access) { request1.audit.u.path.dentry = old_parent; landlock_log_denial(subject, &request1); } if (request2.access) { request2.audit.u.path.dentry = new_dir->dentry; landlock_log_denial(subject, &request2); } /* * This prioritizes EACCES over EXDEV for all actions, including * renames with RENAME_EXCHANGE. */ if (likely(is_eacces(&layer_masks_parent1, access_request_parent1) || is_eacces(&layer_masks_parent2, access_request_parent2))) return -EACCES; /* * Gracefully forbids reparenting if the destination directory * hierarchy is not a superset of restrictions of the source directory * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the * source or the destination. */ return -EXDEV; } /* Inode hooks */ static void hook_inode_free_security_rcu(void *inode_security) { struct landlock_inode_security *inode_sec; /* * All inodes must already have been untied from their object by * release_inode() or hook_sb_delete(). */ inode_sec = inode_security + landlock_blob_sizes.lbs_inode; WARN_ON_ONCE(inode_sec->object); } /* Super-block hooks */ /* * Release the inodes used in a security policy. * * Cf. fsnotify_unmount_inodes() and evict_inodes() */ static void hook_sb_delete(struct super_block *const sb) { struct inode *inode, *prev_inode = NULL; if (!landlock_initialized) return; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { struct landlock_object *object; /* Only handles referenced inodes. */ if (!icount_read(inode)) continue; /* * Protects against concurrent modification of inode (e.g. * from get_inode_object()). */ spin_lock(&inode->i_lock); /* * Checks I_FREEING and I_WILL_FREE to protect against a race * condition when release_inode() just called iput(), which * could lead to a NULL dereference of inode->security or a * second call to iput() for the same Landlock object. Also * checks I_NEW because such inode cannot be tied to an object. */ if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE | I_NEW)) { spin_unlock(&inode->i_lock); continue; } rcu_read_lock(); object = rcu_dereference(landlock_inode(inode)->object); if (!object) { rcu_read_unlock(); spin_unlock(&inode->i_lock); continue; } /* Keeps a reference to this inode until the next loop walk. */ __iget(inode); spin_unlock(&inode->i_lock); /* * If there is no concurrent release_inode() ongoing, then we * are in charge of calling iput() on this inode, otherwise we * will just wait for it to finish. */ spin_lock(&object->lock); if (object->underobj == inode) { object->underobj = NULL; spin_unlock(&object->lock); rcu_read_unlock(); /* * Because object->underobj was not NULL, * release_inode() and get_inode_object() guarantee * that it is safe to reset * landlock_inode(inode)->object while it is not NULL. * It is therefore not necessary to lock inode->i_lock. */ rcu_assign_pointer(landlock_inode(inode)->object, NULL); /* * At this point, we own the ihold() reference that was * originally set up by get_inode_object() and the * __iget() reference that we just set in this loop * walk. Therefore there are at least two references * on the inode. */ iput_not_last(inode); } else { spin_unlock(&object->lock); rcu_read_unlock(); } if (prev_inode) { /* * At this point, we still own the __iget() reference * that we just set in this loop walk. Therefore we * can drop the list lock and know that the inode won't * disappear from under us until the next loop walk. */ spin_unlock(&sb->s_inode_list_lock); /* * We can now actually put the inode reference from the * previous loop walk, which is not needed anymore. */ iput(prev_inode); cond_resched(); spin_lock(&sb->s_inode_list_lock); } prev_inode = inode; } spin_unlock(&sb->s_inode_list_lock); /* Puts the inode reference from the last loop walk, if any. */ if (prev_inode) iput(prev_inode); /* Waits for pending iput() in release_inode(). */ wait_var_event(&landlock_superblock(sb)->inode_refs, !atomic_long_read(&landlock_superblock(sb)->inode_refs)); } static void log_fs_change_topology_path(const struct landlock_cred_security *const subject, size_t handle_layer, const struct path *const path) { landlock_log_denial(subject, &(struct landlock_request) { .type = LANDLOCK_REQUEST_FS_CHANGE_TOPOLOGY, .audit = { .type = LSM_AUDIT_DATA_PATH, .u.path = *path, }, .layer_plus_one = handle_layer + 1, }); } static void log_fs_change_topology_dentry( const struct landlock_cred_security *const subject, size_t handle_layer, struct dentry *const dentry) { landlock_log_denial(subject, &(struct landlock_request) { .type = LANDLOCK_REQUEST_FS_CHANGE_TOPOLOGY, .audit = { .type = LSM_AUDIT_DATA_DENTRY, .u.dentry = dentry, }, .layer_plus_one = handle_layer + 1, }); } /* * Because a Landlock security policy is defined according to the filesystem * topology (i.e. the mount namespace), changing it may grant access to files * not previously allowed. * * To make it simple, deny any filesystem topology modification by landlocked * processes. Non-landlocked processes may still change the namespace of a * landlocked process, but this kind of threat must be handled by a system-wide * access-control security policy. * * This could be lifted in the future if Landlock can safely handle mount * namespace updates requested by a landlocked process. Indeed, we could * update the current domain (which is currently read-only) by taking into * account the accesses of the source and the destination of a new mount point. * However, it would also require to make all the child domains dynamically * inherit these new constraints. Anyway, for backward compatibility reasons, * a dedicated user space option would be required (e.g. as a ruleset flag). */ static int hook_sb_mount(const char *const dev_name, const struct path *const path, const char *const type, const unsigned long flags, void *const data) { size_t handle_layer; const struct landlock_cred_security *const subject = landlock_get_applicable_subject(current_cred(), any_fs, &handle_layer); if (!subject) return 0; log_fs_change_topology_path(subject, handle_layer, path); return -EPERM; } static int hook_move_mount(const struct path *const from_path, const struct path *const to_path) { size_t handle_layer; const struct landlock_cred_security *const subject = landlock_get_applicable_subject(current_cred(), any_fs, &handle_layer); if (!subject) return 0; log_fs_change_topology_path(subject, handle_layer, to_path); return -EPERM; } /* * Removing a mount point may reveal a previously hidden file hierarchy, which * may then grant access to files, which may have previously been forbidden. */ static int hook_sb_umount(struct vfsmount *const mnt, const int flags) { size_t handle_layer; const struct landlock_cred_security *const subject = landlock_get_applicable_subject(current_cred(), any_fs, &handle_layer); if (!subject) return 0; log_fs_change_topology_dentry(subject, handle_layer, mnt->mnt_root); return -EPERM; } static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts) { size_t handle_layer; const struct landlock_cred_security *const subject = landlock_get_applicable_subject(current_cred(), any_fs, &handle_layer); if (!subject) return 0; log_fs_change_topology_dentry(subject, handle_layer, sb->s_root); return -EPERM; } /* * pivot_root(2), like mount(2), changes the current mount namespace. It must * then be forbidden for a landlocked process. * * However, chroot(2) may be allowed because it only changes the relative root * directory of the current process. Moreover, it can be used to restrict the * view of the filesystem. */ static int hook_sb_pivotroot(const struct path *const old_path, const struct path *const new_path) { size_t handle_layer; const struct landlock_cred_security *const subject = landlock_get_applicable_subject(current_cred(), any_fs, &handle_layer); if (!subject) return 0; log_fs_change_topology_path(subject, handle_layer, new_path); return -EPERM; } /* Path hooks */ static int hook_path_link(struct dentry *const old_dentry, const struct path *const new_dir, struct dentry *const new_dentry) { return current_check_refer_path(old_dentry, new_dir, new_dentry, false, false); } static int hook_path_rename(const struct path *const old_dir, struct dentry *const old_dentry, const struct path *const new_dir, struct dentry *const new_dentry, const unsigned int flags) { /* old_dir refers to old_dentry->d_parent and new_dir->mnt */ return current_check_refer_path(old_dentry, new_dir, new_dentry, true, !!(flags & RENAME_EXCHANGE)); } static int hook_path_mkdir(const struct path *const dir, struct dentry *const dentry, const umode_t mode) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR); } static int hook_path_mknod(const struct path *const dir, struct dentry *const dentry, const umode_t mode, const unsigned int dev) { return current_check_access_path(dir, get_mode_access(mode)); } static int hook_path_symlink(const struct path *const dir, struct dentry *const dentry, const char *const old_name) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM); } static int hook_path_unlink(const struct path *const dir, struct dentry *const dentry) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE); } static int hook_path_rmdir(const struct path *const dir, struct dentry *const dentry) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR); } static int hook_path_truncate(const struct path *const path) { return current_check_access_path(path, LANDLOCK_ACCESS_FS_TRUNCATE); } /* File hooks */ /** * get_required_file_open_access - Get access needed to open a file * * @file: File being opened. * * Returns the access rights that are required for opening the given file, * depending on the file type and open mode. */ static access_mask_t get_required_file_open_access(const struct file *const file) { access_mask_t access = 0; if (file->f_mode & FMODE_READ) { /* A directory can only be opened in read mode. */ if (S_ISDIR(file_inode(file)->i_mode)) return LANDLOCK_ACCESS_FS_READ_DIR; access = LANDLOCK_ACCESS_FS_READ_FILE; } if (file->f_mode & FMODE_WRITE) access |= LANDLOCK_ACCESS_FS_WRITE_FILE; /* __FMODE_EXEC is indeed part of f_flags, not f_mode. */ if (file->f_flags & __FMODE_EXEC) access |= LANDLOCK_ACCESS_FS_EXECUTE; return access; } static int hook_file_alloc_security(struct file *const file) { /* * Grants all access rights, even if most of them are not checked later * on. It is more consistent. * * Notably, file descriptors for regular files can also be acquired * without going through the file_open hook, for example when using * memfd_create(2). */ landlock_file(file)->allowed_access = LANDLOCK_MASK_ACCESS_FS; return 0; } static bool is_device(const struct file *const file) { const struct inode *inode = file_inode(file); return S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode); } static int hook_file_open(struct file *const file) { layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; access_mask_t open_access_request, full_access_request, allowed_access, optional_access; const struct landlock_cred_security *const subject = landlock_get_applicable_subject(file->f_cred, any_fs, NULL); struct landlock_request request = {}; if (!subject) return 0; /* * Because a file may be opened with O_PATH, get_required_file_open_access() * may return 0. This case will be handled with a future Landlock * evolution. */ open_access_request = get_required_file_open_access(file); /* * We look up more access than what we immediately need for open(), so * that we can later authorize operations on opened files. */ optional_access = LANDLOCK_ACCESS_FS_TRUNCATE; if (is_device(file)) optional_access |= LANDLOCK_ACCESS_FS_IOCTL_DEV; full_access_request = open_access_request | optional_access; if (is_access_to_paths_allowed( subject->domain, &file->f_path, landlock_init_layer_masks(subject->domain, full_access_request, &layer_masks, LANDLOCK_KEY_INODE), &layer_masks, &request, NULL, 0, NULL, NULL, NULL)) { allowed_access = full_access_request; } else { unsigned long access_bit; const unsigned long access_req = full_access_request; /* * Calculate the actual allowed access rights from layer_masks. * Add each access right to allowed_access which has not been * vetoed by any layer. */ allowed_access = 0; for_each_set_bit(access_bit, &access_req, ARRAY_SIZE(layer_masks)) { if (!layer_masks[access_bit]) allowed_access |= BIT_ULL(access_bit); } } /* * For operations on already opened files (i.e. ftruncate()), it is the * access rights at the time of open() which decide whether the * operation is permitted. Therefore, we record the relevant subset of * file access rights in the opened struct file. */ landlock_file(file)->allowed_access = allowed_access; #ifdef CONFIG_AUDIT landlock_file(file)->deny_masks = landlock_get_deny_masks( _LANDLOCK_ACCESS_FS_OPTIONAL, optional_access, &layer_masks, ARRAY_SIZE(layer_masks)); #endif /* CONFIG_AUDIT */ if ((open_access_request & allowed_access) == open_access_request) return 0; /* Sets access to reflect the actual request. */ request.access = open_access_request; landlock_log_denial(subject, &request); return -EACCES; } static int hook_file_truncate(struct file *const file) { /* * Allows truncation if the truncate right was available at the time of * opening the file, to get a consistent access check as for read, write * and execute operations. * * Note: For checks done based on the file's Landlock allowed access, we * enforce them independently of whether the current thread is in a * Landlock domain, so that open files passed between independent * processes retain their behaviour. */ if (landlock_file(file)->allowed_access & LANDLOCK_ACCESS_FS_TRUNCATE) return 0; landlock_log_denial(landlock_cred(file->f_cred), &(struct landlock_request) { .type = LANDLOCK_REQUEST_FS_ACCESS, .audit = { .type = LSM_AUDIT_DATA_FILE, .u.file = file, }, .all_existing_optional_access = _LANDLOCK_ACCESS_FS_OPTIONAL, .access = LANDLOCK_ACCESS_FS_TRUNCATE, #ifdef CONFIG_AUDIT .deny_masks = landlock_file(file)->deny_masks, #endif /* CONFIG_AUDIT */ }); return -EACCES; } static int hook_file_ioctl_common(const struct file *const file, const unsigned int cmd, const bool is_compat) { access_mask_t allowed_access = landlock_file(file)->allowed_access; /* * It is the access rights at the time of opening the file which * determine whether IOCTL can be used on the opened file later. * * The access right is attached to the opened file in hook_file_open(). */ if (allowed_access & LANDLOCK_ACCESS_FS_IOCTL_DEV) return 0; if (!is_device(file)) return 0; if (unlikely(is_compat) ? is_masked_device_ioctl_compat(cmd) : is_masked_device_ioctl(cmd)) return 0; landlock_log_denial(landlock_cred(file->f_cred), &(struct landlock_request) { .type = LANDLOCK_REQUEST_FS_ACCESS, .audit = { .type = LSM_AUDIT_DATA_IOCTL_OP, .u.op = &(struct lsm_ioctlop_audit) { .path = file->f_path, .cmd = cmd, }, }, .all_existing_optional_access = _LANDLOCK_ACCESS_FS_OPTIONAL, .access = LANDLOCK_ACCESS_FS_IOCTL_DEV, #ifdef CONFIG_AUDIT .deny_masks = landlock_file(file)->deny_masks, #endif /* CONFIG_AUDIT */ }); return -EACCES; } static int hook_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return hook_file_ioctl_common(file, cmd, false); } static int hook_file_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) { return hook_file_ioctl_common(file, cmd, true); } /* * Always allow sending signals between threads of the same process. This * ensures consistency with hook_task_kill(). */ static bool control_current_fowner(struct fown_struct *const fown) { struct task_struct *p; /* * Lock already held by __f_setown(), see commit 26f204380a3c ("fs: Fix * file_set_fowner LSM hook inconsistencies"). */ lockdep_assert_held(&fown->lock); /* * Some callers (e.g. fcntl_dirnotify) may not be in an RCU read-side * critical section. */ guard(rcu)(); p = pid_task(fown->pid, fown->pid_type); if (!p) return true; return !same_thread_group(p, current); } static void hook_file_set_fowner(struct file *file) { struct landlock_ruleset *prev_dom; struct landlock_cred_security fown_subject = {}; size_t fown_layer = 0; if (control_current_fowner(file_f_owner(file))) { static const struct access_masks signal_scope = { .scope = LANDLOCK_SCOPE_SIGNAL, }; const struct landlock_cred_security *new_subject = landlock_get_applicable_subject( current_cred(), signal_scope, &fown_layer); if (new_subject) { landlock_get_ruleset(new_subject->domain); fown_subject = *new_subject; } } prev_dom = landlock_file(file)->fown_subject.domain; landlock_file(file)->fown_subject = fown_subject; #ifdef CONFIG_AUDIT landlock_file(file)->fown_layer = fown_layer; #endif /* CONFIG_AUDIT*/ /* May be called in an RCU read-side critical section. */ landlock_put_ruleset_deferred(prev_dom); } static void hook_file_free_security(struct file *file) { landlock_put_ruleset_deferred(landlock_file(file)->fown_subject.domain); } static struct security_hook_list landlock_hooks[] __ro_after_init = { LSM_HOOK_INIT(inode_free_security_rcu, hook_inode_free_security_rcu), LSM_HOOK_INIT(sb_delete, hook_sb_delete), LSM_HOOK_INIT(sb_mount, hook_sb_mount), LSM_HOOK_INIT(move_mount, hook_move_mount), LSM_HOOK_INIT(sb_umount, hook_sb_umount), LSM_HOOK_INIT(sb_remount, hook_sb_remount), LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot), LSM_HOOK_INIT(path_link, hook_path_link), LSM_HOOK_INIT(path_rename, hook_path_rename), LSM_HOOK_INIT(path_mkdir, hook_path_mkdir), LSM_HOOK_INIT(path_mknod, hook_path_mknod), LSM_HOOK_INIT(path_symlink, hook_path_symlink), LSM_HOOK_INIT(path_unlink, hook_path_unlink), LSM_HOOK_INIT(path_rmdir, hook_path_rmdir), LSM_HOOK_INIT(path_truncate, hook_path_truncate), LSM_HOOK_INIT(file_alloc_security, hook_file_alloc_security), LSM_HOOK_INIT(file_open, hook_file_open), LSM_HOOK_INIT(file_truncate, hook_file_truncate), LSM_HOOK_INIT(file_ioctl, hook_file_ioctl), LSM_HOOK_INIT(file_ioctl_compat, hook_file_ioctl_compat), LSM_HOOK_INIT(file_set_fowner, hook_file_set_fowner), LSM_HOOK_INIT(file_free_security, hook_file_free_security), }; __init void landlock_add_fs_hooks(void) { security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks), &landlock_lsmid); } #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST /* clang-format off */ static struct kunit_case test_cases[] = { KUNIT_CASE(test_no_more_access), KUNIT_CASE(test_scope_to_request_with_exec_none), KUNIT_CASE(test_scope_to_request_with_exec_some), KUNIT_CASE(test_scope_to_request_without_access), KUNIT_CASE(test_is_eacces_with_none), KUNIT_CASE(test_is_eacces_with_refer), KUNIT_CASE(test_is_eacces_with_write), {} }; /* clang-format on */ static struct kunit_suite test_suite = { .name = "landlock_fs", .test_cases = test_cases, }; kunit_test_suite(test_suite); #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ |
| 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 | // SPDX-License-Identifier: GPL-2.0-only /* * i8042 keyboard and mouse controller driver for Linux * * Copyright (c) 1999-2004 Vojtech Pavlik */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/module.h> #include <linux/interrupt.h> #include <linux/ioport.h> #include <linux/init.h> #include <linux/serio.h> #include <linux/err.h> #include <linux/rcupdate.h> #include <linux/platform_device.h> #include <linux/i8042.h> #include <linux/slab.h> #include <linux/suspend.h> #include <linux/property.h> #include <asm/io.h> MODULE_AUTHOR("Vojtech Pavlik <vojtech@suse.cz>"); MODULE_DESCRIPTION("i8042 keyboard and mouse controller driver"); MODULE_LICENSE("GPL"); static bool i8042_nokbd; module_param_named(nokbd, i8042_nokbd, bool, 0); MODULE_PARM_DESC(nokbd, "Do not probe or use KBD port."); static bool i8042_noaux; module_param_named(noaux, i8042_noaux, bool, 0); MODULE_PARM_DESC(noaux, "Do not probe or use AUX (mouse) port."); static bool i8042_nomux; module_param_named(nomux, i8042_nomux, bool, 0); MODULE_PARM_DESC(nomux, "Do not check whether an active multiplexing controller is present."); static bool i8042_unlock; module_param_named(unlock, i8042_unlock, bool, 0); MODULE_PARM_DESC(unlock, "Ignore keyboard lock."); static bool i8042_probe_defer; module_param_named(probe_defer, i8042_probe_defer, bool, 0); MODULE_PARM_DESC(probe_defer, "Allow deferred probing."); enum i8042_controller_reset_mode { I8042_RESET_NEVER, I8042_RESET_ALWAYS, I8042_RESET_ON_S2RAM, #define I8042_RESET_DEFAULT I8042_RESET_ON_S2RAM }; static enum i8042_controller_reset_mode i8042_reset = I8042_RESET_DEFAULT; static int i8042_set_reset(const char *val, const struct kernel_param *kp) { enum i8042_controller_reset_mode *arg = kp->arg; int error; bool reset; if (val) { error = kstrtobool(val, &reset); if (error) return error; } else { reset = true; } *arg = reset ? I8042_RESET_ALWAYS : I8042_RESET_NEVER; return 0; } static const struct kernel_param_ops param_ops_reset_param = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = i8042_set_reset, }; #define param_check_reset_param(name, p) \ __param_check(name, p, enum i8042_controller_reset_mode) module_param_named(reset, i8042_reset, reset_param, 0); MODULE_PARM_DESC(reset, "Reset controller on resume, cleanup or both"); static bool i8042_direct; module_param_named(direct, i8042_direct, bool, 0); MODULE_PARM_DESC(direct, "Put keyboard port into non-translated mode."); static bool i8042_dumbkbd; module_param_named(dumbkbd, i8042_dumbkbd, bool, 0); MODULE_PARM_DESC(dumbkbd, "Pretend that controller can only read data from keyboard"); static bool i8042_noloop; module_param_named(noloop, i8042_noloop, bool, 0); MODULE_PARM_DESC(noloop, "Disable the AUX Loopback command while probing for the AUX port"); static bool i8042_notimeout; module_param_named(notimeout, i8042_notimeout, bool, 0); MODULE_PARM_DESC(notimeout, "Ignore timeouts signalled by i8042"); static bool i8042_kbdreset; module_param_named(kbdreset, i8042_kbdreset, bool, 0); MODULE_PARM_DESC(kbdreset, "Reset device connected to KBD port"); #ifdef CONFIG_X86 static bool i8042_dritek; module_param_named(dritek, i8042_dritek, bool, 0); MODULE_PARM_DESC(dritek, "Force enable the Dritek keyboard extension"); #endif #ifdef CONFIG_PNP static bool i8042_nopnp; module_param_named(nopnp, i8042_nopnp, bool, 0); MODULE_PARM_DESC(nopnp, "Do not use PNP to detect controller settings"); #endif static bool i8042_forcenorestore; module_param_named(forcenorestore, i8042_forcenorestore, bool, 0); MODULE_PARM_DESC(forcenorestore, "Force no restore on s3 resume, copying s2idle behaviour"); #define DEBUG #ifdef DEBUG static bool i8042_debug; module_param_named(debug, i8042_debug, bool, 0600); MODULE_PARM_DESC(debug, "Turn i8042 debugging mode on and off"); static bool i8042_unmask_kbd_data; module_param_named(unmask_kbd_data, i8042_unmask_kbd_data, bool, 0600); MODULE_PARM_DESC(unmask_kbd_data, "Unconditional enable (may reveal sensitive data) of normally sanitize-filtered kbd data traffic debug log [pre-condition: i8042.debug=1 enabled]"); #endif static bool i8042_present; static bool i8042_bypass_aux_irq_test; static char i8042_kbd_firmware_id[128]; static char i8042_aux_firmware_id[128]; static struct fwnode_handle *i8042_kbd_fwnode; #include "i8042.h" /* * i8042_lock protects serialization between i8042_command and * the interrupt handler. */ static DEFINE_SPINLOCK(i8042_lock); /* * Writers to AUX and KBD ports as well as users issuing i8042_command * directly should acquire i8042_mutex (by means of calling * i8042_lock_chip() and i8042_unlock_chip() helpers) to ensure that * they do not disturb each other (unfortunately in many i8042 * implementations write to one of the ports will immediately abort * command that is being processed by another port). */ static DEFINE_MUTEX(i8042_mutex); struct i8042_port { struct serio *serio; int irq; bool exists; bool driver_bound; signed char mux; }; #define I8042_KBD_PORT_NO 0 #define I8042_AUX_PORT_NO 1 #define I8042_MUX_PORT_NO 2 #define I8042_NUM_PORTS (I8042_NUM_MUX_PORTS + 2) static struct i8042_port i8042_ports[I8042_NUM_PORTS]; static unsigned char i8042_initial_ctr; static unsigned char i8042_ctr; static bool i8042_mux_present; static bool i8042_kbd_irq_registered; static bool i8042_aux_irq_registered; static unsigned char i8042_suppress_kbd_ack; static struct platform_device *i8042_platform_device; static struct notifier_block i8042_kbd_bind_notifier_block; static bool i8042_handle_data(int irq); static i8042_filter_t i8042_platform_filter; static void *i8042_platform_filter_context; void i8042_lock_chip(void) { mutex_lock(&i8042_mutex); } EXPORT_SYMBOL(i8042_lock_chip); void i8042_unlock_chip(void) { mutex_unlock(&i8042_mutex); } EXPORT_SYMBOL(i8042_unlock_chip); int i8042_install_filter(i8042_filter_t filter, void *context) { guard(spinlock_irqsave)(&i8042_lock); if (i8042_platform_filter) return -EBUSY; i8042_platform_filter = filter; i8042_platform_filter_context = context; return 0; } EXPORT_SYMBOL(i8042_install_filter); int i8042_remove_filter(i8042_filter_t filter) { guard(spinlock_irqsave)(&i8042_lock); if (i8042_platform_filter != filter) return -EINVAL; i8042_platform_filter = NULL; i8042_platform_filter_context = NULL; return 0; } EXPORT_SYMBOL(i8042_remove_filter); /* * The i8042_wait_read() and i8042_wait_write functions wait for the i8042 to * be ready for reading values from it / writing values to it. * Called always with i8042_lock held. */ static int i8042_wait_read(void) { int i = 0; while ((~i8042_read_status() & I8042_STR_OBF) && (i < I8042_CTL_TIMEOUT)) { udelay(50); i++; } return -(i == I8042_CTL_TIMEOUT); } static int i8042_wait_write(void) { int i = 0; while ((i8042_read_status() & I8042_STR_IBF) && (i < I8042_CTL_TIMEOUT)) { udelay(50); i++; } return -(i == I8042_CTL_TIMEOUT); } /* * i8042_flush() flushes all data that may be in the keyboard and mouse buffers * of the i8042 down the toilet. */ static int i8042_flush(void) { unsigned char data, str; int count = 0; guard(spinlock_irqsave)(&i8042_lock); while ((str = i8042_read_status()) & I8042_STR_OBF) { if (count++ >= I8042_BUFFER_SIZE) return -EIO; udelay(50); data = i8042_read_data(); dbg("%02x <- i8042 (flush, %s)\n", data, str & I8042_STR_AUXDATA ? "aux" : "kbd"); } return 0; } /* * i8042_command() executes a command on the i8042. It also sends the input * parameter(s) of the commands to it, and receives the output value(s). The * parameters are to be stored in the param array, and the output is placed * into the same array. The number of the parameters and output values is * encoded in bits 8-11 of the command number. */ static int __i8042_command(unsigned char *param, int command) { int i, error; if (i8042_noloop && command == I8042_CMD_AUX_LOOP) return -1; error = i8042_wait_write(); if (error) return error; dbg("%02x -> i8042 (command)\n", command & 0xff); i8042_write_command(command & 0xff); for (i = 0; i < ((command >> 12) & 0xf); i++) { error = i8042_wait_write(); if (error) { dbg(" -- i8042 (wait write timeout)\n"); return error; } dbg("%02x -> i8042 (parameter)\n", param[i]); i8042_write_data(param[i]); } for (i = 0; i < ((command >> 8) & 0xf); i++) { error = i8042_wait_read(); if (error) { dbg(" -- i8042 (wait read timeout)\n"); return error; } if (command == I8042_CMD_AUX_LOOP && !(i8042_read_status() & I8042_STR_AUXDATA)) { dbg(" -- i8042 (auxerr)\n"); return -1; } param[i] = i8042_read_data(); dbg("%02x <- i8042 (return)\n", param[i]); } return 0; } int i8042_command(unsigned char *param, int command) { if (!i8042_present) return -1; guard(spinlock_irqsave)(&i8042_lock); return __i8042_command(param, command); } EXPORT_SYMBOL(i8042_command); /* * i8042_kbd_write() sends a byte out through the keyboard interface. */ static int i8042_kbd_write(struct serio *port, unsigned char c) { int error; guard(spinlock_irqsave)(&i8042_lock); error = i8042_wait_write(); if (error) return error; dbg("%02x -> i8042 (kbd-data)\n", c); i8042_write_data(c); return 0; } /* * i8042_aux_write() sends a byte out through the aux interface. */ static int i8042_aux_write(struct serio *serio, unsigned char c) { struct i8042_port *port = serio->port_data; return i8042_command(&c, port->mux == -1 ? I8042_CMD_AUX_SEND : I8042_CMD_MUX_SEND + port->mux); } /* * i8042_port_close attempts to clear AUX or KBD port state by disabling * and then re-enabling it. */ static void i8042_port_close(struct serio *serio) { int irq_bit; int disable_bit; const char *port_name; if (serio == i8042_ports[I8042_AUX_PORT_NO].serio) { irq_bit = I8042_CTR_AUXINT; disable_bit = I8042_CTR_AUXDIS; port_name = "AUX"; } else { irq_bit = I8042_CTR_KBDINT; disable_bit = I8042_CTR_KBDDIS; port_name = "KBD"; } i8042_ctr &= ~irq_bit; if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) pr_warn("Can't write CTR while closing %s port\n", port_name); udelay(50); i8042_ctr &= ~disable_bit; i8042_ctr |= irq_bit; if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) pr_err("Can't reactivate %s port\n", port_name); /* * See if there is any data appeared while we were messing with * port state. */ i8042_handle_data(0); } /* * i8042_start() is called by serio core when port is about to finish * registering. It will mark port as existing so i8042_interrupt can * start sending data through it. */ static int i8042_start(struct serio *serio) { struct i8042_port *port = serio->port_data; device_set_wakeup_capable(&serio->dev, true); /* * On platforms using suspend-to-idle, allow the keyboard to * wake up the system from sleep by enabling keyboard wakeups * by default. This is consistent with keyboard wakeup * behavior on many platforms using suspend-to-RAM (ACPI S3) * by default. */ if (pm_suspend_default_s2idle() && serio == i8042_ports[I8042_KBD_PORT_NO].serio) { device_set_wakeup_enable(&serio->dev, true); } guard(spinlock_irq)(&i8042_lock); port->exists = true; return 0; } /* * i8042_stop() marks serio port as non-existing so i8042_interrupt * will not try to send data to the port that is about to go away. * The function is called by serio core as part of unregister procedure. */ static void i8042_stop(struct serio *serio) { struct i8042_port *port = serio->port_data; scoped_guard(spinlock_irq, &i8042_lock) { port->exists = false; port->serio = NULL; } /* * We need to make sure that interrupt handler finishes using * our serio port before we return from this function. * We synchronize with both AUX and KBD IRQs because there is * a (very unlikely) chance that AUX IRQ is raised for KBD port * and vice versa. */ synchronize_irq(I8042_AUX_IRQ); synchronize_irq(I8042_KBD_IRQ); } /* * i8042_filter() filters out unwanted bytes from the input data stream. * It is called from i8042_interrupt and thus is running with interrupts * off and i8042_lock held. */ static bool i8042_filter(unsigned char data, unsigned char str, struct serio *serio) { if (unlikely(i8042_suppress_kbd_ack)) { if ((~str & I8042_STR_AUXDATA) && (data == 0xfa || data == 0xfe)) { i8042_suppress_kbd_ack--; dbg("Extra keyboard ACK - filtered out\n"); return true; } } if (!i8042_platform_filter) return false; if (i8042_platform_filter(data, str, serio, i8042_platform_filter_context)) { dbg("Filtered out by platform filter\n"); return true; } return false; } /* * i8042_handle_mux() handles case when data is coming from one of * the multiplexed ports. It would be simple if not for quirks with * handling errors: * * When MUXERR condition is signalled the data register can only contain * 0xfd, 0xfe or 0xff if implementation follows the spec. Unfortunately * it is not always the case. Some KBCs also report 0xfc when there is * nothing connected to the port while others sometimes get confused which * port the data came from and signal error leaving the data intact. They * _do not_ revert to legacy mode (actually I've never seen KBC reverting * to legacy mode yet, when we see one we'll add proper handling). * Anyway, we process 0xfc, 0xfd, 0xfe and 0xff as timeouts, and for the * rest assume that the data came from the same serio last byte * was transmitted (if transmission happened not too long ago). */ static int i8042_handle_mux(u8 str, u8 *data, unsigned int *dfl) { static unsigned long last_transmit; static unsigned long last_port; unsigned int mux_port; mux_port = (str >> 6) & 3; *dfl = 0; if (str & I8042_STR_MUXERR) { dbg("MUX error, status is %02x, data is %02x\n", str, *data); switch (*data) { default: if (time_before(jiffies, last_transmit + HZ/10)) { mux_port = last_port; break; } fallthrough; /* report timeout */ case 0xfc: case 0xfd: case 0xfe: *dfl = SERIO_TIMEOUT; *data = 0xfe; break; case 0xff: *dfl = SERIO_PARITY; *data = 0xfe; break; } } last_port = mux_port; last_transmit = jiffies; return I8042_MUX_PORT_NO + mux_port; } /* * i8042_handle_data() is the most important function in this driver - * it reads the data from the i8042, determines its destination serio * port, and sends received byte to the upper layers. * * Returns true if there was data waiting, false otherwise. */ static bool i8042_handle_data(int irq) { struct i8042_port *port; struct serio *serio; unsigned char str, data; unsigned int dfl; unsigned int port_no; bool filtered; scoped_guard(spinlock_irqsave, &i8042_lock) { str = i8042_read_status(); if (unlikely(~str & I8042_STR_OBF)) return false; data = i8042_read_data(); if (i8042_mux_present && (str & I8042_STR_AUXDATA)) { port_no = i8042_handle_mux(str, &data, &dfl); } else { dfl = (str & I8042_STR_PARITY) ? SERIO_PARITY : 0; if ((str & I8042_STR_TIMEOUT) && !i8042_notimeout) dfl |= SERIO_TIMEOUT; port_no = (str & I8042_STR_AUXDATA) ? I8042_AUX_PORT_NO : I8042_KBD_PORT_NO; } port = &i8042_ports[port_no]; serio = port->exists ? port->serio : NULL; filter_dbg(port->driver_bound, data, "<- i8042 (interrupt, %d, %d%s%s)\n", port_no, irq, dfl & SERIO_PARITY ? ", bad parity" : "", dfl & SERIO_TIMEOUT ? ", timeout" : ""); filtered = i8042_filter(data, str, serio); } if (likely(serio && !filtered)) serio_interrupt(serio, data, dfl); return true; } static irqreturn_t i8042_interrupt(int irq, void *dev_id) { if (unlikely(!i8042_handle_data(irq))) { dbg("Interrupt %d, without any data\n", irq); return IRQ_NONE; } return IRQ_HANDLED; } /* * i8042_enable_kbd_port enables keyboard port on chip */ static int i8042_enable_kbd_port(void) { i8042_ctr &= ~I8042_CTR_KBDDIS; i8042_ctr |= I8042_CTR_KBDINT; if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { i8042_ctr &= ~I8042_CTR_KBDINT; i8042_ctr |= I8042_CTR_KBDDIS; pr_err("Failed to enable KBD port\n"); return -EIO; } return 0; } /* * i8042_enable_aux_port enables AUX (mouse) port on chip */ static int i8042_enable_aux_port(void) { i8042_ctr &= ~I8042_CTR_AUXDIS; i8042_ctr |= I8042_CTR_AUXINT; if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { i8042_ctr &= ~I8042_CTR_AUXINT; i8042_ctr |= I8042_CTR_AUXDIS; pr_err("Failed to enable AUX port\n"); return -EIO; } return 0; } /* * i8042_enable_mux_ports enables 4 individual AUX ports after * the controller has been switched into Multiplexed mode */ static int i8042_enable_mux_ports(void) { unsigned char param; int i; for (i = 0; i < I8042_NUM_MUX_PORTS; i++) { i8042_command(¶m, I8042_CMD_MUX_PFX + i); i8042_command(¶m, I8042_CMD_AUX_ENABLE); } return i8042_enable_aux_port(); } /* * i8042_set_mux_mode checks whether the controller has an * active multiplexor and puts the chip into Multiplexed (true) * or Legacy (false) mode. */ static int i8042_set_mux_mode(bool multiplex, unsigned char *mux_version) { unsigned char param, val; /* * Get rid of bytes in the queue. */ i8042_flush(); /* * Internal loopback test - send three bytes, they should come back from the * mouse interface, the last should be version. */ param = val = 0xf0; if (i8042_command(¶m, I8042_CMD_AUX_LOOP) || param != val) return -1; param = val = multiplex ? 0x56 : 0xf6; if (i8042_command(¶m, I8042_CMD_AUX_LOOP) || param != val) return -1; param = val = multiplex ? 0xa4 : 0xa5; if (i8042_command(¶m, I8042_CMD_AUX_LOOP) || param == val) return -1; /* * Workaround for interference with USB Legacy emulation * that causes a v10.12 MUX to be found. */ if (param == 0xac) return -1; if (mux_version) *mux_version = param; return 0; } /* * i8042_check_mux() checks whether the controller supports the PS/2 Active * Multiplexing specification by Synaptics, Phoenix, Insyde and * LCS/Telegraphics. */ static int i8042_check_mux(void) { unsigned char mux_version; if (i8042_set_mux_mode(true, &mux_version)) return -1; pr_info("Detected active multiplexing controller, rev %d.%d\n", (mux_version >> 4) & 0xf, mux_version & 0xf); /* * Disable all muxed ports by disabling AUX. */ i8042_ctr |= I8042_CTR_AUXDIS; i8042_ctr &= ~I8042_CTR_AUXINT; if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { pr_err("Failed to disable AUX port, can't use MUX\n"); return -EIO; } i8042_mux_present = true; return 0; } /* * The following is used to test AUX IRQ delivery. */ static struct completion i8042_aux_irq_delivered; static bool i8042_irq_being_tested; static irqreturn_t i8042_aux_test_irq(int irq, void *dev_id) { unsigned char str, data; guard(spinlock_irqsave)(&i8042_lock); str = i8042_read_status(); if (!(str & I8042_STR_OBF)) return IRQ_NONE; data = i8042_read_data(); dbg("%02x <- i8042 (aux_test_irq, %s)\n", data, str & I8042_STR_AUXDATA ? "aux" : "kbd"); if (i8042_irq_being_tested && data == 0xa5 && (str & I8042_STR_AUXDATA)) complete(&i8042_aux_irq_delivered); return IRQ_HANDLED; } /* * i8042_toggle_aux - enables or disables AUX port on i8042 via command and * verifies success by readinng CTR. Used when testing for presence of AUX * port. */ static int i8042_toggle_aux(bool on) { unsigned char param; int i; if (i8042_command(¶m, on ? I8042_CMD_AUX_ENABLE : I8042_CMD_AUX_DISABLE)) return -1; /* some chips need some time to set the I8042_CTR_AUXDIS bit */ for (i = 0; i < 100; i++) { udelay(50); if (i8042_command(¶m, I8042_CMD_CTL_RCTR)) return -1; if (!(param & I8042_CTR_AUXDIS) == on) return 0; } return -1; } /* * i8042_check_aux() applies as much paranoia as it can at detecting * the presence of an AUX interface. */ static int i8042_check_aux(void) { int retval = -1; bool irq_registered = false; bool aux_loop_broken = false; unsigned char param; /* * Get rid of bytes in the queue. */ i8042_flush(); /* * Internal loopback test - filters out AT-type i8042's. Unfortunately * SiS screwed up and their 5597 doesn't support the LOOP command even * though it has an AUX port. */ param = 0x5a; retval = i8042_command(¶m, I8042_CMD_AUX_LOOP); if (retval || param != 0x5a) { /* * External connection test - filters out AT-soldered PS/2 i8042's * 0x00 - no error, 0x01-0x03 - clock/data stuck, 0xff - general error * 0xfa - no error on some notebooks which ignore the spec * Because it's common for chipsets to return error on perfectly functioning * AUX ports, we test for this only when the LOOP command failed. */ if (i8042_command(¶m, I8042_CMD_AUX_TEST) || (param && param != 0xfa && param != 0xff)) return -1; /* * If AUX_LOOP completed without error but returned unexpected data * mark it as broken */ if (!retval) aux_loop_broken = true; } /* * Bit assignment test - filters out PS/2 i8042's in AT mode */ if (i8042_toggle_aux(false)) { pr_warn("Failed to disable AUX port, but continuing anyway... Is this a SiS?\n"); pr_warn("If AUX port is really absent please use the 'i8042.noaux' option\n"); } if (i8042_toggle_aux(true)) return -1; /* * Reset keyboard (needed on some laptops to successfully detect * touchpad, e.g., some Gigabyte laptop models with Elantech * touchpads). */ if (i8042_kbdreset) { pr_warn("Attempting to reset device connected to KBD port\n"); i8042_kbd_write(NULL, (unsigned char) 0xff); } /* * Test AUX IRQ delivery to make sure BIOS did not grab the IRQ and * used it for a PCI card or somethig else. */ if (i8042_noloop || i8042_bypass_aux_irq_test || aux_loop_broken) { /* * Without LOOP command we can't test AUX IRQ delivery. Assume the port * is working and hope we are right. */ retval = 0; goto out; } if (request_irq(I8042_AUX_IRQ, i8042_aux_test_irq, IRQF_SHARED, "i8042", i8042_platform_device)) goto out; irq_registered = true; if (i8042_enable_aux_port()) goto out; scoped_guard(spinlock_irqsave, &i8042_lock) { init_completion(&i8042_aux_irq_delivered); i8042_irq_being_tested = true; param = 0xa5; retval = __i8042_command(¶m, I8042_CMD_AUX_LOOP & 0xf0ff); if (retval) goto out; } if (wait_for_completion_timeout(&i8042_aux_irq_delivered, msecs_to_jiffies(250)) == 0) { /* * AUX IRQ was never delivered so we need to flush the controller to * get rid of the byte we put there; otherwise keyboard may not work. */ dbg(" -- i8042 (aux irq test timeout)\n"); i8042_flush(); retval = -1; } out: /* * Disable the interface. */ i8042_ctr |= I8042_CTR_AUXDIS; i8042_ctr &= ~I8042_CTR_AUXINT; if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) retval = -1; if (irq_registered) free_irq(I8042_AUX_IRQ, i8042_platform_device); return retval; } static int i8042_controller_check(void) { if (i8042_flush()) { pr_info("No controller found\n"); return -ENODEV; } return 0; } static int i8042_controller_selftest(void) { unsigned char param; int i = 0; /* * We try this 5 times; on some really fragile systems this does not * take the first time... */ do { if (i8042_command(¶m, I8042_CMD_CTL_TEST)) { pr_err("i8042 controller selftest timeout\n"); return -ENODEV; } if (param == I8042_RET_CTL_TEST) return 0; dbg("i8042 controller selftest: %#x != %#x\n", param, I8042_RET_CTL_TEST); msleep(50); } while (i++ < 5); #ifdef CONFIG_X86 /* * On x86, we don't fail entire i8042 initialization if controller * reset fails in hopes that keyboard port will still be functional * and user will still get a working keyboard. This is especially * important on netbooks. On other arches we trust hardware more. */ pr_info("giving up on controller selftest, continuing anyway...\n"); return 0; #else pr_err("i8042 controller selftest failed\n"); return -EIO; #endif } /* * i8042_controller_init initializes the i8042 controller, and, * most importantly, sets it into non-xlated mode if that's * desired. */ static int i8042_controller_init(void) { int n = 0; unsigned char ctr[2]; /* * Save the CTR for restore on unload / reboot. */ do { if (n >= 10) { pr_err("Unable to get stable CTR read\n"); return -EIO; } if (n != 0) udelay(50); if (i8042_command(&ctr[n++ % 2], I8042_CMD_CTL_RCTR)) { pr_err("Can't read CTR while initializing i8042\n"); return i8042_probe_defer ? -EPROBE_DEFER : -EIO; } } while (n < 2 || ctr[0] != ctr[1]); i8042_initial_ctr = i8042_ctr = ctr[0]; /* * Disable the keyboard interface and interrupt. */ i8042_ctr |= I8042_CTR_KBDDIS; i8042_ctr &= ~I8042_CTR_KBDINT; /* * Handle keylock. */ scoped_guard(spinlock_irqsave, &i8042_lock) { if (~i8042_read_status() & I8042_STR_KEYLOCK) { if (i8042_unlock) i8042_ctr |= I8042_CTR_IGNKEYLOCK; else pr_warn("Warning: Keylock active\n"); } } /* * If the chip is configured into nontranslated mode by the BIOS, don't * bother enabling translating and be happy. */ if (~i8042_ctr & I8042_CTR_XLATE) i8042_direct = true; /* * Set nontranslated mode for the kbd interface if requested by an option. * After this the kbd interface becomes a simple serial in/out, like the aux * interface is. We don't do this by default, since it can confuse notebook * BIOSes. */ if (i8042_direct) i8042_ctr &= ~I8042_CTR_XLATE; /* * Write CTR back. */ if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { pr_err("Can't write CTR while initializing i8042\n"); return -EIO; } /* * Flush whatever accumulated while we were disabling keyboard port. */ i8042_flush(); return 0; } /* * Reset the controller and reset CRT to the original value set by BIOS. */ static void i8042_controller_reset(bool s2r_wants_reset) { i8042_flush(); /* * Disable both KBD and AUX interfaces so they don't get in the way */ i8042_ctr |= I8042_CTR_KBDDIS | I8042_CTR_AUXDIS; i8042_ctr &= ~(I8042_CTR_KBDINT | I8042_CTR_AUXINT); if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) pr_warn("Can't write CTR while resetting\n"); /* * Disable MUX mode if present. */ if (i8042_mux_present) i8042_set_mux_mode(false, NULL); /* * Reset the controller if requested. */ if (i8042_reset == I8042_RESET_ALWAYS || (i8042_reset == I8042_RESET_ON_S2RAM && s2r_wants_reset)) { i8042_controller_selftest(); } /* * Restore the original control register setting. */ if (i8042_command(&i8042_initial_ctr, I8042_CMD_CTL_WCTR)) pr_warn("Can't restore CTR\n"); } /* * i8042_panic_blink() will turn the keyboard LEDs on or off and is called * when kernel panics. Flashing LEDs is useful for users running X who may * not see the console and will help distinguishing panics from "real" * lockups. * * Note that DELAY has a limit of 10ms so we will not get stuck here * waiting for KBC to free up even if KBD interrupt is off */ #define DELAY do { mdelay(1); if (++delay > 10) return delay; } while(0) static long i8042_panic_blink(int state) { long delay = 0; char led; led = (state) ? 0x01 | 0x04 : 0; while (i8042_read_status() & I8042_STR_IBF) DELAY; dbg("%02x -> i8042 (panic blink)\n", 0xed); i8042_suppress_kbd_ack = 2; i8042_write_data(0xed); /* set leds */ DELAY; while (i8042_read_status() & I8042_STR_IBF) DELAY; DELAY; dbg("%02x -> i8042 (panic blink)\n", led); i8042_write_data(led); DELAY; return delay; } #undef DELAY #ifdef CONFIG_X86 static void i8042_dritek_enable(void) { unsigned char param = 0x90; int error; error = i8042_command(¶m, 0x1059); if (error) pr_warn("Failed to enable DRITEK extension: %d\n", error); } #endif #ifdef CONFIG_PM /* * Here we try to reset everything back to a state we had * before suspending. */ static int i8042_controller_resume(bool s2r_wants_reset) { int error; error = i8042_controller_check(); if (error) return error; if (i8042_reset == I8042_RESET_ALWAYS || (i8042_reset == I8042_RESET_ON_S2RAM && s2r_wants_reset)) { error = i8042_controller_selftest(); if (error) return error; } /* * Restore original CTR value and disable all ports */ i8042_ctr = i8042_initial_ctr; if (i8042_direct) i8042_ctr &= ~I8042_CTR_XLATE; i8042_ctr |= I8042_CTR_AUXDIS | I8042_CTR_KBDDIS; i8042_ctr &= ~(I8042_CTR_AUXINT | I8042_CTR_KBDINT); if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { pr_warn("Can't write CTR to resume, retrying...\n"); msleep(50); if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { pr_err("CTR write retry failed\n"); return -EIO; } } #ifdef CONFIG_X86 if (i8042_dritek) i8042_dritek_enable(); #endif if (i8042_mux_present) { if (i8042_set_mux_mode(true, NULL) || i8042_enable_mux_ports()) pr_warn("failed to resume active multiplexor, mouse won't work\n"); } else if (i8042_ports[I8042_AUX_PORT_NO].serio) { i8042_enable_aux_port(); } if (i8042_ports[I8042_KBD_PORT_NO].serio) i8042_enable_kbd_port(); i8042_handle_data(0); return 0; } /* * Here we try to restore the original BIOS settings to avoid * upsetting it. */ static int i8042_pm_suspend(struct device *dev) { int i; if (!i8042_forcenorestore && pm_suspend_via_firmware()) i8042_controller_reset(true); /* Set up serio interrupts for system wakeup. */ for (i = 0; i < I8042_NUM_PORTS; i++) { struct serio *serio = i8042_ports[i].serio; if (serio && device_may_wakeup(&serio->dev)) enable_irq_wake(i8042_ports[i].irq); } return 0; } static int i8042_pm_resume_noirq(struct device *dev) { if (i8042_forcenorestore || !pm_resume_via_firmware()) i8042_handle_data(0); return 0; } static int i8042_pm_resume(struct device *dev) { bool want_reset; int i; for (i = 0; i < I8042_NUM_PORTS; i++) { struct serio *serio = i8042_ports[i].serio; if (serio && device_may_wakeup(&serio->dev)) disable_irq_wake(i8042_ports[i].irq); } /* * If platform firmware was not going to be involved in suspend, we did * not restore the controller state to whatever it had been at boot * time, so we do not need to do anything. */ if (i8042_forcenorestore || !pm_suspend_via_firmware()) return 0; /* * We only need to reset the controller if we are resuming after handing * off control to the platform firmware, otherwise we can simply restore * the mode. */ want_reset = pm_resume_via_firmware(); return i8042_controller_resume(want_reset); } static int i8042_pm_thaw(struct device *dev) { i8042_handle_data(0); return 0; } static int i8042_pm_reset(struct device *dev) { i8042_controller_reset(false); return 0; } static int i8042_pm_restore(struct device *dev) { return i8042_controller_resume(false); } static const struct dev_pm_ops i8042_pm_ops = { .suspend = i8042_pm_suspend, .resume_noirq = i8042_pm_resume_noirq, .resume = i8042_pm_resume, .thaw = i8042_pm_thaw, .poweroff = i8042_pm_reset, .restore = i8042_pm_restore, }; #endif /* CONFIG_PM */ /* * We need to reset the 8042 back to original mode on system shutdown, * because otherwise BIOSes will be confused. */ static void i8042_shutdown(struct platform_device *dev) { i8042_controller_reset(false); } static int i8042_create_kbd_port(void) { struct serio *serio; struct i8042_port *port = &i8042_ports[I8042_KBD_PORT_NO]; serio = kzalloc(sizeof(*serio), GFP_KERNEL); if (!serio) return -ENOMEM; serio->id.type = i8042_direct ? SERIO_8042 : SERIO_8042_XL; serio->write = i8042_dumbkbd ? NULL : i8042_kbd_write; serio->start = i8042_start; serio->stop = i8042_stop; serio->close = i8042_port_close; serio->ps2_cmd_mutex = &i8042_mutex; serio->port_data = port; serio->dev.parent = &i8042_platform_device->dev; strscpy(serio->name, "i8042 KBD port", sizeof(serio->name)); strscpy(serio->phys, I8042_KBD_PHYS_DESC, sizeof(serio->phys)); strscpy(serio->firmware_id, i8042_kbd_firmware_id, sizeof(serio->firmware_id)); set_primary_fwnode(&serio->dev, i8042_kbd_fwnode); port->serio = serio; port->irq = I8042_KBD_IRQ; return 0; } static int i8042_create_aux_port(int idx) { struct serio *serio; int port_no = idx < 0 ? I8042_AUX_PORT_NO : I8042_MUX_PORT_NO + idx; struct i8042_port *port = &i8042_ports[port_no]; serio = kzalloc(sizeof(*serio), GFP_KERNEL); if (!serio) return -ENOMEM; serio->id.type = SERIO_8042; serio->write = i8042_aux_write; serio->start = i8042_start; serio->stop = i8042_stop; serio->ps2_cmd_mutex = &i8042_mutex; serio->port_data = port; serio->dev.parent = &i8042_platform_device->dev; if (idx < 0) { strscpy(serio->name, "i8042 AUX port", sizeof(serio->name)); strscpy(serio->phys, I8042_AUX_PHYS_DESC, sizeof(serio->phys)); strscpy(serio->firmware_id, i8042_aux_firmware_id, sizeof(serio->firmware_id)); serio->close = i8042_port_close; } else { snprintf(serio->name, sizeof(serio->name), "i8042 AUX%d port", idx); snprintf(serio->phys, sizeof(serio->phys), I8042_MUX_PHYS_DESC, idx + 1); strscpy(serio->firmware_id, i8042_aux_firmware_id, sizeof(serio->firmware_id)); } port->serio = serio; port->mux = idx; port->irq = I8042_AUX_IRQ; return 0; } static void i8042_free_kbd_port(void) { kfree(i8042_ports[I8042_KBD_PORT_NO].serio); i8042_ports[I8042_KBD_PORT_NO].serio = NULL; } static void i8042_free_aux_ports(void) { int i; for (i = I8042_AUX_PORT_NO; i < I8042_NUM_PORTS; i++) { kfree(i8042_ports[i].serio); i8042_ports[i].serio = NULL; } } static void i8042_register_ports(void) { int i; for (i = 0; i < I8042_NUM_PORTS; i++) { struct serio *serio = i8042_ports[i].serio; if (!serio) continue; printk(KERN_INFO "serio: %s at %#lx,%#lx irq %d\n", serio->name, (unsigned long) I8042_DATA_REG, (unsigned long) I8042_COMMAND_REG, i8042_ports[i].irq); serio_register_port(serio); } } static void i8042_unregister_ports(void) { int i; for (i = 0; i < I8042_NUM_PORTS; i++) { if (i8042_ports[i].serio) { serio_unregister_port(i8042_ports[i].serio); i8042_ports[i].serio = NULL; } } } static void i8042_free_irqs(void) { if (i8042_aux_irq_registered) free_irq(I8042_AUX_IRQ, i8042_platform_device); if (i8042_kbd_irq_registered) free_irq(I8042_KBD_IRQ, i8042_platform_device); i8042_aux_irq_registered = i8042_kbd_irq_registered = false; } static int i8042_setup_aux(void) { int (*aux_enable)(void); int error; int i; if (i8042_check_aux()) return -ENODEV; if (i8042_nomux || i8042_check_mux()) { error = i8042_create_aux_port(-1); if (error) goto err_free_ports; aux_enable = i8042_enable_aux_port; } else { for (i = 0; i < I8042_NUM_MUX_PORTS; i++) { error = i8042_create_aux_port(i); if (error) goto err_free_ports; } aux_enable = i8042_enable_mux_ports; } error = request_irq(I8042_AUX_IRQ, i8042_interrupt, IRQF_SHARED, "i8042", i8042_platform_device); if (error) goto err_free_ports; error = aux_enable(); if (error) goto err_free_irq; i8042_aux_irq_registered = true; return 0; err_free_irq: free_irq(I8042_AUX_IRQ, i8042_platform_device); err_free_ports: i8042_free_aux_ports(); return error; } static int i8042_setup_kbd(void) { int error; error = i8042_create_kbd_port(); if (error) return error; error = request_irq(I8042_KBD_IRQ, i8042_interrupt, IRQF_SHARED, "i8042", i8042_platform_device); if (error) goto err_free_port; error = i8042_enable_kbd_port(); if (error) goto err_free_irq; i8042_kbd_irq_registered = true; return 0; err_free_irq: free_irq(I8042_KBD_IRQ, i8042_platform_device); err_free_port: i8042_free_kbd_port(); return error; } static int i8042_kbd_bind_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; struct serio *serio = to_serio_port(dev); struct i8042_port *port = serio->port_data; if (serio != i8042_ports[I8042_KBD_PORT_NO].serio) return 0; switch (action) { case BUS_NOTIFY_BOUND_DRIVER: port->driver_bound = true; break; case BUS_NOTIFY_UNBIND_DRIVER: port->driver_bound = false; break; } return 0; } static int i8042_probe(struct platform_device *dev) { int error; if (i8042_reset == I8042_RESET_ALWAYS) { error = i8042_controller_selftest(); if (error) return error; } error = i8042_controller_init(); if (error) return error; #ifdef CONFIG_X86 if (i8042_dritek) i8042_dritek_enable(); #endif if (!i8042_noaux) { error = i8042_setup_aux(); if (error && error != -ENODEV && error != -EBUSY) goto out_fail; } if (!i8042_nokbd) { error = i8042_setup_kbd(); if (error) goto out_fail; } /* * Ok, everything is ready, let's register all serio ports */ i8042_register_ports(); return 0; out_fail: i8042_free_aux_ports(); /* in case KBD failed but AUX not */ i8042_free_irqs(); i8042_controller_reset(false); return error; } static void i8042_remove(struct platform_device *dev) { i8042_unregister_ports(); i8042_free_irqs(); i8042_controller_reset(false); } static struct platform_driver i8042_driver = { .driver = { .name = "i8042", #ifdef CONFIG_PM .pm = &i8042_pm_ops, #endif }, .probe = i8042_probe, .remove = i8042_remove, .shutdown = i8042_shutdown, }; static struct notifier_block i8042_kbd_bind_notifier_block = { .notifier_call = i8042_kbd_bind_notifier, }; static int __init i8042_init(void) { int err; dbg_init(); err = i8042_platform_init(); if (err) return (err == -ENODEV) ? 0 : err; err = i8042_controller_check(); if (err) goto err_platform_exit; /* Set this before creating the dev to allow i8042_command to work right away */ i8042_present = true; err = platform_driver_register(&i8042_driver); if (err) goto err_platform_exit; i8042_platform_device = platform_device_alloc("i8042", -1); if (!i8042_platform_device) { err = -ENOMEM; goto err_unregister_driver; } err = platform_device_add(i8042_platform_device); if (err) goto err_free_device; bus_register_notifier(&serio_bus, &i8042_kbd_bind_notifier_block); panic_blink = i8042_panic_blink; return 0; err_free_device: platform_device_put(i8042_platform_device); err_unregister_driver: platform_driver_unregister(&i8042_driver); err_platform_exit: i8042_platform_exit(); return err; } static void __exit i8042_exit(void) { if (!i8042_present) return; platform_device_unregister(i8042_platform_device); platform_driver_unregister(&i8042_driver); i8042_platform_exit(); bus_unregister_notifier(&serio_bus, &i8042_kbd_bind_notifier_block); panic_blink = NULL; } module_init(i8042_init); module_exit(i8042_exit); |
| 1731 7935 2694 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM maple_tree #if !defined(_TRACE_MM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MM_H #include <linux/tracepoint.h> struct ma_state; TRACE_EVENT(ma_op, TP_PROTO(const char *fn, struct ma_state *mas), TP_ARGS(fn, mas), TP_STRUCT__entry( __field(const char *, fn) __field(unsigned long, min) __field(unsigned long, max) __field(unsigned long, index) __field(unsigned long, last) __field(void *, node) ), TP_fast_assign( __entry->fn = fn; __entry->min = mas->min; __entry->max = mas->max; __entry->index = mas->index; __entry->last = mas->last; __entry->node = mas->node; ), TP_printk("%s\tNode: %p (%lu %lu) range: %lu-%lu", __entry->fn, (void *) __entry->node, (unsigned long) __entry->min, (unsigned long) __entry->max, (unsigned long) __entry->index, (unsigned long) __entry->last ) ) TRACE_EVENT(ma_read, TP_PROTO(const char *fn, struct ma_state *mas), TP_ARGS(fn, mas), TP_STRUCT__entry( __field(const char *, fn) __field(unsigned long, min) __field(unsigned long, max) __field(unsigned long, index) __field(unsigned long, last) __field(void *, node) ), TP_fast_assign( __entry->fn = fn; __entry->min = mas->min; __entry->max = mas->max; __entry->index = mas->index; __entry->last = mas->last; __entry->node = mas->node; ), TP_printk("%s\tNode: %p (%lu %lu) range: %lu-%lu", __entry->fn, (void *) __entry->node, (unsigned long) __entry->min, (unsigned long) __entry->max, (unsigned long) __entry->index, (unsigned long) __entry->last ) ) TRACE_EVENT(ma_write, TP_PROTO(const char *fn, struct ma_state *mas, unsigned long piv, void *val), TP_ARGS(fn, mas, piv, val), TP_STRUCT__entry( __field(const char *, fn) __field(unsigned long, min) __field(unsigned long, max) __field(unsigned long, index) __field(unsigned long, last) __field(unsigned long, piv) __field(void *, val) __field(void *, node) ), TP_fast_assign( __entry->fn = fn; __entry->min = mas->min; __entry->max = mas->max; __entry->index = mas->index; __entry->last = mas->last; __entry->piv = piv; __entry->val = val; __entry->node = mas->node; ), TP_printk("%s\tNode %p (%lu %lu) range:%lu-%lu piv (%lu) val %p", __entry->fn, (void *) __entry->node, (unsigned long) __entry->min, (unsigned long) __entry->max, (unsigned long) __entry->index, (unsigned long) __entry->last, (unsigned long) __entry->piv, (void *) __entry->val ) ) #endif /* _TRACE_MM_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
| 5 10084 3839 10010 5119 5117 159 5119 4392 3330 3329 3329 3330 4536 4552 206 10 4113 4137 4114 6706 6710 161 19 6712 5929 650 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Percpu refcounts: * (C) 2012 Google, Inc. * Author: Kent Overstreet <koverstreet@google.com> * * This implements a refcount with similar semantics to atomic_t - atomic_inc(), * atomic_dec_and_test() - but percpu. * * There's one important difference between percpu refs and normal atomic_t * refcounts; you have to keep track of your initial refcount, and then when you * start shutting down you call percpu_ref_kill() _before_ dropping the initial * refcount. * * The refcount will have a range of 0 to ((1U << 31) - 1), i.e. one bit less * than an atomic_t - this is because of the way shutdown works, see * percpu_ref_kill()/PERCPU_COUNT_BIAS. * * Before you call percpu_ref_kill(), percpu_ref_put() does not check for the * refcount hitting 0 - it can't, if it was in percpu mode. percpu_ref_kill() * puts the ref back in single atomic_t mode, collecting the per cpu refs and * issuing the appropriate barriers, and then marks the ref as shutting down so * that percpu_ref_put() will check for the ref hitting 0. After it returns, * it's safe to drop the initial ref. * * USAGE: * * See fs/aio.c for some example usage; it's used there for struct kioctx, which * is created when userspaces calls io_setup(), and destroyed when userspace * calls io_destroy() or the process exits. * * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it * removes the kioctx from the proccess's table of kioctxs and kills percpu_ref. * After that, there can't be any new users of the kioctx (from lookup_ioctx()) * and it's then safe to drop the initial ref with percpu_ref_put(). * * Note that the free path, free_ioctx(), needs to go through explicit call_rcu() * to synchronize with RCU protected lookup_ioctx(). percpu_ref operations don't * imply RCU grace periods of any kind and if a user wants to combine percpu_ref * with RCU protection, it must be done explicitly. * * Code that does a two stage shutdown like this often needs some kind of * explicit synchronization to ensure the initial refcount can only be dropped * once - percpu_ref_kill() does this for you, it returns true once and false if * someone else already called it. The aio code uses it this way, but it's not * necessary if the code has some other mechanism to synchronize teardown. * around. */ #ifndef _LINUX_PERCPU_REFCOUNT_H #define _LINUX_PERCPU_REFCOUNT_H #include <linux/atomic.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/types.h> #include <linux/gfp.h> struct percpu_ref; typedef void (percpu_ref_func_t)(struct percpu_ref *); /* flags set in the lower bits of percpu_ref->percpu_count_ptr */ enum { __PERCPU_REF_ATOMIC = 1LU << 0, /* operating in atomic mode */ __PERCPU_REF_DEAD = 1LU << 1, /* (being) killed */ __PERCPU_REF_ATOMIC_DEAD = __PERCPU_REF_ATOMIC | __PERCPU_REF_DEAD, __PERCPU_REF_FLAG_BITS = 2, }; /* @flags for percpu_ref_init() */ enum { /* * Start w/ ref == 1 in atomic mode. Can be switched to percpu * operation using percpu_ref_switch_to_percpu(). If initialized * with this flag, the ref will stay in atomic mode until * percpu_ref_switch_to_percpu() is invoked on it. * Implies ALLOW_REINIT. */ PERCPU_REF_INIT_ATOMIC = 1 << 0, /* * Start dead w/ ref == 0 in atomic mode. Must be revived with * percpu_ref_reinit() before used. Implies INIT_ATOMIC and * ALLOW_REINIT. */ PERCPU_REF_INIT_DEAD = 1 << 1, /* * Allow switching from atomic mode to percpu mode. */ PERCPU_REF_ALLOW_REINIT = 1 << 2, }; struct percpu_ref_data { atomic_long_t count; percpu_ref_func_t *release; percpu_ref_func_t *confirm_switch; bool force_atomic:1; bool allow_reinit:1; struct rcu_head rcu; struct percpu_ref *ref; }; struct percpu_ref { /* * The low bit of the pointer indicates whether the ref is in percpu * mode; if set, then get/put will manipulate the atomic_t. */ unsigned long percpu_count_ptr; /* * 'percpu_ref' is often embedded into user structure, and only * 'percpu_count_ptr' is required in fast path, move other fields * into 'percpu_ref_data', so we can reduce memory footprint in * fast path. */ struct percpu_ref_data *data; }; int __must_check percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, unsigned int flags, gfp_t gfp); void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_switch_to_atomic(struct percpu_ref *ref, percpu_ref_func_t *confirm_switch); void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref); void percpu_ref_switch_to_percpu(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); void percpu_ref_resurrect(struct percpu_ref *ref); void percpu_ref_reinit(struct percpu_ref *ref); bool percpu_ref_is_zero(struct percpu_ref *ref); /** * percpu_ref_kill - drop the initial ref * @ref: percpu_ref to kill * * Must be used to drop the initial ref on a percpu refcount; must be called * precisely once before shutdown. * * Switches @ref into atomic mode before gathering up the percpu counters * and dropping the initial ref. * * There are no implied RCU grace periods between kill and release. */ static inline void percpu_ref_kill(struct percpu_ref *ref) { percpu_ref_kill_and_confirm(ref, NULL); } /* * Internal helper. Don't use outside percpu-refcount proper. The * function doesn't return the pointer and let the caller test it for NULL * because doing so forces the compiler to generate two conditional * branches as it can't assume that @ref->percpu_count is not NULL. */ static inline bool __ref_is_percpu(struct percpu_ref *ref, unsigned long __percpu **percpu_countp) { unsigned long percpu_ptr; /* * The value of @ref->percpu_count_ptr is tested for * !__PERCPU_REF_ATOMIC, which may be set asynchronously, and then * used as a pointer. If the compiler generates a separate fetch * when using it as a pointer, __PERCPU_REF_ATOMIC may be set in * between contaminating the pointer value, meaning that * READ_ONCE() is required when fetching it. * * The dependency ordering from the READ_ONCE() pairs * with smp_store_release() in __percpu_ref_switch_to_percpu(). */ percpu_ptr = READ_ONCE(ref->percpu_count_ptr); /* * Theoretically, the following could test just ATOMIC; however, * then we'd have to mask off DEAD separately as DEAD may be * visible without ATOMIC if we race with percpu_ref_kill(). DEAD * implies ATOMIC anyway. Test them together. */ if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD)) return false; *percpu_countp = (unsigned long __percpu *)percpu_ptr; return true; } /** * percpu_ref_get_many - increment a percpu refcount * @ref: percpu_ref to get * @nr: number of references to get * * Analogous to atomic_long_add(). * * This function is safe to call as long as @ref is between init and exit. */ static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr) { unsigned long __percpu *percpu_count; rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) this_cpu_add(*percpu_count, nr); else atomic_long_add(nr, &ref->data->count); rcu_read_unlock(); } /** * percpu_ref_get - increment a percpu refcount * @ref: percpu_ref to get * * Analogous to atomic_long_inc(). * * This function is safe to call as long as @ref is between init and exit. */ static inline void percpu_ref_get(struct percpu_ref *ref) { percpu_ref_get_many(ref, 1); } /** * percpu_ref_tryget_many - try to increment a percpu refcount * @ref: percpu_ref to try-get * @nr: number of references to get * * Increment a percpu refcount by @nr unless its count already reached zero. * Returns %true on success; %false on failure. * * This function is safe to call as long as @ref is between init and exit. */ static inline bool percpu_ref_tryget_many(struct percpu_ref *ref, unsigned long nr) { unsigned long __percpu *percpu_count; bool ret; rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) { this_cpu_add(*percpu_count, nr); ret = true; } else { ret = atomic_long_add_unless(&ref->data->count, nr, 0); } rcu_read_unlock(); return ret; } /** * percpu_ref_tryget - try to increment a percpu refcount * @ref: percpu_ref to try-get * * Increment a percpu refcount unless its count already reached zero. * Returns %true on success; %false on failure. * * This function is safe to call as long as @ref is between init and exit. */ static inline bool percpu_ref_tryget(struct percpu_ref *ref) { return percpu_ref_tryget_many(ref, 1); } /** * percpu_ref_tryget_live_rcu - same as percpu_ref_tryget_live() but the * caller is responsible for taking RCU. * * This function is safe to call as long as @ref is between init and exit. */ static inline bool percpu_ref_tryget_live_rcu(struct percpu_ref *ref) { unsigned long __percpu *percpu_count; bool ret = false; WARN_ON_ONCE(!rcu_read_lock_held()); if (likely(__ref_is_percpu(ref, &percpu_count))) { this_cpu_inc(*percpu_count); ret = true; } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) { ret = atomic_long_inc_not_zero(&ref->data->count); } return ret; } /** * percpu_ref_tryget_live - try to increment a live percpu refcount * @ref: percpu_ref to try-get * * Increment a percpu refcount unless it has already been killed. Returns * %true on success; %false on failure. * * Completion of percpu_ref_kill() in itself doesn't guarantee that this * function will fail. For such guarantee, percpu_ref_kill_and_confirm() * should be used. After the confirm_kill callback is invoked, it's * guaranteed that no new reference will be given out by * percpu_ref_tryget_live(). * * This function is safe to call as long as @ref is between init and exit. */ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { bool ret = false; rcu_read_lock(); ret = percpu_ref_tryget_live_rcu(ref); rcu_read_unlock(); return ret; } /** * percpu_ref_put_many - decrement a percpu refcount * @ref: percpu_ref to put * @nr: number of references to put * * Decrement the refcount, and if 0, call the release function (which was passed * to percpu_ref_init()) * * This function is safe to call as long as @ref is between init and exit. */ static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr) { unsigned long __percpu *percpu_count; rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) this_cpu_sub(*percpu_count, nr); else if (unlikely(atomic_long_sub_and_test(nr, &ref->data->count))) ref->data->release(ref); rcu_read_unlock(); } /** * percpu_ref_put - decrement a percpu refcount * @ref: percpu_ref to put * * Decrement the refcount, and if 0, call the release function (which was passed * to percpu_ref_init()) * * This function is safe to call as long as @ref is between init and exit. */ static inline void percpu_ref_put(struct percpu_ref *ref) { percpu_ref_put_many(ref, 1); } /** * percpu_ref_is_dying - test whether a percpu refcount is dying or dead * @ref: percpu_ref to test * * Returns %true if @ref is dying or dead. * * This function is safe to call as long as @ref is between init and exit * and the caller is responsible for synchronizing against state changes. */ static inline bool percpu_ref_is_dying(struct percpu_ref *ref) { return ref->percpu_count_ptr & __PERCPU_REF_DEAD; } #endif |
| 676 676 677 961 677 677 962 374 376 865 24 1682 1682 1681 962 375 865 1682 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 | // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018, Intel Corporation. */ /* A common module to handle registrations and notifications for paravirtual * drivers to enable accelerated datapath and support VF live migration. * * The notifier and event handling code is based on netvsc driver. */ #include <linux/module.h> #include <linux/etherdevice.h> #include <uapi/linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/if_vlan.h> #include <net/failover.h> static LIST_HEAD(failover_list); static DEFINE_SPINLOCK(failover_lock); static struct net_device *failover_get_bymac(u8 *mac, struct failover_ops **ops) { struct net_device *failover_dev; struct failover *failover; spin_lock(&failover_lock); list_for_each_entry(failover, &failover_list, list) { failover_dev = rtnl_dereference(failover->failover_dev); if (ether_addr_equal(failover_dev->perm_addr, mac)) { *ops = rtnl_dereference(failover->ops); spin_unlock(&failover_lock); return failover_dev; } } spin_unlock(&failover_lock); return NULL; } /** * failover_slave_register - Register a slave netdev * * @slave_dev: slave netdev that is being registered * * Registers a slave device to a failover instance. Only ethernet devices * are supported. */ static int failover_slave_register(struct net_device *slave_dev) { struct netdev_lag_upper_info lag_upper_info; struct net_device *failover_dev; struct failover_ops *fops; int err; if (slave_dev->type != ARPHRD_ETHER) goto done; ASSERT_RTNL(); failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); if (!failover_dev) goto done; if (fops && fops->slave_pre_register && fops->slave_pre_register(slave_dev, failover_dev)) goto done; err = netdev_rx_handler_register(slave_dev, fops->slave_handle_frame, failover_dev); if (err) { netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n", err); goto done; } lag_upper_info.tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP; err = netdev_master_upper_dev_link(slave_dev, failover_dev, NULL, &lag_upper_info, NULL); if (err) { netdev_err(slave_dev, "can not set failover device %s (err = %d)\n", failover_dev->name, err); goto err_upper_link; } slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_NO_ADDRCONF); if (fops && fops->slave_register && !fops->slave_register(slave_dev, failover_dev)) return NOTIFY_OK; netdev_upper_dev_unlink(slave_dev, failover_dev); slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_NO_ADDRCONF); err_upper_link: netdev_rx_handler_unregister(slave_dev); done: return NOTIFY_DONE; } /** * failover_slave_unregister - Unregister a slave netdev * * @slave_dev: slave netdev that is being unregistered * * Unregisters a slave device from a failover instance. */ int failover_slave_unregister(struct net_device *slave_dev) { struct net_device *failover_dev; struct failover_ops *fops; if (!netif_is_failover_slave(slave_dev)) goto done; ASSERT_RTNL(); failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); if (!failover_dev) goto done; if (fops && fops->slave_pre_unregister && fops->slave_pre_unregister(slave_dev, failover_dev)) goto done; netdev_rx_handler_unregister(slave_dev); netdev_upper_dev_unlink(slave_dev, failover_dev); slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_NO_ADDRCONF); if (fops && fops->slave_unregister && !fops->slave_unregister(slave_dev, failover_dev)) return NOTIFY_OK; done: return NOTIFY_DONE; } EXPORT_SYMBOL_GPL(failover_slave_unregister); static int failover_slave_link_change(struct net_device *slave_dev) { struct net_device *failover_dev; struct failover_ops *fops; if (!netif_is_failover_slave(slave_dev)) goto done; ASSERT_RTNL(); failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); if (!failover_dev) goto done; if (!netif_running(failover_dev)) goto done; if (fops && fops->slave_link_change && !fops->slave_link_change(slave_dev, failover_dev)) return NOTIFY_OK; done: return NOTIFY_DONE; } static int failover_slave_name_change(struct net_device *slave_dev) { struct net_device *failover_dev; struct failover_ops *fops; if (!netif_is_failover_slave(slave_dev)) goto done; ASSERT_RTNL(); failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); if (!failover_dev) goto done; if (!netif_running(failover_dev)) goto done; if (fops && fops->slave_name_change && !fops->slave_name_change(slave_dev, failover_dev)) return NOTIFY_OK; done: return NOTIFY_DONE; } static int failover_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); /* Skip parent events */ if (netif_is_failover(event_dev)) return NOTIFY_DONE; switch (event) { case NETDEV_REGISTER: return failover_slave_register(event_dev); case NETDEV_UNREGISTER: return failover_slave_unregister(event_dev); case NETDEV_UP: case NETDEV_DOWN: case NETDEV_CHANGE: return failover_slave_link_change(event_dev); case NETDEV_CHANGENAME: return failover_slave_name_change(event_dev); default: return NOTIFY_DONE; } } static struct notifier_block failover_notifier = { .notifier_call = failover_event, }; static void failover_existing_slave_register(struct net_device *failover_dev) { struct net *net = dev_net(failover_dev); struct net_device *dev; rtnl_lock(); for_each_netdev(net, dev) { if (netif_is_failover(dev)) continue; if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr)) failover_slave_register(dev); } rtnl_unlock(); } /** * failover_register - Register a failover instance * * @dev: failover netdev * @ops: failover ops * * Allocate and register a failover instance for a failover netdev. ops * provides handlers for slave device register/unregister/link change/ * name change events. * * Return: pointer to failover instance */ struct failover *failover_register(struct net_device *dev, struct failover_ops *ops) { struct failover *failover; if (dev->type != ARPHRD_ETHER) return ERR_PTR(-EINVAL); failover = kzalloc(sizeof(*failover), GFP_KERNEL); if (!failover) return ERR_PTR(-ENOMEM); rcu_assign_pointer(failover->ops, ops); netdev_hold(dev, &failover->dev_tracker, GFP_KERNEL); dev->priv_flags |= IFF_FAILOVER; rcu_assign_pointer(failover->failover_dev, dev); spin_lock(&failover_lock); list_add_tail(&failover->list, &failover_list); spin_unlock(&failover_lock); netdev_info(dev, "failover master:%s registered\n", dev->name); failover_existing_slave_register(dev); return failover; } EXPORT_SYMBOL_GPL(failover_register); /** * failover_unregister - Unregister a failover instance * * @failover: pointer to failover instance * * Unregisters and frees a failover instance. */ void failover_unregister(struct failover *failover) { struct net_device *failover_dev; failover_dev = rcu_dereference(failover->failover_dev); netdev_info(failover_dev, "failover master:%s unregistered\n", failover_dev->name); failover_dev->priv_flags &= ~IFF_FAILOVER; netdev_put(failover_dev, &failover->dev_tracker); spin_lock(&failover_lock); list_del(&failover->list); spin_unlock(&failover_lock); kfree(failover); } EXPORT_SYMBOL_GPL(failover_unregister); static __init int failover_init(void) { register_netdevice_notifier(&failover_notifier); return 0; } module_init(failover_init); static __exit void failover_exit(void) { unregister_netdevice_notifier(&failover_notifier); } module_exit(failover_exit); MODULE_DESCRIPTION("Generic failover infrastructure/interface"); MODULE_LICENSE("GPL v2"); |
| 179 293 291 573 134 125 573 134 532 87 29 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | #ifndef IOU_ALLOC_CACHE_H #define IOU_ALLOC_CACHE_H #include <linux/io_uring_types.h> /* * Don't allow the cache to grow beyond this size. */ #define IO_ALLOC_CACHE_MAX 128 void io_alloc_cache_free(struct io_alloc_cache *cache, void (*free)(const void *)); bool io_alloc_cache_init(struct io_alloc_cache *cache, unsigned max_nr, unsigned int size, unsigned int init_bytes); void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp); static inline bool io_alloc_cache_put(struct io_alloc_cache *cache, void *entry) { if (cache->nr_cached < cache->max_cached) { if (!kasan_mempool_poison_object(entry)) return false; cache->entries[cache->nr_cached++] = entry; return true; } return false; } static inline void *io_alloc_cache_get(struct io_alloc_cache *cache) { if (cache->nr_cached) { void *entry = cache->entries[--cache->nr_cached]; /* * If KASAN is enabled, always clear the initial bytes that * must be zeroed post alloc, in case any of them overlap * with KASAN storage. */ #if defined(CONFIG_KASAN) kasan_mempool_unpoison_object(entry, cache->elem_size); if (cache->init_clear) memset(entry, 0, cache->init_clear); #endif return entry; } return NULL; } static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp) { void *obj; obj = io_alloc_cache_get(cache); if (obj) return obj; return io_cache_alloc_new(cache, gfp); } static inline void io_cache_free(struct io_alloc_cache *cache, void *obj) { if (!io_alloc_cache_put(cache, obj)) kfree(obj); } #endif |
| 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 | /* SPDX-License-Identifier: GPL-2.0 */ /* * linux/fs/befs/endian.h * * Copyright (C) 2001 Will Dyson <will_dyson@pobox.com> * * Partially based on similar funtions in the sysv driver. */ #ifndef LINUX_BEFS_ENDIAN #define LINUX_BEFS_ENDIAN #include <asm/byteorder.h> static inline u64 fs64_to_cpu(const struct super_block *sb, fs64 n) { if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) return le64_to_cpu((__force __le64)n); else return be64_to_cpu((__force __be64)n); } static inline fs64 cpu_to_fs64(const struct super_block *sb, u64 n) { if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) return (__force fs64)cpu_to_le64(n); else return (__force fs64)cpu_to_be64(n); } static inline u32 fs32_to_cpu(const struct super_block *sb, fs32 n) { if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) return le32_to_cpu((__force __le32)n); else return be32_to_cpu((__force __be32)n); } static inline fs32 cpu_to_fs32(const struct super_block *sb, u32 n) { if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) return (__force fs32)cpu_to_le32(n); else return (__force fs32)cpu_to_be32(n); } static inline u16 fs16_to_cpu(const struct super_block *sb, fs16 n) { if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) return le16_to_cpu((__force __le16)n); else return be16_to_cpu((__force __be16)n); } static inline fs16 cpu_to_fs16(const struct super_block *sb, u16 n) { if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) return (__force fs16)cpu_to_le16(n); else return (__force fs16)cpu_to_be16(n); } /* Composite types below here */ static inline befs_block_run fsrun_to_cpu(const struct super_block *sb, befs_disk_block_run n) { befs_block_run run; if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) { run.allocation_group = le32_to_cpu((__force __le32)n.allocation_group); run.start = le16_to_cpu((__force __le16)n.start); run.len = le16_to_cpu((__force __le16)n.len); } else { run.allocation_group = be32_to_cpu((__force __be32)n.allocation_group); run.start = be16_to_cpu((__force __be16)n.start); run.len = be16_to_cpu((__force __be16)n.len); } return run; } static inline befs_disk_block_run cpu_to_fsrun(const struct super_block *sb, befs_block_run n) { befs_disk_block_run run; if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) { run.allocation_group = cpu_to_le32(n.allocation_group); run.start = cpu_to_le16(n.start); run.len = cpu_to_le16(n.len); } else { run.allocation_group = cpu_to_be32(n.allocation_group); run.start = cpu_to_be16(n.start); run.len = cpu_to_be16(n.len); } return run; } static inline befs_data_stream fsds_to_cpu(const struct super_block *sb, const befs_disk_data_stream *n) { befs_data_stream data; int i; for (i = 0; i < BEFS_NUM_DIRECT_BLOCKS; ++i) data.direct[i] = fsrun_to_cpu(sb, n->direct[i]); data.max_direct_range = fs64_to_cpu(sb, n->max_direct_range); data.indirect = fsrun_to_cpu(sb, n->indirect); data.max_indirect_range = fs64_to_cpu(sb, n->max_indirect_range); data.double_indirect = fsrun_to_cpu(sb, n->double_indirect); data.max_double_indirect_range = fs64_to_cpu(sb, n-> max_double_indirect_range); data.size = fs64_to_cpu(sb, n->size); return data; } #endif //LINUX_BEFS_ENDIAN |
| 617 621 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 | /* * Copyright (C) 2016 Red Hat * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Rob Clark <robdclark@gmail.com> */ #ifndef DRM_PRINT_H_ #define DRM_PRINT_H_ #include <linux/compiler.h> #include <linux/printk.h> #include <linux/device.h> #include <linux/dynamic_debug.h> #include <drm/drm.h> #include <drm/drm_device.h> struct debugfs_regset32; struct drm_device; struct seq_file; /* Do *not* use outside of drm_print.[ch]! */ extern unsigned long __drm_debug; /** * DOC: print * * A simple wrapper for dev_printk(), seq_printf(), etc. Allows same * debug code to be used for both debugfs and printk logging. * * For example:: * * void log_some_info(struct drm_printer *p) * { * drm_printf(p, "foo=%d\n", foo); * drm_printf(p, "bar=%d\n", bar); * } * * #ifdef CONFIG_DEBUG_FS * void debugfs_show(struct seq_file *f) * { * struct drm_printer p = drm_seq_file_printer(f); * log_some_info(&p); * } * #endif * * void some_other_function(...) * { * struct drm_printer p = drm_info_printer(drm->dev); * log_some_info(&p); * } */ /** * enum drm_debug_category - The DRM debug categories * * Each of the DRM debug logging macros use a specific category, and the logging * is filtered by the drm.debug module parameter. This enum specifies the values * for the interface. * * Each DRM_DEBUG_<CATEGORY> macro logs to DRM_UT_<CATEGORY> category, except * DRM_DEBUG() logs to DRM_UT_CORE. * * Enabling verbose debug messages is done through the drm.debug parameter, each * category being enabled by a bit: * * - drm.debug=0x1 will enable CORE messages * - drm.debug=0x2 will enable DRIVER messages * - drm.debug=0x3 will enable CORE and DRIVER messages * - ... * - drm.debug=0x1ff will enable all messages * * An interesting feature is that it's possible to enable verbose logging at * run-time by echoing the debug value in its sysfs node:: * * # echo 0xf > /sys/module/drm/parameters/debug * */ enum drm_debug_category { /* These names must match those in DYNAMIC_DEBUG_CLASSBITS */ /** * @DRM_UT_CORE: Used in the generic drm code: drm_ioctl.c, drm_mm.c, * drm_memory.c, ... */ DRM_UT_CORE, /** * @DRM_UT_DRIVER: Used in the vendor specific part of the driver: i915, * radeon, ... macro. */ DRM_UT_DRIVER, /** * @DRM_UT_KMS: Used in the modesetting code. */ DRM_UT_KMS, /** * @DRM_UT_PRIME: Used in the prime code. */ DRM_UT_PRIME, /** * @DRM_UT_ATOMIC: Used in the atomic code. */ DRM_UT_ATOMIC, /** * @DRM_UT_VBL: Used for verbose debug message in the vblank code. */ DRM_UT_VBL, /** * @DRM_UT_STATE: Used for verbose atomic state debugging. */ DRM_UT_STATE, /** * @DRM_UT_LEASE: Used in the lease code. */ DRM_UT_LEASE, /** * @DRM_UT_DP: Used in the DP code. */ DRM_UT_DP, /** * @DRM_UT_DRMRES: Used in the drm managed resources code. */ DRM_UT_DRMRES }; static inline bool drm_debug_enabled_raw(enum drm_debug_category category) { return unlikely(__drm_debug & BIT(category)); } #define drm_debug_enabled_instrumented(category) \ ({ \ pr_debug("todo: is this frequent enough to optimize ?\n"); \ drm_debug_enabled_raw(category); \ }) #if defined(CONFIG_DRM_USE_DYNAMIC_DEBUG) /* * the drm.debug API uses dyndbg, so each drm_*dbg macro/callsite gets * a descriptor, and only enabled callsites are reachable. They use * the private macro to avoid re-testing the enable-bit. */ #define __drm_debug_enabled(category) true #define drm_debug_enabled(category) drm_debug_enabled_instrumented(category) #else #define __drm_debug_enabled(category) drm_debug_enabled_raw(category) #define drm_debug_enabled(category) drm_debug_enabled_raw(category) #endif /** * struct drm_printer - drm output "stream" * * Do not use struct members directly. Use drm_printer_seq_file(), * drm_printer_info(), etc to initialize. And drm_printf() for output. */ struct drm_printer { /* private: */ void (*printfn)(struct drm_printer *p, struct va_format *vaf); void (*puts)(struct drm_printer *p, const char *str); void *arg; const void *origin; const char *prefix; struct { unsigned int series; unsigned int counter; } line; enum drm_debug_category category; }; void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf); void __drm_puts_coredump(struct drm_printer *p, const char *str); void __drm_printfn_seq_file(struct drm_printer *p, struct va_format *vaf); void __drm_puts_seq_file(struct drm_printer *p, const char *str); void __drm_printfn_info(struct drm_printer *p, struct va_format *vaf); void __drm_printfn_dbg(struct drm_printer *p, struct va_format *vaf); void __drm_printfn_err(struct drm_printer *p, struct va_format *vaf); void __drm_printfn_line(struct drm_printer *p, struct va_format *vaf); __printf(2, 3) void drm_printf(struct drm_printer *p, const char *f, ...); void drm_puts(struct drm_printer *p, const char *str); void drm_print_regset32(struct drm_printer *p, struct debugfs_regset32 *regset); void drm_print_bits(struct drm_printer *p, unsigned long value, const char * const bits[], unsigned int nbits); void drm_print_hex_dump(struct drm_printer *p, const char *prefix, const u8 *buf, size_t len); __printf(2, 0) /** * drm_vprintf - print to a &drm_printer stream * @p: the &drm_printer * @fmt: format string * @va: the va_list */ static inline void drm_vprintf(struct drm_printer *p, const char *fmt, va_list *va) { struct va_format vaf = { .fmt = fmt, .va = va }; p->printfn(p, &vaf); } /** * drm_printf_indent - Print to a &drm_printer stream with indentation * @printer: DRM printer * @indent: Tab indentation level (max 5) * @fmt: Format string */ #define drm_printf_indent(printer, indent, fmt, ...) \ drm_printf((printer), "%.*s" fmt, (indent), "\t\t\t\t\tX", ##__VA_ARGS__) /** * struct drm_print_iterator - local struct used with drm_printer_coredump * @data: Pointer to the devcoredump output buffer, can be NULL if using * drm_printer_coredump to determine size of devcoredump * @start: The offset within the buffer to start writing * @remain: The number of bytes to write for this iteration */ struct drm_print_iterator { void *data; ssize_t start; ssize_t remain; /* private: */ ssize_t offset; }; /** * drm_coredump_printer - construct a &drm_printer that can output to a buffer * from the read function for devcoredump * @iter: A pointer to a struct drm_print_iterator for the read instance * * This wrapper extends drm_printf() to work with a dev_coredumpm() callback * function. The passed in drm_print_iterator struct contains the buffer * pointer, size and offset as passed in from devcoredump. * * For example:: * * void coredump_read(char *buffer, loff_t offset, size_t count, * void *data, size_t datalen) * { * struct drm_print_iterator iter; * struct drm_printer p; * * iter.data = buffer; * iter.start = offset; * iter.remain = count; * * p = drm_coredump_printer(&iter); * * drm_printf(p, "foo=%d\n", foo); * } * * void makecoredump(...) * { * ... * dev_coredumpm(dev, THIS_MODULE, data, 0, GFP_KERNEL, * coredump_read, ...) * } * * The above example has a time complexity of O(N^2), where N is the size of the * devcoredump. This is acceptable for small devcoredumps but scales poorly for * larger ones. * * Another use case for drm_coredump_printer is to capture the devcoredump into * a saved buffer before the dev_coredump() callback. This involves two passes: * one to determine the size of the devcoredump and another to print it to a * buffer. Then, in dev_coredump(), copy from the saved buffer into the * devcoredump read buffer. * * For example:: * * char *devcoredump_saved_buffer; * * ssize_t __coredump_print(char *buffer, ssize_t count, ...) * { * struct drm_print_iterator iter; * struct drm_printer p; * * iter.data = buffer; * iter.start = 0; * iter.remain = count; * * p = drm_coredump_printer(&iter); * * drm_printf(p, "foo=%d\n", foo); * ... * return count - iter.remain; * } * * void coredump_print(...) * { * ssize_t count; * * count = __coredump_print(NULL, INT_MAX, ...); * devcoredump_saved_buffer = kvmalloc(count, GFP_KERNEL); * __coredump_print(devcoredump_saved_buffer, count, ...); * } * * void coredump_read(char *buffer, loff_t offset, size_t count, * void *data, size_t datalen) * { * ... * memcpy(buffer, devcoredump_saved_buffer + offset, count); * ... * } * * The above example has a time complexity of O(N*2), where N is the size of the * devcoredump. This scales better than the previous example for larger * devcoredumps. * * RETURNS: * The &drm_printer object */ static inline struct drm_printer drm_coredump_printer(struct drm_print_iterator *iter) { struct drm_printer p = { .printfn = __drm_printfn_coredump, .puts = __drm_puts_coredump, .arg = iter, }; /* Set the internal offset of the iterator to zero */ iter->offset = 0; return p; } /** * drm_coredump_printer_is_full() - DRM coredump printer output is full * @p: DRM coredump printer * * DRM printer output is full, useful to short circuit coredump printing once * printer is full. * * RETURNS: * True if DRM coredump printer output buffer is full, False otherwise */ static inline bool drm_coredump_printer_is_full(struct drm_printer *p) { struct drm_print_iterator *iterator = p->arg; if (p->printfn != __drm_printfn_coredump) return true; return !iterator->remain; } /** * drm_seq_file_printer - construct a &drm_printer that outputs to &seq_file * @f: the &struct seq_file to output to * * RETURNS: * The &drm_printer object */ static inline struct drm_printer drm_seq_file_printer(struct seq_file *f) { struct drm_printer p = { .printfn = __drm_printfn_seq_file, .puts = __drm_puts_seq_file, .arg = f, }; return p; } /** * drm_info_printer - construct a &drm_printer that outputs to dev_printk() * @dev: the &struct device pointer * * RETURNS: * The &drm_printer object */ static inline struct drm_printer drm_info_printer(struct device *dev) { struct drm_printer p = { .printfn = __drm_printfn_info, .arg = dev, }; return p; } /** * drm_dbg_printer - construct a &drm_printer for drm device specific output * @drm: the &struct drm_device pointer, or NULL * @category: the debug category to use * @prefix: debug output prefix, or NULL for no prefix * * RETURNS: * The &drm_printer object */ static inline struct drm_printer drm_dbg_printer(struct drm_device *drm, enum drm_debug_category category, const char *prefix) { struct drm_printer p = { .printfn = __drm_printfn_dbg, .arg = drm, .origin = (const void *)_THIS_IP_, /* it's fine as we will be inlined */ .prefix = prefix, .category = category, }; return p; } /** * drm_err_printer - construct a &drm_printer that outputs to drm_err() * @drm: the &struct drm_device pointer * @prefix: debug output prefix, or NULL for no prefix * * RETURNS: * The &drm_printer object */ static inline struct drm_printer drm_err_printer(struct drm_device *drm, const char *prefix) { struct drm_printer p = { .printfn = __drm_printfn_err, .arg = drm, .prefix = prefix }; return p; } /** * drm_line_printer - construct a &drm_printer that prefixes outputs with line numbers * @p: the &struct drm_printer which actually generates the output * @prefix: optional output prefix, or NULL for no prefix * @series: optional unique series identifier, or 0 to omit identifier in the output * * This printer can be used to increase the robustness of the captured output * to make sure we didn't lost any intermediate lines of the output. Helpful * while capturing some crash data. * * Example 1:: * * void crash_dump(struct drm_device *drm) * { * static unsigned int id; * struct drm_printer p = drm_err_printer(drm, "crash"); * struct drm_printer lp = drm_line_printer(&p, "dump", ++id); * * drm_printf(&lp, "foo"); * drm_printf(&lp, "bar"); * } * * Above code will print into the dmesg something like:: * * [ ] 0000:00:00.0: [drm] *ERROR* crash dump 1.1: foo * [ ] 0000:00:00.0: [drm] *ERROR* crash dump 1.2: bar * * Example 2:: * * void line_dump(struct device *dev) * { * struct drm_printer p = drm_info_printer(dev); * struct drm_printer lp = drm_line_printer(&p, NULL, 0); * * drm_printf(&lp, "foo"); * drm_printf(&lp, "bar"); * } * * Above code will print:: * * [ ] 0000:00:00.0: [drm] 1: foo * [ ] 0000:00:00.0: [drm] 2: bar * * RETURNS: * The &drm_printer object */ static inline struct drm_printer drm_line_printer(struct drm_printer *p, const char *prefix, unsigned int series) { struct drm_printer lp = { .printfn = __drm_printfn_line, .arg = p, .prefix = prefix, .line = { .series = series, }, }; return lp; } /* * struct device based logging * * Prefer drm_device based logging over device or printk based logging. */ __printf(3, 4) void drm_dev_printk(const struct device *dev, const char *level, const char *format, ...); struct _ddebug; __printf(4, 5) void __drm_dev_dbg(struct _ddebug *desc, const struct device *dev, enum drm_debug_category category, const char *format, ...); /** * DRM_DEV_ERROR() - Error output. * * NOTE: this is deprecated in favor of drm_err() or dev_err(). * * @dev: device pointer * @fmt: printf() like format string. */ #define DRM_DEV_ERROR(dev, fmt, ...) \ drm_dev_printk(dev, KERN_ERR, "*ERROR* " fmt, ##__VA_ARGS__) /** * DRM_DEV_ERROR_RATELIMITED() - Rate limited error output. * * NOTE: this is deprecated in favor of drm_err_ratelimited() or * dev_err_ratelimited(). * * @dev: device pointer * @fmt: printf() like format string. * * Like DRM_ERROR() but won't flood the log. */ #define DRM_DEV_ERROR_RATELIMITED(dev, fmt, ...) \ ({ \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ \ if (__ratelimit(&_rs)) \ DRM_DEV_ERROR(dev, fmt, ##__VA_ARGS__); \ }) /* NOTE: this is deprecated in favor of drm_info() or dev_info(). */ #define DRM_DEV_INFO(dev, fmt, ...) \ drm_dev_printk(dev, KERN_INFO, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_info_once() or dev_info_once(). */ #define DRM_DEV_INFO_ONCE(dev, fmt, ...) \ ({ \ static bool __print_once __read_mostly; \ if (!__print_once) { \ __print_once = true; \ DRM_DEV_INFO(dev, fmt, ##__VA_ARGS__); \ } \ }) #if !defined(CONFIG_DRM_USE_DYNAMIC_DEBUG) #define drm_dev_dbg(dev, cat, fmt, ...) \ __drm_dev_dbg(NULL, dev, cat, fmt, ##__VA_ARGS__) #else #define drm_dev_dbg(dev, cat, fmt, ...) \ _dynamic_func_call_cls(cat, fmt, __drm_dev_dbg, \ dev, cat, fmt, ##__VA_ARGS__) #endif /** * DRM_DEV_DEBUG() - Debug output for generic drm code * * NOTE: this is deprecated in favor of drm_dbg_core(). * * @dev: device pointer * @fmt: printf() like format string. */ #define DRM_DEV_DEBUG(dev, fmt, ...) \ drm_dev_dbg(dev, DRM_UT_CORE, fmt, ##__VA_ARGS__) /** * DRM_DEV_DEBUG_DRIVER() - Debug output for vendor specific part of the driver * * NOTE: this is deprecated in favor of drm_dbg() or dev_dbg(). * * @dev: device pointer * @fmt: printf() like format string. */ #define DRM_DEV_DEBUG_DRIVER(dev, fmt, ...) \ drm_dev_dbg(dev, DRM_UT_DRIVER, fmt, ##__VA_ARGS__) /** * DRM_DEV_DEBUG_KMS() - Debug output for modesetting code * * NOTE: this is deprecated in favor of drm_dbg_kms(). * * @dev: device pointer * @fmt: printf() like format string. */ #define DRM_DEV_DEBUG_KMS(dev, fmt, ...) \ drm_dev_dbg(dev, DRM_UT_KMS, fmt, ##__VA_ARGS__) /* * struct drm_device based logging * * Prefer drm_device based logging over device or prink based logging. */ /* Helper to enforce struct drm_device type */ static inline struct device *__drm_to_dev(const struct drm_device *drm) { return drm ? drm->dev : NULL; } /* Helper for struct drm_device based logging. */ #define __drm_printk(drm, level, type, fmt, ...) \ dev_##level##type(__drm_to_dev(drm), "[drm] " fmt, ##__VA_ARGS__) #define drm_info(drm, fmt, ...) \ __drm_printk((drm), info,, fmt, ##__VA_ARGS__) #define drm_notice(drm, fmt, ...) \ __drm_printk((drm), notice,, fmt, ##__VA_ARGS__) #define drm_warn(drm, fmt, ...) \ __drm_printk((drm), warn,, fmt, ##__VA_ARGS__) #define drm_err(drm, fmt, ...) \ __drm_printk((drm), err,, "*ERROR* " fmt, ##__VA_ARGS__) #define drm_info_once(drm, fmt, ...) \ __drm_printk((drm), info, _once, fmt, ##__VA_ARGS__) #define drm_notice_once(drm, fmt, ...) \ __drm_printk((drm), notice, _once, fmt, ##__VA_ARGS__) #define drm_warn_once(drm, fmt, ...) \ __drm_printk((drm), warn, _once, fmt, ##__VA_ARGS__) #define drm_err_once(drm, fmt, ...) \ __drm_printk((drm), err, _once, "*ERROR* " fmt, ##__VA_ARGS__) #define drm_err_ratelimited(drm, fmt, ...) \ __drm_printk((drm), err, _ratelimited, "*ERROR* " fmt, ##__VA_ARGS__) #define drm_dbg_core(drm, fmt, ...) \ drm_dev_dbg(__drm_to_dev(drm), DRM_UT_CORE, fmt, ##__VA_ARGS__) #define drm_dbg_driver(drm, fmt, ...) \ drm_dev_dbg(__drm_to_dev(drm), DRM_UT_DRIVER, fmt, ##__VA_ARGS__) #define drm_dbg_kms(drm, fmt, ...) \ drm_dev_dbg(__drm_to_dev(drm), DRM_UT_KMS, fmt, ##__VA_ARGS__) #define drm_dbg_prime(drm, fmt, ...) \ drm_dev_dbg(__drm_to_dev(drm), DRM_UT_PRIME, fmt, ##__VA_ARGS__) #define drm_dbg_atomic(drm, fmt, ...) \ drm_dev_dbg(__drm_to_dev(drm), DRM_UT_ATOMIC, fmt, ##__VA_ARGS__) #define drm_dbg_vbl(drm, fmt, ...) \ drm_dev_dbg(__drm_to_dev(drm), DRM_UT_VBL, fmt, ##__VA_ARGS__) #define drm_dbg_state(drm, fmt, ...) \ drm_dev_dbg(__drm_to_dev(drm), DRM_UT_STATE, fmt, ##__VA_ARGS__) #define drm_dbg_lease(drm, fmt, ...) \ drm_dev_dbg(__drm_to_dev(drm), DRM_UT_LEASE, fmt, ##__VA_ARGS__) #define drm_dbg_dp(drm, fmt, ...) \ drm_dev_dbg(__drm_to_dev(drm), DRM_UT_DP, fmt, ##__VA_ARGS__) #define drm_dbg_drmres(drm, fmt, ...) \ drm_dev_dbg(__drm_to_dev(drm), DRM_UT_DRMRES, fmt, ##__VA_ARGS__) #define drm_dbg(drm, fmt, ...) drm_dbg_driver(drm, fmt, ##__VA_ARGS__) /* * printk based logging * * Prefer drm_device based logging over device or prink based logging. */ __printf(1, 2) void __drm_err(const char *format, ...); #if !defined(CONFIG_DRM_USE_DYNAMIC_DEBUG) #define __drm_dbg(cat, fmt, ...) __drm_dev_dbg(NULL, NULL, cat, fmt, ##__VA_ARGS__) #else #define __drm_dbg(cat, fmt, ...) \ _dynamic_func_call_cls(cat, fmt, __drm_dev_dbg, \ NULL, cat, fmt, ##__VA_ARGS__) #endif /* Macros to make printk easier */ #define _DRM_PRINTK(once, level, fmt, ...) \ printk##once(KERN_##level "[" DRM_NAME "] " fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_info(). */ #define DRM_INFO(fmt, ...) \ _DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_notice(). */ #define DRM_NOTE(fmt, ...) \ _DRM_PRINTK(, NOTICE, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_warn(). */ #define DRM_WARN(fmt, ...) \ _DRM_PRINTK(, WARNING, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_info_once(). */ #define DRM_INFO_ONCE(fmt, ...) \ _DRM_PRINTK(_once, INFO, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_notice_once(). */ #define DRM_NOTE_ONCE(fmt, ...) \ _DRM_PRINTK(_once, NOTICE, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_warn_once(). */ #define DRM_WARN_ONCE(fmt, ...) \ _DRM_PRINTK(_once, WARNING, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_err(). */ #define DRM_ERROR(fmt, ...) \ __drm_err(fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_err_ratelimited(). */ #define DRM_ERROR_RATELIMITED(fmt, ...) \ DRM_DEV_ERROR_RATELIMITED(NULL, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_core(NULL, ...). */ #define DRM_DEBUG(fmt, ...) \ __drm_dbg(DRM_UT_CORE, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg(NULL, ...). */ #define DRM_DEBUG_DRIVER(fmt, ...) \ __drm_dbg(DRM_UT_DRIVER, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_kms(NULL, ...). */ #define DRM_DEBUG_KMS(fmt, ...) \ __drm_dbg(DRM_UT_KMS, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_prime(NULL, ...). */ #define DRM_DEBUG_PRIME(fmt, ...) \ __drm_dbg(DRM_UT_PRIME, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_atomic(NULL, ...). */ #define DRM_DEBUG_ATOMIC(fmt, ...) \ __drm_dbg(DRM_UT_ATOMIC, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_vbl(NULL, ...). */ #define DRM_DEBUG_VBL(fmt, ...) \ __drm_dbg(DRM_UT_VBL, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_lease(NULL, ...). */ #define DRM_DEBUG_LEASE(fmt, ...) \ __drm_dbg(DRM_UT_LEASE, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_dp(NULL, ...). */ #define DRM_DEBUG_DP(fmt, ...) \ __drm_dbg(DRM_UT_DP, fmt, ## __VA_ARGS__) #define __DRM_DEFINE_DBG_RATELIMITED(category, drm, fmt, ...) \ ({ \ static DEFINE_RATELIMIT_STATE(rs_, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);\ \ if (drm_debug_enabled(DRM_UT_ ## category) && __ratelimit(&rs_)) \ drm_dev_printk(__drm_to_dev(drm), KERN_DEBUG, fmt, ## __VA_ARGS__); \ }) #define drm_dbg_ratelimited(drm, fmt, ...) \ __DRM_DEFINE_DBG_RATELIMITED(DRIVER, drm, fmt, ## __VA_ARGS__) #define drm_dbg_kms_ratelimited(drm, fmt, ...) \ __DRM_DEFINE_DBG_RATELIMITED(KMS, drm, fmt, ## __VA_ARGS__) /* * struct drm_device based WARNs * * drm_WARN*() acts like WARN*(), but with the key difference of * using device specific information so that we know from which device * warning is originating from. * * Prefer drm_device based drm_WARN* over regular WARN* */ /* Helper for struct drm_device based WARNs */ #define drm_WARN(drm, condition, format, arg...) \ WARN(condition, "%s %s: [drm] " format, \ dev_driver_string(__drm_to_dev(drm)), \ dev_name(__drm_to_dev(drm)), ## arg) #define drm_WARN_ONCE(drm, condition, format, arg...) \ WARN_ONCE(condition, "%s %s: [drm] " format, \ dev_driver_string(__drm_to_dev(drm)), \ dev_name(__drm_to_dev(drm)), ## arg) #define drm_WARN_ON(drm, x) \ drm_WARN((drm), (x), "%s", \ "drm_WARN_ON(" __stringify(x) ")") #define drm_WARN_ON_ONCE(drm, x) \ drm_WARN_ONCE((drm), (x), "%s", \ "drm_WARN_ON_ONCE(" __stringify(x) ")") #endif /* DRM_PRINT_H_ */ |
| 140 3072 170 1078 1075 83 18 16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RANDOM_H #define _LINUX_RANDOM_H #include <linux/bug.h> #include <linux/kernel.h> #include <linux/list.h> #include <uapi/linux/random.h> struct notifier_block; void add_device_randomness(const void *buf, size_t len); void __init add_bootloader_randomness(const void *buf, size_t len); void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; void add_interrupt_randomness(int irq) __latent_entropy; void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy, bool sleep_after); static inline void add_latent_entropy(void) { #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); #else add_device_randomness(NULL, 0); #endif } #if IS_ENABLED(CONFIG_VMGENID) void add_vmfork_randomness(const void *unique_vm_id, size_t len); int register_random_vmfork_notifier(struct notifier_block *nb); int unregister_random_vmfork_notifier(struct notifier_block *nb); #else static inline int register_random_vmfork_notifier(struct notifier_block *nb) { return 0; } static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) { return 0; } #endif void get_random_bytes(void *buf, size_t len); u8 get_random_u8(void); u16 get_random_u16(void); u32 get_random_u32(void); u64 get_random_u64(void); static inline unsigned long get_random_long(void) { #if BITS_PER_LONG == 64 return get_random_u64(); #else return get_random_u32(); #endif } u32 __get_random_u32_below(u32 ceil); /* * Returns a random integer in the interval [0, ceil), with uniform * distribution, suitable for all uses. Fastest when ceil is a constant, but * still fast for variable ceil as well. */ static inline u32 get_random_u32_below(u32 ceil) { if (!__builtin_constant_p(ceil)) return __get_random_u32_below(ceil); /* * For the fast path, below, all operations on ceil are precomputed by * the compiler, so this incurs no overhead for checking pow2, doing * divisions, or branching based on integer size. The resultant * algorithm does traditional reciprocal multiplication (typically * optimized by the compiler into shifts and adds), rejecting samples * whose lower half would indicate a range indivisible by ceil. */ BUILD_BUG_ON_MSG(!ceil, "get_random_u32_below() must take ceil > 0"); if (ceil <= 1) return 0; for (;;) { if (ceil <= 1U << 8) { u32 mult = ceil * get_random_u8(); if (likely(is_power_of_2(ceil) || (u8)mult >= (1U << 8) % ceil)) return mult >> 8; } else if (ceil <= 1U << 16) { u32 mult = ceil * get_random_u16(); if (likely(is_power_of_2(ceil) || (u16)mult >= (1U << 16) % ceil)) return mult >> 16; } else { u64 mult = (u64)ceil * get_random_u32(); if (likely(is_power_of_2(ceil) || (u32)mult >= -ceil % ceil)) return mult >> 32; } } } /* * Returns a random integer in the interval (floor, U32_MAX], with uniform * distribution, suitable for all uses. Fastest when floor is a constant, but * still fast for variable floor as well. */ static inline u32 get_random_u32_above(u32 floor) { BUILD_BUG_ON_MSG(__builtin_constant_p(floor) && floor == U32_MAX, "get_random_u32_above() must take floor < U32_MAX"); return floor + 1 + get_random_u32_below(U32_MAX - floor); } /* * Returns a random integer in the interval [floor, ceil], with uniform * distribution, suitable for all uses. Fastest when floor and ceil are * constant, but still fast for variable floor and ceil as well. */ static inline u32 get_random_u32_inclusive(u32 floor, u32 ceil) { BUILD_BUG_ON_MSG(__builtin_constant_p(floor) && __builtin_constant_p(ceil) && (floor > ceil || ceil - floor == U32_MAX), "get_random_u32_inclusive() must take floor <= ceil"); return floor + get_random_u32_below(ceil - floor + 1); } void __init random_init_early(const char *command_line); void __init random_init(void); bool rng_is_initialized(void); int wait_for_random_bytes(void); int execute_with_initialized_rng(struct notifier_block *nb); /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). * Returns the result of the call to wait_for_random_bytes. */ static inline int get_random_bytes_wait(void *buf, size_t nbytes) { int ret = wait_for_random_bytes(); get_random_bytes(buf, nbytes); return ret; } #ifdef CONFIG_SMP int random_prepare_cpu(unsigned int cpu); int random_online_cpu(unsigned int cpu); #endif #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; #endif #endif /* _LINUX_RANDOM_H */ |
| 168 168 168 168 168 167 147 147 147 147 147 147 106 106 6 12 25 14 7 14 24 12 82 77 4 94 93 94 4 94 94 94 68 94 94 94 94 82 82 82 82 5 2 82 94 78 94 68 94 94 4 94 5 17 21 21 17 17 17 17 17 17 17 17 17 147 9 9 21 21 9 21 6 4 9 37 6 3 37 1 167 168 166 174 175 175 37 168 168 167 168 147 147 147 147 147 106 106 106 29 9 24 94 21 96 94 94 168 168 199 184 106 208 174 174 175 40 175 175 175 184 185 135 24 111 135 135 1 2 1 47 175 10 4 175 4 175 148 106 44 148 106 82 82 106 66 66 175 174 174 5 174 175 174 174 20 32 30 3 174 175 174 175 175 175 43 175 32 148 31 175 41 41 39 40 34 176 177 176 176 35 34 176 33 175 176 174 13 12 15 13 12 12 2 10 4 13 13 203 12 10 15 12 12 3 22 1 6 5 15 184 205 180 196 49 49 14 6 3 203 203 204 203 199 164 198 24 150 1 178 33 82 191 160 117 42 117 117 116 115 209 209 209 208 187 187 186 185 184 184 51 51 51 51 51 6 49 49 3 4 50 49 49 49 49 49 49 49 49 210 209 203 205 204 184 51 162 154 202 201 186 40 205 209 210 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 | // SPDX-License-Identifier: GPL-2.0 /* * mm/mremap.c * * (C) Copyright 1996 Linus Torvalds * * Address space accounting code <alan@lxorguk.ukuu.org.uk> * (C) Copyright 2002 Red Hat Inc, All Rights Reserved */ #include <linux/mm.h> #include <linux/mm_inline.h> #include <linux/hugetlb.h> #include <linux/shm.h> #include <linux/ksm.h> #include <linux/mman.h> #include <linux/swap.h> #include <linux/capability.h> #include <linux/fs.h> #include <linux/leafops.h> #include <linux/highmem.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/mmu_notifier.h> #include <linux/uaccess.h> #include <linux/userfaultfd_k.h> #include <linux/mempolicy.h> #include <linux/pgalloc.h> #include <asm/cacheflush.h> #include <asm/tlb.h> #include "internal.h" /* Classify the kind of remap operation being performed. */ enum mremap_type { MREMAP_INVALID, /* Initial state. */ MREMAP_NO_RESIZE, /* old_len == new_len, if not moved, do nothing. */ MREMAP_SHRINK, /* old_len > new_len. */ MREMAP_EXPAND, /* old_len < new_len. */ }; /* * Describes a VMA mremap() operation and is threaded throughout it. * * Any of the fields may be mutated by the operation, however these values will * always accurately reflect the remap (for instance, we may adjust lengths and * delta to account for hugetlb alignment). */ struct vma_remap_struct { /* User-provided state. */ unsigned long addr; /* User-specified address from which we remap. */ unsigned long old_len; /* Length of range being remapped. */ unsigned long new_len; /* Desired new length of mapping. */ const unsigned long flags; /* user-specified MREMAP_* flags. */ unsigned long new_addr; /* Optionally, desired new address. */ /* uffd state. */ struct vm_userfaultfd_ctx *uf; struct list_head *uf_unmap_early; struct list_head *uf_unmap; /* VMA state, determined in do_mremap(). */ struct vm_area_struct *vma; /* Internal state, determined in do_mremap(). */ unsigned long delta; /* Absolute delta of old_len,new_len. */ bool populate_expand; /* mlock()'d expanded, must populate. */ enum mremap_type remap_type; /* expand, shrink, etc. */ bool mmap_locked; /* Is mm currently write-locked? */ unsigned long charged; /* If VM_ACCOUNT, # pages to account. */ bool vmi_needs_invalidate; /* Is the VMA iterator invalidated? */ }; static pud_t *get_old_pud(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pgd = pgd_offset(mm, addr); if (pgd_none_or_clear_bad(pgd)) return NULL; p4d = p4d_offset(pgd, addr); if (p4d_none_or_clear_bad(p4d)) return NULL; pud = pud_offset(p4d, addr); if (pud_none_or_clear_bad(pud)) return NULL; return pud; } static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) { pud_t *pud; pmd_t *pmd; pud = get_old_pud(mm, addr); if (!pud) return NULL; pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) return NULL; return pmd; } static pud_t *alloc_new_pud(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; p4d_t *p4d; pgd = pgd_offset(mm, addr); p4d = p4d_alloc(mm, pgd, addr); if (!p4d) return NULL; return pud_alloc(mm, p4d, addr); } static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr) { pud_t *pud; pmd_t *pmd; pud = alloc_new_pud(mm, addr); if (!pud) return NULL; pmd = pmd_alloc(mm, pud, addr); if (!pmd) return NULL; VM_BUG_ON(pmd_trans_huge(*pmd)); return pmd; } static void take_rmap_locks(struct vm_area_struct *vma) { if (vma->vm_file) i_mmap_lock_write(vma->vm_file->f_mapping); if (vma->anon_vma) anon_vma_lock_write(vma->anon_vma); } static void drop_rmap_locks(struct vm_area_struct *vma) { if (vma->anon_vma) anon_vma_unlock_write(vma->anon_vma); if (vma->vm_file) i_mmap_unlock_write(vma->vm_file->f_mapping); } static pte_t move_soft_dirty_pte(pte_t pte) { if (pte_none(pte)) return pte; /* * Set soft dirty bit so we can notice * in userspace the ptes were moved. */ if (pgtable_supports_soft_dirty()) { if (pte_present(pte)) pte = pte_mksoft_dirty(pte); else pte = pte_swp_mksoft_dirty(pte); } return pte; } static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int max_nr) { struct folio *folio; if (max_nr == 1) return 1; /* Avoid expensive folio lookup if we stand no chance of benefit. */ if (pte_batch_hint(ptep, pte) == 1) return 1; folio = vm_normal_folio(vma, addr, pte); if (!folio || !folio_test_large(folio)) return 1; return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, FPB_RESPECT_WRITE); } static int move_ptes(struct pagetable_move_control *pmc, unsigned long extent, pmd_t *old_pmd, pmd_t *new_pmd) { struct vm_area_struct *vma = pmc->old; bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma); struct mm_struct *mm = vma->vm_mm; pte_t *old_ptep, *new_ptep; pte_t old_pte, pte; pmd_t dummy_pmdval; spinlock_t *old_ptl, *new_ptl; bool force_flush = false; unsigned long old_addr = pmc->old_addr; unsigned long new_addr = pmc->new_addr; unsigned long old_end = old_addr + extent; unsigned long len = old_end - old_addr; int max_nr_ptes; int nr_ptes; int err = 0; /* * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma * locks to ensure that rmap will always observe either the old or the * new ptes. This is the easiest way to avoid races with * truncate_pagecache(), page migration, etc... * * When need_rmap_locks is false, we use other ways to avoid * such races: * * - During exec() shift_arg_pages(), we use a specially tagged vma * which rmap call sites look for using vma_is_temporary_stack(). * * - During mremap(), new_vma is often known to be placed after vma * in rmap traversal order. This ensures rmap will always observe * either the old pte, or the new pte, or both (the page table locks * serialize access to individual ptes, but only rmap traversal * order guarantees that we won't miss both the old and new ptes). */ if (pmc->need_rmap_locks) take_rmap_locks(vma); /* * We don't have to worry about the ordering of src and dst * pte locks because exclusive mmap_lock prevents deadlock. */ old_ptep = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl); if (!old_ptep) { err = -EAGAIN; goto out; } /* * Now new_pte is none, so hpage_collapse_scan_file() path can not find * this by traversing file->f_mapping, so there is no concurrency with * retract_page_tables(). In addition, we already hold the exclusive * mmap_lock, so this new_pte page is stable, so there is no need to get * pmdval and do pmd_same() check. */ new_ptep = pte_offset_map_rw_nolock(mm, new_pmd, new_addr, &dummy_pmdval, &new_ptl); if (!new_ptep) { pte_unmap_unlock(old_ptep, old_ptl); err = -EAGAIN; goto out; } if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); for (; old_addr < old_end; old_ptep += nr_ptes, old_addr += nr_ptes * PAGE_SIZE, new_ptep += nr_ptes, new_addr += nr_ptes * PAGE_SIZE) { VM_WARN_ON_ONCE(!pte_none(*new_ptep)); nr_ptes = 1; max_nr_ptes = (old_end - old_addr) >> PAGE_SHIFT; old_pte = ptep_get(old_ptep); if (pte_none(old_pte)) continue; /* * If we are remapping a valid PTE, make sure * to flush TLB before we drop the PTL for the * PTE. * * NOTE! Both old and new PTL matter: the old one * for racing with folio_mkclean(), the new one to * make sure the physical page stays valid until * the TLB entry for the old mapping has been * flushed. */ if (pte_present(old_pte)) { nr_ptes = mremap_folio_pte_batch(vma, old_addr, old_ptep, old_pte, max_nr_ptes); force_flush = true; } pte = get_and_clear_ptes(mm, old_addr, old_ptep, nr_ptes); pte = move_pte(pte, old_addr, new_addr); pte = move_soft_dirty_pte(pte); if (need_clear_uffd_wp && pte_is_uffd_wp_marker(pte)) pte_clear(mm, new_addr, new_ptep); else { if (need_clear_uffd_wp) { if (pte_present(pte)) pte = pte_clear_uffd_wp(pte); else pte = pte_swp_clear_uffd_wp(pte); } set_ptes(mm, new_addr, new_ptep, pte, nr_ptes); } } arch_leave_lazy_mmu_mode(); if (force_flush) flush_tlb_range(vma, old_end - len, old_end); if (new_ptl != old_ptl) spin_unlock(new_ptl); pte_unmap(new_ptep - 1); pte_unmap_unlock(old_ptep - 1, old_ptl); out: if (pmc->need_rmap_locks) drop_rmap_locks(vma); return err; } #ifndef arch_supports_page_table_move #define arch_supports_page_table_move arch_supports_page_table_move static inline bool arch_supports_page_table_move(void) { return IS_ENABLED(CONFIG_HAVE_MOVE_PMD) || IS_ENABLED(CONFIG_HAVE_MOVE_PUD); } #endif static inline bool uffd_supports_page_table_move(struct pagetable_move_control *pmc) { /* * If we are moving a VMA that has uffd-wp registered but with * remap events disabled (new VMA will not be registered with uffd), we * need to ensure that the uffd-wp state is cleared from all pgtables. * This means recursing into lower page tables in move_page_tables(). * * We might get called with VMAs reversed when recovering from a * failed page table move. In that case, the * "old"-but-actually-"originally new" VMA during recovery will not have * a uffd context. Recursing into lower page tables during the original * move but not during the recovery move will cause trouble, because we * run into already-existing page tables. So check both VMAs. */ return !vma_has_uffd_without_event_remap(pmc->old) && !vma_has_uffd_without_event_remap(pmc->new); } #ifdef CONFIG_HAVE_MOVE_PMD static bool move_normal_pmd(struct pagetable_move_control *pmc, pmd_t *old_pmd, pmd_t *new_pmd) { spinlock_t *old_ptl, *new_ptl; struct vm_area_struct *vma = pmc->old; struct mm_struct *mm = vma->vm_mm; bool res = false; pmd_t pmd; if (!arch_supports_page_table_move()) return false; if (!uffd_supports_page_table_move(pmc)) return false; /* * The destination pmd shouldn't be established, free_pgtables() * should have released it. * * However, there's a case during execve() where we use mremap * to move the initial stack, and in that case the target area * may overlap the source area (always moving down). * * If everything is PMD-aligned, that works fine, as moving * each pmd down will clear the source pmd. But if we first * have a few 4kB-only pages that get moved down, and then * hit the "now the rest is PMD-aligned, let's do everything * one pmd at a time", we will still have the old (now empty * of any 4kB pages, but still there) PMD in the page table * tree. * * Warn on it once - because we really should try to figure * out how to do this better - but then say "I won't move * this pmd". * * One alternative might be to just unmap the target pmd at * this point, and verify that it really is empty. We'll see. */ if (WARN_ON_ONCE(!pmd_none(*new_pmd))) return false; /* * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. */ old_ptl = pmd_lock(mm, old_pmd); new_ptl = pmd_lockptr(mm, new_pmd); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); pmd = *old_pmd; /* Racing with collapse? */ if (unlikely(!pmd_present(pmd) || pmd_leaf(pmd))) goto out_unlock; /* Clear the pmd */ pmd_clear(old_pmd); res = true; VM_BUG_ON(!pmd_none(*new_pmd)); pmd_populate(mm, new_pmd, pmd_pgtable(pmd)); flush_tlb_range(vma, pmc->old_addr, pmc->old_addr + PMD_SIZE); out_unlock: if (new_ptl != old_ptl) spin_unlock(new_ptl); spin_unlock(old_ptl); return res; } #else static inline bool move_normal_pmd(struct pagetable_move_control *pmc, pmd_t *old_pmd, pmd_t *new_pmd) { return false; } #endif #if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_HAVE_MOVE_PUD) static bool move_normal_pud(struct pagetable_move_control *pmc, pud_t *old_pud, pud_t *new_pud) { spinlock_t *old_ptl, *new_ptl; struct vm_area_struct *vma = pmc->old; struct mm_struct *mm = vma->vm_mm; pud_t pud; if (!arch_supports_page_table_move()) return false; if (!uffd_supports_page_table_move(pmc)) return false; /* * The destination pud shouldn't be established, free_pgtables() * should have released it. */ if (WARN_ON_ONCE(!pud_none(*new_pud))) return false; /* * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. */ old_ptl = pud_lock(mm, old_pud); new_ptl = pud_lockptr(mm, new_pud); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); /* Clear the pud */ pud = *old_pud; pud_clear(old_pud); VM_BUG_ON(!pud_none(*new_pud)); pud_populate(mm, new_pud, pud_pgtable(pud)); flush_tlb_range(vma, pmc->old_addr, pmc->old_addr + PUD_SIZE); if (new_ptl != old_ptl) spin_unlock(new_ptl); spin_unlock(old_ptl); return true; } #else static inline bool move_normal_pud(struct pagetable_move_control *pmc, pud_t *old_pud, pud_t *new_pud) { return false; } #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) static bool move_huge_pud(struct pagetable_move_control *pmc, pud_t *old_pud, pud_t *new_pud) { spinlock_t *old_ptl, *new_ptl; struct vm_area_struct *vma = pmc->old; struct mm_struct *mm = vma->vm_mm; pud_t pud; /* * The destination pud shouldn't be established, free_pgtables() * should have released it. */ if (WARN_ON_ONCE(!pud_none(*new_pud))) return false; /* * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. */ old_ptl = pud_lock(mm, old_pud); new_ptl = pud_lockptr(mm, new_pud); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); /* Clear the pud */ pud = *old_pud; pud_clear(old_pud); VM_BUG_ON(!pud_none(*new_pud)); /* Set the new pud */ /* mark soft_ditry when we add pud level soft dirty support */ set_pud_at(mm, pmc->new_addr, new_pud, pud); flush_pud_tlb_range(vma, pmc->old_addr, pmc->old_addr + HPAGE_PUD_SIZE); if (new_ptl != old_ptl) spin_unlock(new_ptl); spin_unlock(old_ptl); return true; } #else static bool move_huge_pud(struct pagetable_move_control *pmc, pud_t *old_pud, pud_t *new_pud) { WARN_ON_ONCE(1); return false; } #endif enum pgt_entry { NORMAL_PMD, HPAGE_PMD, NORMAL_PUD, HPAGE_PUD, }; /* * Returns an extent of the corresponding size for the pgt_entry specified if * valid. Else returns a smaller extent bounded by the end of the source and * destination pgt_entry. */ static __always_inline unsigned long get_extent(enum pgt_entry entry, struct pagetable_move_control *pmc) { unsigned long next, extent, mask, size; unsigned long old_addr = pmc->old_addr; unsigned long old_end = pmc->old_end; unsigned long new_addr = pmc->new_addr; switch (entry) { case HPAGE_PMD: case NORMAL_PMD: mask = PMD_MASK; size = PMD_SIZE; break; case HPAGE_PUD: case NORMAL_PUD: mask = PUD_MASK; size = PUD_SIZE; break; default: BUILD_BUG(); break; } next = (old_addr + size) & mask; /* even if next overflowed, extent below will be ok */ extent = next - old_addr; if (extent > old_end - old_addr) extent = old_end - old_addr; next = (new_addr + size) & mask; if (extent > next - new_addr) extent = next - new_addr; return extent; } /* * Should move_pgt_entry() acquire the rmap locks? This is either expressed in * the PMC, or overridden in the case of normal, larger page tables. */ static bool should_take_rmap_locks(struct pagetable_move_control *pmc, enum pgt_entry entry) { switch (entry) { case NORMAL_PMD: case NORMAL_PUD: return true; default: return pmc->need_rmap_locks; } } /* * Attempts to speedup the move by moving entry at the level corresponding to * pgt_entry. Returns true if the move was successful, else false. */ static bool move_pgt_entry(struct pagetable_move_control *pmc, enum pgt_entry entry, void *old_entry, void *new_entry) { bool moved = false; bool need_rmap_locks = should_take_rmap_locks(pmc, entry); /* See comment in move_ptes() */ if (need_rmap_locks) take_rmap_locks(pmc->old); switch (entry) { case NORMAL_PMD: moved = move_normal_pmd(pmc, old_entry, new_entry); break; case NORMAL_PUD: moved = move_normal_pud(pmc, old_entry, new_entry); break; case HPAGE_PMD: moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && move_huge_pmd(pmc->old, pmc->old_addr, pmc->new_addr, old_entry, new_entry); break; case HPAGE_PUD: moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && move_huge_pud(pmc, old_entry, new_entry); break; default: WARN_ON_ONCE(1); break; } if (need_rmap_locks) drop_rmap_locks(pmc->old); return moved; } /* * A helper to check if aligning down is OK. The aligned address should fall * on *no mapping*. For the stack moving down, that's a special move within * the VMA that is created to span the source and destination of the move, * so we make an exception for it. */ static bool can_align_down(struct pagetable_move_control *pmc, struct vm_area_struct *vma, unsigned long addr_to_align, unsigned long mask) { unsigned long addr_masked = addr_to_align & mask; /* * If @addr_to_align of either source or destination is not the beginning * of the corresponding VMA, we can't align down or we will destroy part * of the current mapping. */ if (!pmc->for_stack && vma->vm_start != addr_to_align) return false; /* In the stack case we explicitly permit in-VMA alignment. */ if (pmc->for_stack && addr_masked >= vma->vm_start) return true; /* * Make sure the realignment doesn't cause the address to fall on an * existing mapping. */ return find_vma_intersection(vma->vm_mm, addr_masked, vma->vm_start) == NULL; } /* * Determine if are in fact able to realign for efficiency to a higher page * table boundary. */ static bool can_realign_addr(struct pagetable_move_control *pmc, unsigned long pagetable_mask) { unsigned long align_mask = ~pagetable_mask; unsigned long old_align = pmc->old_addr & align_mask; unsigned long new_align = pmc->new_addr & align_mask; unsigned long pagetable_size = align_mask + 1; unsigned long old_align_next = pagetable_size - old_align; /* * We don't want to have to go hunting for VMAs from the end of the old * VMA to the next page table boundary, also we want to make sure the * operation is wortwhile. * * So ensure that we only perform this realignment if the end of the * range being copied reaches or crosses the page table boundary. * * boundary boundary * .<- old_align -> . * . |----------------.-----------| * . | vma . | * . |----------------.-----------| * . <----------------.-----------> * . len_in * <-------------------------------> * . pagetable_size . * . <----------------> * . old_align_next . */ if (pmc->len_in < old_align_next) return false; /* Skip if the addresses are already aligned. */ if (old_align == 0) return false; /* Only realign if the new and old addresses are mutually aligned. */ if (old_align != new_align) return false; /* Ensure realignment doesn't cause overlap with existing mappings. */ if (!can_align_down(pmc, pmc->old, pmc->old_addr, pagetable_mask) || !can_align_down(pmc, pmc->new, pmc->new_addr, pagetable_mask)) return false; return true; } /* * Opportunistically realign to specified boundary for faster copy. * * Consider an mremap() of a VMA with page table boundaries as below, and no * preceding VMAs from the lower page table boundary to the start of the VMA, * with the end of the range reaching or crossing the page table boundary. * * boundary boundary * . |----------------.-----------| * . | vma . | * . |----------------.-----------| * . pmc->old_addr . pmc->old_end * . <----------------------------> * . move these page tables * * If we proceed with moving page tables in this scenario, we will have a lot of * work to do traversing old page tables and establishing new ones in the * destination across multiple lower level page tables. * * The idea here is simply to align pmc->old_addr, pmc->new_addr down to the * page table boundary, so we can simply copy a single page table entry for the * aligned portion of the VMA instead: * * boundary boundary * . |----------------.-----------| * . | vma . | * . |----------------.-----------| * pmc->old_addr . pmc->old_end * <-------------------------------------------> * . move these page tables */ static void try_realign_addr(struct pagetable_move_control *pmc, unsigned long pagetable_mask) { if (!can_realign_addr(pmc, pagetable_mask)) return; /* * Simply align to page table boundaries. Note that we do NOT update the * pmc->old_end value, and since the move_page_tables() operation spans * from [old_addr, old_end) (offsetting new_addr as it is performed), * this simply changes the start of the copy, not the end. */ pmc->old_addr &= pagetable_mask; pmc->new_addr &= pagetable_mask; } /* Is the page table move operation done? */ static bool pmc_done(struct pagetable_move_control *pmc) { return pmc->old_addr >= pmc->old_end; } /* Advance to the next page table, offset by extent bytes. */ static void pmc_next(struct pagetable_move_control *pmc, unsigned long extent) { pmc->old_addr += extent; pmc->new_addr += extent; } /* * Determine how many bytes in the specified input range have had their page * tables moved so far. */ static unsigned long pmc_progress(struct pagetable_move_control *pmc) { unsigned long orig_old_addr = pmc->old_end - pmc->len_in; unsigned long old_addr = pmc->old_addr; /* * Prevent negative return values when {old,new}_addr was realigned but * we broke out of the loop in move_page_tables() for the first PMD * itself. */ return old_addr < orig_old_addr ? 0 : old_addr - orig_old_addr; } unsigned long move_page_tables(struct pagetable_move_control *pmc) { unsigned long extent; struct mmu_notifier_range range; pmd_t *old_pmd, *new_pmd; pud_t *old_pud, *new_pud; struct mm_struct *mm = pmc->old->vm_mm; if (!pmc->len_in) return 0; if (is_vm_hugetlb_page(pmc->old)) return move_hugetlb_page_tables(pmc->old, pmc->new, pmc->old_addr, pmc->new_addr, pmc->len_in); /* * If possible, realign addresses to PMD boundary for faster copy. * Only realign if the mremap copying hits a PMD boundary. */ try_realign_addr(pmc, PMD_MASK); flush_cache_range(pmc->old, pmc->old_addr, pmc->old_end); mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, mm, pmc->old_addr, pmc->old_end); mmu_notifier_invalidate_range_start(&range); for (; !pmc_done(pmc); pmc_next(pmc, extent)) { cond_resched(); /* * If extent is PUD-sized try to speed up the move by moving at the * PUD level if possible. */ extent = get_extent(NORMAL_PUD, pmc); old_pud = get_old_pud(mm, pmc->old_addr); if (!old_pud) continue; new_pud = alloc_new_pud(mm, pmc->new_addr); if (!new_pud) break; if (pud_trans_huge(*old_pud)) { if (extent == HPAGE_PUD_SIZE) { move_pgt_entry(pmc, HPAGE_PUD, old_pud, new_pud); /* We ignore and continue on error? */ continue; } } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) { if (move_pgt_entry(pmc, NORMAL_PUD, old_pud, new_pud)) continue; } extent = get_extent(NORMAL_PMD, pmc); old_pmd = get_old_pmd(mm, pmc->old_addr); if (!old_pmd) continue; new_pmd = alloc_new_pmd(mm, pmc->new_addr); if (!new_pmd) break; again: if (pmd_is_huge(*old_pmd)) { if (extent == HPAGE_PMD_SIZE && move_pgt_entry(pmc, HPAGE_PMD, old_pmd, new_pmd)) continue; split_huge_pmd(pmc->old, old_pmd, pmc->old_addr); } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) && extent == PMD_SIZE) { /* * If the extent is PMD-sized, try to speed the move by * moving at the PMD level if possible. */ if (move_pgt_entry(pmc, NORMAL_PMD, old_pmd, new_pmd)) continue; } if (pmd_none(*old_pmd)) continue; if (pte_alloc(pmc->new->vm_mm, new_pmd)) break; if (move_ptes(pmc, extent, old_pmd, new_pmd) < 0) goto again; } mmu_notifier_invalidate_range_end(&range); return pmc_progress(pmc); } /* Set vrm->delta to the difference in VMA size specified by user. */ static void vrm_set_delta(struct vma_remap_struct *vrm) { vrm->delta = abs_diff(vrm->old_len, vrm->new_len); } /* Determine what kind of remap this is - shrink, expand or no resize at all. */ static enum mremap_type vrm_remap_type(struct vma_remap_struct *vrm) { if (vrm->delta == 0) return MREMAP_NO_RESIZE; if (vrm->old_len > vrm->new_len) return MREMAP_SHRINK; return MREMAP_EXPAND; } /* * When moving a VMA to vrm->new_adr, does this result in the new and old VMAs * overlapping? */ static bool vrm_overlaps(struct vma_remap_struct *vrm) { unsigned long start_old = vrm->addr; unsigned long start_new = vrm->new_addr; unsigned long end_old = vrm->addr + vrm->old_len; unsigned long end_new = vrm->new_addr + vrm->new_len; /* * start_old end_old * |-----------| * | | * |-----------| * |-------------| * | | * |-------------| * start_new end_new */ if (end_old > start_new && end_new > start_old) return true; return false; } /* * Will a new address definitely be assigned? This either if the user specifies * it via MREMAP_FIXED, or if MREMAP_DONTUNMAP is used, indicating we will * always detemrine a target address. */ static bool vrm_implies_new_addr(struct vma_remap_struct *vrm) { return vrm->flags & (MREMAP_FIXED | MREMAP_DONTUNMAP); } /* * Find an unmapped area for the requested vrm->new_addr. * * If MREMAP_FIXED then this is equivalent to a MAP_FIXED mmap() call. If only * MREMAP_DONTUNMAP is set, then this is equivalent to providing a hint to * mmap(), otherwise this is equivalent to mmap() specifying a NULL address. * * Returns 0 on success (with vrm->new_addr updated), or an error code upon * failure. */ static unsigned long vrm_set_new_addr(struct vma_remap_struct *vrm) { struct vm_area_struct *vma = vrm->vma; unsigned long map_flags = 0; /* Page Offset _into_ the VMA. */ pgoff_t internal_pgoff = (vrm->addr - vma->vm_start) >> PAGE_SHIFT; pgoff_t pgoff = vma->vm_pgoff + internal_pgoff; unsigned long new_addr = vrm_implies_new_addr(vrm) ? vrm->new_addr : 0; unsigned long res; if (vrm->flags & MREMAP_FIXED) map_flags |= MAP_FIXED; if (vma->vm_flags & VM_MAYSHARE) map_flags |= MAP_SHARED; res = get_unmapped_area(vma->vm_file, new_addr, vrm->new_len, pgoff, map_flags); if (IS_ERR_VALUE(res)) return res; vrm->new_addr = res; return 0; } /* * Keep track of pages which have been added to the memory mapping. If the VMA * is accounted, also check to see if there is sufficient memory. * * Returns true on success, false if insufficient memory to charge. */ static bool vrm_calc_charge(struct vma_remap_struct *vrm) { unsigned long charged; if (!(vrm->vma->vm_flags & VM_ACCOUNT)) return true; /* * If we don't unmap the old mapping, then we account the entirety of * the length of the new one. Otherwise it's just the delta in size. */ if (vrm->flags & MREMAP_DONTUNMAP) charged = vrm->new_len >> PAGE_SHIFT; else charged = vrm->delta >> PAGE_SHIFT; /* This accounts 'charged' pages of memory. */ if (security_vm_enough_memory_mm(current->mm, charged)) return false; vrm->charged = charged; return true; } /* * an error has occurred so we will not be using vrm->charged memory. Unaccount * this memory if the VMA is accounted. */ static void vrm_uncharge(struct vma_remap_struct *vrm) { if (!(vrm->vma->vm_flags & VM_ACCOUNT)) return; vm_unacct_memory(vrm->charged); vrm->charged = 0; } /* * Update mm exec_vm, stack_vm, data_vm, and locked_vm fields as needed to * account for 'bytes' memory used, and if locked, indicate this in the VRM so * we can handle this correctly later. */ static void vrm_stat_account(struct vma_remap_struct *vrm, unsigned long bytes) { unsigned long pages = bytes >> PAGE_SHIFT; struct mm_struct *mm = current->mm; struct vm_area_struct *vma = vrm->vma; vm_stat_account(mm, vma->vm_flags, pages); if (vma->vm_flags & VM_LOCKED) mm->locked_vm += pages; } /* * Perform checks before attempting to write a VMA prior to it being * moved. */ static unsigned long prep_move_vma(struct vma_remap_struct *vrm) { unsigned long err = 0; struct vm_area_struct *vma = vrm->vma; unsigned long old_addr = vrm->addr; unsigned long old_len = vrm->old_len; vm_flags_t dummy = vma->vm_flags; /* * We'd prefer to avoid failure later on in do_munmap: * which may split one vma into three before unmapping. */ if (current->mm->map_count >= sysctl_max_map_count - 3) return -ENOMEM; if (vma->vm_ops && vma->vm_ops->may_split) { if (vma->vm_start != old_addr) err = vma->vm_ops->may_split(vma, old_addr); if (!err && vma->vm_end != old_addr + old_len) err = vma->vm_ops->may_split(vma, old_addr + old_len); if (err) return err; } /* * Advise KSM to break any KSM pages in the area to be moved: * it would be confusing if they were to turn up at the new * location, where they happen to coincide with different KSM * pages recently unmapped. But leave vma->vm_flags as it was, * so KSM can come around to merge on vma and new_vma afterwards. */ err = ksm_madvise(vma, old_addr, old_addr + old_len, MADV_UNMERGEABLE, &dummy); if (err) return err; return 0; } /* * Unmap source VMA for VMA move, turning it from a copy to a move, being * careful to ensure we do not underflow memory account while doing so if an * accountable move. * * This is best effort, if we fail to unmap then we simply try to correct * accounting and exit. */ static void unmap_source_vma(struct vma_remap_struct *vrm) { struct mm_struct *mm = current->mm; unsigned long addr = vrm->addr; unsigned long len = vrm->old_len; struct vm_area_struct *vma = vrm->vma; VMA_ITERATOR(vmi, mm, addr); int err; unsigned long vm_start; unsigned long vm_end; /* * It might seem odd that we check for MREMAP_DONTUNMAP here, given this * function implies that we unmap the original VMA, which seems * contradictory. * * However, this occurs when this operation was attempted and an error * arose, in which case we _do_ wish to unmap the _new_ VMA, which means * we actually _do_ want it be unaccounted. */ bool accountable_move = (vma->vm_flags & VM_ACCOUNT) && !(vrm->flags & MREMAP_DONTUNMAP); /* * So we perform a trick here to prevent incorrect accounting. Any merge * or new VMA allocation performed in copy_vma() does not adjust * accounting, it is expected that callers handle this. * * And indeed we already have, accounting appropriately in the case of * both in vrm_charge(). * * However, when we unmap the existing VMA (to effect the move), this * code will, if the VMA has VM_ACCOUNT set, attempt to unaccount * removed pages. * * To avoid this we temporarily clear this flag, reinstating on any * portions of the original VMA that remain. */ if (accountable_move) { vm_flags_clear(vma, VM_ACCOUNT); /* We are about to split vma, so store the start/end. */ vm_start = vma->vm_start; vm_end = vma->vm_end; } err = do_vmi_munmap(&vmi, mm, addr, len, vrm->uf_unmap, /* unlock= */false); vrm->vma = NULL; /* Invalidated. */ vrm->vmi_needs_invalidate = true; if (err) { /* OOM: unable to split vma, just get accounts right */ vm_acct_memory(len >> PAGE_SHIFT); return; } /* * If we mremap() from a VMA like this: * * addr end * | | * v v * |-------------| * | | * |-------------| * * Having cleared VM_ACCOUNT from the whole VMA, after we unmap above * we'll end up with: * * addr end * | | * v v * |---| |---| * | A | | B | * |---| |---| * * The VMI is still pointing at addr, so vma_prev() will give us A, and * a subsequent or lone vma_next() will give as B. * * do_vmi_munmap() will have restored the VMI back to addr. */ if (accountable_move) { unsigned long end = addr + len; if (vm_start < addr) { struct vm_area_struct *prev = vma_prev(&vmi); vm_flags_set(prev, VM_ACCOUNT); /* Acquires VMA lock. */ } if (vm_end > end) { struct vm_area_struct *next = vma_next(&vmi); vm_flags_set(next, VM_ACCOUNT); /* Acquires VMA lock. */ } } } /* * Copy vrm->vma over to vrm->new_addr possibly adjusting size as part of the * process. Additionally handle an error occurring on moving of page tables, * where we reset vrm state to cause unmapping of the new VMA. * * Outputs the newly installed VMA to new_vma_ptr. Returns 0 on success or an * error code. */ static int copy_vma_and_data(struct vma_remap_struct *vrm, struct vm_area_struct **new_vma_ptr) { unsigned long internal_offset = vrm->addr - vrm->vma->vm_start; unsigned long internal_pgoff = internal_offset >> PAGE_SHIFT; unsigned long new_pgoff = vrm->vma->vm_pgoff + internal_pgoff; unsigned long moved_len; struct vm_area_struct *vma = vrm->vma; struct vm_area_struct *new_vma; int err = 0; PAGETABLE_MOVE(pmc, NULL, NULL, vrm->addr, vrm->new_addr, vrm->old_len); new_vma = copy_vma(&vma, vrm->new_addr, vrm->new_len, new_pgoff, &pmc.need_rmap_locks); if (!new_vma) { vrm_uncharge(vrm); *new_vma_ptr = NULL; return -ENOMEM; } /* By merging, we may have invalidated any iterator in use. */ if (vma != vrm->vma) vrm->vmi_needs_invalidate = true; vrm->vma = vma; pmc.old = vma; pmc.new = new_vma; moved_len = move_page_tables(&pmc); if (moved_len < vrm->old_len) err = -ENOMEM; else if (vma->vm_ops && vma->vm_ops->mremap) err = vma->vm_ops->mremap(new_vma); if (unlikely(err)) { PAGETABLE_MOVE(pmc_revert, new_vma, vma, vrm->new_addr, vrm->addr, moved_len); /* * On error, move entries back from new area to old, * which will succeed since page tables still there, * and then proceed to unmap new area instead of old. */ pmc_revert.need_rmap_locks = true; move_page_tables(&pmc_revert); vrm->vma = new_vma; vrm->old_len = vrm->new_len; vrm->addr = vrm->new_addr; } else { mremap_userfaultfd_prep(new_vma, vrm->uf); } fixup_hugetlb_reservations(vma); *new_vma_ptr = new_vma; return err; } /* * Perform final tasks for MADV_DONTUNMAP operation, clearing mlock() flag on * remaining VMA by convention (it cannot be mlock()'d any longer, as pages in * range are no longer mapped), and removing anon_vma_chain links from it if the * entire VMA was copied over. */ static void dontunmap_complete(struct vma_remap_struct *vrm, struct vm_area_struct *new_vma) { unsigned long start = vrm->addr; unsigned long end = vrm->addr + vrm->old_len; unsigned long old_start = vrm->vma->vm_start; unsigned long old_end = vrm->vma->vm_end; /* We always clear VM_LOCKED[ONFAULT] on the old VMA. */ vm_flags_clear(vrm->vma, VM_LOCKED_MASK); /* * anon_vma links of the old vma is no longer needed after its page * table has been moved. */ if (new_vma != vrm->vma && start == old_start && end == old_end) unlink_anon_vmas(vrm->vma); /* Because we won't unmap we don't need to touch locked_vm. */ } static unsigned long move_vma(struct vma_remap_struct *vrm) { struct mm_struct *mm = current->mm; struct vm_area_struct *new_vma; unsigned long hiwater_vm; int err; err = prep_move_vma(vrm); if (err) return err; /* * If accounted, determine the number of bytes the operation will * charge. */ if (!vrm_calc_charge(vrm)) return -ENOMEM; /* We don't want racing faults. */ vma_start_write(vrm->vma); /* Perform copy step. */ err = copy_vma_and_data(vrm, &new_vma); /* * If we established the copied-to VMA, we attempt to recover from the * error by setting the destination VMA to the source VMA and unmapping * it below. */ if (err && !new_vma) return err; /* * If we failed to move page tables we still do total_vm increment * since do_munmap() will decrement it by old_len == new_len. * * Since total_vm is about to be raised artificially high for a * moment, we need to restore high watermark afterwards: if stats * are taken meanwhile, total_vm and hiwater_vm appear too high. * If this were a serious issue, we'd add a flag to do_munmap(). */ hiwater_vm = mm->hiwater_vm; vrm_stat_account(vrm, vrm->new_len); if (unlikely(!err && (vrm->flags & MREMAP_DONTUNMAP))) dontunmap_complete(vrm, new_vma); else unmap_source_vma(vrm); mm->hiwater_vm = hiwater_vm; return err ? (unsigned long)err : vrm->new_addr; } /* * The user has requested that the VMA be shrunk (i.e., old_len > new_len), so * execute this, optionally dropping the mmap lock when we do so. * * In both cases this invalidates the VMA, however if we don't drop the lock, * then load the correct VMA into vrm->vma afterwards. */ static unsigned long shrink_vma(struct vma_remap_struct *vrm, bool drop_lock) { struct mm_struct *mm = current->mm; unsigned long unmap_start = vrm->addr + vrm->new_len; unsigned long unmap_bytes = vrm->delta; unsigned long res; VMA_ITERATOR(vmi, mm, unmap_start); VM_BUG_ON(vrm->remap_type != MREMAP_SHRINK); res = do_vmi_munmap(&vmi, mm, unmap_start, unmap_bytes, vrm->uf_unmap, drop_lock); vrm->vma = NULL; /* Invalidated. */ if (res) return res; /* * If we've not dropped the lock, then we should reload the VMA to * replace the invalidated VMA with the one that may have now been * split. */ if (drop_lock) { vrm->mmap_locked = false; } else { vrm->vma = vma_lookup(mm, vrm->addr); if (!vrm->vma) return -EFAULT; } return 0; } /* * mremap_to() - remap a vma to a new location. * Returns: The new address of the vma or an error. */ static unsigned long mremap_to(struct vma_remap_struct *vrm) { struct mm_struct *mm = current->mm; unsigned long err; if (vrm->flags & MREMAP_FIXED) { /* * In mremap_to(). * VMA is moved to dst address, and munmap dst first. * do_munmap will check if dst is sealed. */ err = do_munmap(mm, vrm->new_addr, vrm->new_len, vrm->uf_unmap_early); vrm->vma = NULL; /* Invalidated. */ vrm->vmi_needs_invalidate = true; if (err) return err; /* * If we remap a portion of a VMA elsewhere in the same VMA, * this can invalidate the old VMA. Reset. */ vrm->vma = vma_lookup(mm, vrm->addr); if (!vrm->vma) return -EFAULT; } if (vrm->remap_type == MREMAP_SHRINK) { err = shrink_vma(vrm, /* drop_lock= */false); if (err) return err; /* Set up for the move now shrink has been executed. */ vrm->old_len = vrm->new_len; } /* MREMAP_DONTUNMAP expands by old_len since old_len == new_len */ if (vrm->flags & MREMAP_DONTUNMAP) { vm_flags_t vm_flags = vrm->vma->vm_flags; unsigned long pages = vrm->old_len >> PAGE_SHIFT; if (!may_expand_vm(mm, vm_flags, pages)) return -ENOMEM; } err = vrm_set_new_addr(vrm); if (err) return err; return move_vma(vrm); } static int vma_expandable(struct vm_area_struct *vma, unsigned long delta) { unsigned long end = vma->vm_end + delta; if (end < vma->vm_end) /* overflow */ return 0; if (find_vma_intersection(vma->vm_mm, vma->vm_end, end)) return 0; if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start, 0, MAP_FIXED) & ~PAGE_MASK) return 0; return 1; } /* Determine whether we are actually able to execute an in-place expansion. */ static bool vrm_can_expand_in_place(struct vma_remap_struct *vrm) { /* Number of bytes from vrm->addr to end of VMA. */ unsigned long suffix_bytes = vrm->vma->vm_end - vrm->addr; /* If end of range aligns to end of VMA, we can just expand in-place. */ if (suffix_bytes != vrm->old_len) return false; /* Check whether this is feasible. */ if (!vma_expandable(vrm->vma, vrm->delta)) return false; return true; } /* * We know we can expand the VMA in-place by delta pages, so do so. * * If we discover the VMA is locked, update mm_struct statistics accordingly and * indicate so to the caller. */ static unsigned long expand_vma_in_place(struct vma_remap_struct *vrm) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma = vrm->vma; VMA_ITERATOR(vmi, mm, vma->vm_end); if (!vrm_calc_charge(vrm)) return -ENOMEM; /* * Function vma_merge_extend() is called on the * extension we are adding to the already existing vma, * vma_merge_extend() will merge this extension with the * already existing vma (expand operation itself) and * possibly also with the next vma if it becomes * adjacent to the expanded vma and otherwise * compatible. */ vma = vma_merge_extend(&vmi, vma, vrm->delta); if (!vma) { vrm_uncharge(vrm); return -ENOMEM; } vrm->vma = vma; vrm_stat_account(vrm, vrm->delta); return 0; } static bool align_hugetlb(struct vma_remap_struct *vrm) { struct hstate *h __maybe_unused = hstate_vma(vrm->vma); vrm->old_len = ALIGN(vrm->old_len, huge_page_size(h)); vrm->new_len = ALIGN(vrm->new_len, huge_page_size(h)); /* addrs must be huge page aligned */ if (vrm->addr & ~huge_page_mask(h)) return false; if (vrm->new_addr & ~huge_page_mask(h)) return false; /* * Don't allow remap expansion, because the underlying hugetlb * reservation is not yet capable to handle split reservation. */ if (vrm->new_len > vrm->old_len) return false; return true; } /* * We are mremap()'ing without specifying a fixed address to move to, but are * requesting that the VMA's size be increased. * * Try to do so in-place, if this fails, then move the VMA to a new location to * action the change. */ static unsigned long expand_vma(struct vma_remap_struct *vrm) { unsigned long err; /* * [addr, old_len) spans precisely to the end of the VMA, so try to * expand it in-place. */ if (vrm_can_expand_in_place(vrm)) { err = expand_vma_in_place(vrm); if (err) return err; /* OK we're done! */ return vrm->addr; } /* * We weren't able to just expand or shrink the area, * we need to create a new one and move it. */ /* We're not allowed to move the VMA, so error out. */ if (!(vrm->flags & MREMAP_MAYMOVE)) return -ENOMEM; /* Find a new location to move the VMA to. */ err = vrm_set_new_addr(vrm); if (err) return err; return move_vma(vrm); } /* * Attempt to resize the VMA in-place, if we cannot, then move the VMA to the * first available address to perform the operation. */ static unsigned long mremap_at(struct vma_remap_struct *vrm) { unsigned long res; switch (vrm->remap_type) { case MREMAP_INVALID: break; case MREMAP_NO_RESIZE: /* NO-OP CASE - resizing to the same size. */ return vrm->addr; case MREMAP_SHRINK: /* * SHRINK CASE. Can always be done in-place. * * Simply unmap the shrunken portion of the VMA. This does all * the needed commit accounting, and we indicate that the mmap * lock should be dropped. */ res = shrink_vma(vrm, /* drop_lock= */true); if (res) return res; return vrm->addr; case MREMAP_EXPAND: return expand_vma(vrm); } /* Should not be possible. */ WARN_ON_ONCE(1); return -EINVAL; } /* * Will this operation result in the VMA being expanded or moved and thus need * to map a new portion of virtual address space? */ static bool vrm_will_map_new(struct vma_remap_struct *vrm) { if (vrm->remap_type == MREMAP_EXPAND) return true; if (vrm_implies_new_addr(vrm)) return true; return false; } /* Does this remap ONLY move mappings? */ static bool vrm_move_only(struct vma_remap_struct *vrm) { if (!(vrm->flags & MREMAP_FIXED)) return false; if (vrm->old_len != vrm->new_len) return false; return true; } static void notify_uffd(struct vma_remap_struct *vrm, bool failed) { struct mm_struct *mm = current->mm; /* Regardless of success/failure, we always notify of any unmaps. */ userfaultfd_unmap_complete(mm, vrm->uf_unmap_early); if (failed) mremap_userfaultfd_fail(vrm->uf); else mremap_userfaultfd_complete(vrm->uf, vrm->addr, vrm->new_addr, vrm->old_len); userfaultfd_unmap_complete(mm, vrm->uf_unmap); } static bool vma_multi_allowed(struct vm_area_struct *vma) { struct file *file = vma->vm_file; /* * We can't support moving multiple uffd VMAs as notify requires * mmap lock to be dropped. */ if (userfaultfd_armed(vma)) return false; /* * Custom get unmapped area might result in MREMAP_FIXED not * being obeyed. */ if (!file || !file->f_op->get_unmapped_area) return true; /* Known good. */ if (vma_is_shmem(vma)) return true; if (is_vm_hugetlb_page(vma)) return true; if (file->f_op->get_unmapped_area == thp_get_unmapped_area) return true; return false; } static int check_prep_vma(struct vma_remap_struct *vrm) { struct vm_area_struct *vma = vrm->vma; struct mm_struct *mm = current->mm; unsigned long addr = vrm->addr; unsigned long old_len, new_len, pgoff; if (!vma) return -EFAULT; /* If mseal()'d, mremap() is prohibited. */ if (vma_is_sealed(vma)) return -EPERM; /* Align to hugetlb page size, if required. */ if (is_vm_hugetlb_page(vma) && !align_hugetlb(vrm)) return -EINVAL; vrm_set_delta(vrm); vrm->remap_type = vrm_remap_type(vrm); /* For convenience, we set new_addr even if VMA won't move. */ if (!vrm_implies_new_addr(vrm)) vrm->new_addr = addr; /* Below only meaningful if we expand or move a VMA. */ if (!vrm_will_map_new(vrm)) return 0; old_len = vrm->old_len; new_len = vrm->new_len; /* * !old_len is a special case where an attempt is made to 'duplicate' * a mapping. This makes no sense for private mappings as it will * instead create a fresh/new mapping unrelated to the original. This * is contrary to the basic idea of mremap which creates new mappings * based on the original. There are no known use cases for this * behavior. As a result, fail such attempts. */ if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) { pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap. This is not supported.\n", current->comm, current->pid); return -EINVAL; } if ((vrm->flags & MREMAP_DONTUNMAP) && (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))) return -EINVAL; /* * We permit crossing of boundaries for the range being unmapped due to * a shrink. */ if (vrm->remap_type == MREMAP_SHRINK) old_len = new_len; /* * We can't remap across the end of VMAs, as another VMA may be * adjacent: * * addr vma->vm_end * |-----.----------| * | . | * |-----.----------| * .<--------->xxx> * old_len * * We also require that vma->vm_start <= addr < vma->vm_end. */ if (old_len > vma->vm_end - addr) return -EFAULT; if (new_len == old_len) return 0; /* We are expanding and the VMA is mlock()'d so we need to populate. */ if (vma->vm_flags & VM_LOCKED) vrm->populate_expand = true; /* Need to be careful about a growing mapping */ pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; pgoff += vma->vm_pgoff; if (pgoff + (new_len >> PAGE_SHIFT) < pgoff) return -EINVAL; if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) return -EFAULT; if (!mlock_future_ok(mm, vma->vm_flags, vrm->delta)) return -EAGAIN; if (!may_expand_vm(mm, vma->vm_flags, vrm->delta >> PAGE_SHIFT)) return -ENOMEM; return 0; } /* * Are the parameters passed to mremap() valid? If so return 0, otherwise return * error. */ static unsigned long check_mremap_params(struct vma_remap_struct *vrm) { unsigned long addr = vrm->addr; unsigned long flags = vrm->flags; /* Ensure no unexpected flag values. */ if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP)) return -EINVAL; /* Start address must be page-aligned. */ if (offset_in_page(addr)) return -EINVAL; /* * We allow a zero old-len as a special case * for DOS-emu "duplicate shm area" thing. But * a zero new-len is nonsensical. */ if (!vrm->new_len) return -EINVAL; /* Is the new length silly? */ if (vrm->new_len > TASK_SIZE) return -EINVAL; /* Remainder of checks are for cases with specific new_addr. */ if (!vrm_implies_new_addr(vrm)) return 0; /* Is the new address silly? */ if (vrm->new_addr > TASK_SIZE - vrm->new_len) return -EINVAL; /* The new address must be page-aligned. */ if (offset_in_page(vrm->new_addr)) return -EINVAL; /* A fixed address implies a move. */ if (!(flags & MREMAP_MAYMOVE)) return -EINVAL; /* MREMAP_DONTUNMAP does not allow resizing in the process. */ if (flags & MREMAP_DONTUNMAP && vrm->old_len != vrm->new_len) return -EINVAL; /* Target VMA must not overlap source VMA. */ if (vrm_overlaps(vrm)) return -EINVAL; /* * move_vma() need us to stay 4 maps below the threshold, otherwise * it will bail out at the very beginning. * That is a problem if we have already unmaped the regions here * (new_addr, and old_addr), because userspace will not know the * state of the vma's after it gets -ENOMEM. * So, to avoid such scenario we can pre-compute if the whole * operation has high chances to success map-wise. * Worst-scenario case is when both vma's (new_addr and old_addr) get * split in 3 before unmapping it. * That means 2 more maps (1 for each) to the ones we already hold. * Check whether current map count plus 2 still leads us to 4 maps below * the threshold, otherwise return -ENOMEM here to be more safe. */ if ((current->mm->map_count + 2) >= sysctl_max_map_count - 3) return -ENOMEM; return 0; } static unsigned long remap_move(struct vma_remap_struct *vrm) { struct vm_area_struct *vma; unsigned long start = vrm->addr; unsigned long end = vrm->addr + vrm->old_len; unsigned long new_addr = vrm->new_addr; unsigned long target_addr = new_addr; unsigned long res = -EFAULT; unsigned long last_end; bool seen_vma = false; VMA_ITERATOR(vmi, current->mm, start); /* * When moving VMAs we allow for batched moves across multiple VMAs, * with all VMAs in the input range [addr, addr + old_len) being moved * (and split as necessary). */ for_each_vma_range(vmi, vma, end) { /* Account for start, end not aligned with VMA start, end. */ unsigned long addr = max(vma->vm_start, start); unsigned long len = min(end, vma->vm_end) - addr; unsigned long offset, res_vma; bool multi_allowed; /* No gap permitted at the start of the range. */ if (!seen_vma && start < vma->vm_start) return -EFAULT; /* * To sensibly move multiple VMAs, accounting for the fact that * get_unmapped_area() may align even MAP_FIXED moves, we simply * attempt to move such that the gaps between source VMAs remain * consistent in destination VMAs, e.g.: * * X Y X Y * <---> <-> <---> <-> * |-------| |-----| |-----| |-------| |-----| |-----| * | A | | B | | C | ---> | A' | | B' | | C' | * |-------| |-----| |-----| |-------| |-----| |-----| * new_addr * * So we map B' at A'->vm_end + X, and C' at B'->vm_end + Y. */ offset = seen_vma ? vma->vm_start - last_end : 0; last_end = vma->vm_end; vrm->vma = vma; vrm->addr = addr; vrm->new_addr = target_addr + offset; vrm->old_len = vrm->new_len = len; multi_allowed = vma_multi_allowed(vma); if (!multi_allowed) { /* This is not the first VMA, abort immediately. */ if (seen_vma) return -EFAULT; /* This is the first, but there are more, abort. */ if (vma->vm_end < end) return -EFAULT; } res_vma = check_prep_vma(vrm); if (!res_vma) res_vma = mremap_to(vrm); if (IS_ERR_VALUE(res_vma)) return res_vma; if (!seen_vma) { VM_WARN_ON_ONCE(multi_allowed && res_vma != new_addr); res = res_vma; } /* mmap lock is only dropped on shrink. */ VM_WARN_ON_ONCE(!vrm->mmap_locked); /* This is a move, no expand should occur. */ VM_WARN_ON_ONCE(vrm->populate_expand); if (vrm->vmi_needs_invalidate) { vma_iter_invalidate(&vmi); vrm->vmi_needs_invalidate = false; } seen_vma = true; target_addr = res_vma + vrm->new_len; } return res; } static unsigned long do_mremap(struct vma_remap_struct *vrm) { struct mm_struct *mm = current->mm; unsigned long res; bool failed; vrm->old_len = PAGE_ALIGN(vrm->old_len); vrm->new_len = PAGE_ALIGN(vrm->new_len); res = check_mremap_params(vrm); if (res) return res; if (mmap_write_lock_killable(mm)) return -EINTR; vrm->mmap_locked = true; if (vrm_move_only(vrm)) { res = remap_move(vrm); } else { vrm->vma = vma_lookup(current->mm, vrm->addr); res = check_prep_vma(vrm); if (res) goto out; /* Actually execute mremap. */ res = vrm_implies_new_addr(vrm) ? mremap_to(vrm) : mremap_at(vrm); } out: failed = IS_ERR_VALUE(res); if (vrm->mmap_locked) mmap_write_unlock(mm); /* VMA mlock'd + was expanded, so populated expanded region. */ if (!failed && vrm->populate_expand) mm_populate(vrm->new_addr + vrm->old_len, vrm->delta); notify_uffd(vrm, failed); return res; } /* * Expand (or shrink) an existing mapping, potentially moving it at the * same time (controlled by the MREMAP_MAYMOVE flag and available VM space) * * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise * This option implies MREMAP_MAYMOVE. */ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr) { struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX; LIST_HEAD(uf_unmap_early); LIST_HEAD(uf_unmap); /* * There is a deliberate asymmetry here: we strip the pointer tag * from the old address but leave the new address alone. This is * for consistency with mmap(), where we prevent the creation of * aliasing mappings in userspace by leaving the tag bits of the * mapping address intact. A non-zero tag will cause the subsequent * range checks to reject the address as invalid. * * See Documentation/arch/arm64/tagged-address-abi.rst for more * information. */ struct vma_remap_struct vrm = { .addr = untagged_addr(addr), .old_len = old_len, .new_len = new_len, .flags = flags, .new_addr = new_addr, .uf = &uf, .uf_unmap_early = &uf_unmap_early, .uf_unmap = &uf_unmap, .remap_type = MREMAP_INVALID, /* We set later. */ }; return do_mremap(&vrm); } |
| 36 36 36 36 21 21 21 21 21 21 36 36 35 35 36 35 36 36 36 36 36 36 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 | // SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/module.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/ceph/types.h> #include <linux/ceph/decode.h> #include <linux/ceph/libceph.h> #include <linux/ceph/messenger.h> #include "auth_none.h" #include "auth_x.h" /* * get protocol handler */ static u32 supported_protocols[] = { CEPH_AUTH_NONE, CEPH_AUTH_CEPHX }; static int init_protocol(struct ceph_auth_client *ac, int proto) { dout("%s proto %d\n", __func__, proto); switch (proto) { case CEPH_AUTH_NONE: return ceph_auth_none_init(ac); case CEPH_AUTH_CEPHX: return ceph_x_init(ac); default: pr_err("bad auth protocol %d\n", proto); return -EINVAL; } } void ceph_auth_set_global_id(struct ceph_auth_client *ac, u64 global_id) { dout("%s global_id %llu\n", __func__, global_id); if (!global_id) pr_err("got zero global_id\n"); if (ac->global_id && global_id != ac->global_id) pr_err("global_id changed from %llu to %llu\n", ac->global_id, global_id); ac->global_id = global_id; } /* * setup, teardown. */ struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_crypto_key *key, const int *con_modes) { struct ceph_auth_client *ac; ac = kzalloc(sizeof(*ac), GFP_NOFS); if (!ac) return ERR_PTR(-ENOMEM); mutex_init(&ac->mutex); ac->negotiating = true; if (name) ac->name = name; else ac->name = CEPH_AUTH_NAME_DEFAULT; ac->key = key; ac->preferred_mode = con_modes[0]; ac->fallback_mode = con_modes[1]; dout("%s name '%s' preferred_mode %d fallback_mode %d\n", __func__, ac->name, ac->preferred_mode, ac->fallback_mode); return ac; } void ceph_auth_destroy(struct ceph_auth_client *ac) { dout("auth_destroy %p\n", ac); if (ac->ops) ac->ops->destroy(ac); kfree(ac); } /* * Reset occurs when reconnecting to the monitor. */ void ceph_auth_reset(struct ceph_auth_client *ac) { mutex_lock(&ac->mutex); dout("auth_reset %p\n", ac); if (ac->ops && !ac->negotiating) ac->ops->reset(ac); ac->negotiating = true; mutex_unlock(&ac->mutex); } /* * EntityName, not to be confused with entity_name_t */ int ceph_auth_entity_name_encode(const char *name, void **p, void *end) { int len = strlen(name); if (*p + 2*sizeof(u32) + len > end) return -ERANGE; ceph_encode_32(p, CEPH_ENTITY_TYPE_CLIENT); ceph_encode_32(p, len); ceph_encode_copy(p, name, len); return 0; } /* * Initiate protocol negotiation with monitor. Include entity name * and list supported protocols. */ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len) { struct ceph_mon_request_header *monhdr = buf; void *p = monhdr + 1, *end = buf + len, *lenp; int i, num; int ret; mutex_lock(&ac->mutex); dout("auth_build_hello\n"); monhdr->have_version = 0; monhdr->session_mon = cpu_to_le16(-1); monhdr->session_mon_tid = 0; ceph_encode_32(&p, CEPH_AUTH_UNKNOWN); /* no protocol, yet */ lenp = p; p += sizeof(u32); ceph_decode_need(&p, end, 1 + sizeof(u32), bad); ceph_encode_8(&p, 1); num = ARRAY_SIZE(supported_protocols); ceph_encode_32(&p, num); ceph_decode_need(&p, end, num * sizeof(u32), bad); for (i = 0; i < num; i++) ceph_encode_32(&p, supported_protocols[i]); ret = ceph_auth_entity_name_encode(ac->name, &p, end); if (ret < 0) goto out; ceph_decode_need(&p, end, sizeof(u64), bad); ceph_encode_64(&p, ac->global_id); ceph_encode_32(&lenp, p - lenp - sizeof(u32)); ret = p - buf; out: mutex_unlock(&ac->mutex); return ret; bad: ret = -ERANGE; goto out; } static int build_request(struct ceph_auth_client *ac, bool add_header, void *buf, int buf_len) { void *end = buf + buf_len; void *p; int ret; p = buf; if (add_header) { /* struct ceph_mon_request_header + protocol */ ceph_encode_64_safe(&p, end, 0, e_range); ceph_encode_16_safe(&p, end, -1, e_range); ceph_encode_64_safe(&p, end, 0, e_range); ceph_encode_32_safe(&p, end, ac->protocol, e_range); } ceph_encode_need(&p, end, sizeof(u32), e_range); ret = ac->ops->build_request(ac, p + sizeof(u32), end); if (ret < 0) { pr_err("auth protocol '%s' building request failed: %d\n", ceph_auth_proto_name(ac->protocol), ret); return ret; } dout(" built request %d bytes\n", ret); ceph_encode_32(&p, ret); return p + ret - buf; e_range: return -ERANGE; } /* * Handle auth message from monitor. */ int ceph_handle_auth_reply(struct ceph_auth_client *ac, void *buf, size_t len, void *reply_buf, size_t reply_len) { void *p = buf; void *end = buf + len; int protocol; s32 result; u64 global_id; void *payload, *payload_end; int payload_len; char *result_msg; int result_msg_len; int ret = -EINVAL; mutex_lock(&ac->mutex); dout("handle_auth_reply %p %p\n", p, end); ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad); protocol = ceph_decode_32(&p); result = ceph_decode_32(&p); global_id = ceph_decode_64(&p); payload_len = ceph_decode_32(&p); payload = p; p += payload_len; ceph_decode_need(&p, end, sizeof(u32), bad); result_msg_len = ceph_decode_32(&p); result_msg = p; p += result_msg_len; if (p != end) goto bad; dout(" result %d '%.*s' gid %llu len %d\n", result, result_msg_len, result_msg, global_id, payload_len); payload_end = payload + payload_len; if (ac->negotiating) { /* server does not support our protocols? */ if (!protocol && result < 0) { ret = result; goto out; } /* set up (new) protocol handler? */ if (ac->protocol && ac->protocol != protocol) { ac->ops->destroy(ac); ac->protocol = 0; ac->ops = NULL; } if (ac->protocol != protocol) { ret = init_protocol(ac, protocol); if (ret) { pr_err("auth protocol '%s' init failed: %d\n", ceph_auth_proto_name(protocol), ret); goto out; } } ac->negotiating = false; } if (result) { pr_err("auth protocol '%s' mauth authentication failed: %d\n", ceph_auth_proto_name(ac->protocol), result); ret = result; goto out; } ret = ac->ops->handle_reply(ac, global_id, payload, payload_end, NULL, NULL, NULL, NULL); if (ret == -EAGAIN) { ret = build_request(ac, true, reply_buf, reply_len); goto out; } else if (ret) { goto out; } out: mutex_unlock(&ac->mutex); return ret; bad: pr_err("failed to decode auth msg\n"); ret = -EINVAL; goto out; } int ceph_build_auth(struct ceph_auth_client *ac, void *msg_buf, size_t msg_len) { int ret = 0; mutex_lock(&ac->mutex); if (ac->ops->should_authenticate(ac)) ret = build_request(ac, true, msg_buf, msg_len); mutex_unlock(&ac->mutex); return ret; } int ceph_auth_is_authenticated(struct ceph_auth_client *ac) { int ret = 0; mutex_lock(&ac->mutex); if (ac->ops) ret = ac->ops->is_authenticated(ac); mutex_unlock(&ac->mutex); return ret; } EXPORT_SYMBOL(ceph_auth_is_authenticated); int __ceph_auth_get_authorizer(struct ceph_auth_client *ac, struct ceph_auth_handshake *auth, int peer_type, bool force_new, int *proto, int *pref_mode, int *fallb_mode) { int ret; mutex_lock(&ac->mutex); if (force_new && auth->authorizer) { ceph_auth_destroy_authorizer(auth->authorizer); auth->authorizer = NULL; } if (!auth->authorizer) ret = ac->ops->create_authorizer(ac, peer_type, auth); else if (ac->ops->update_authorizer) ret = ac->ops->update_authorizer(ac, peer_type, auth); else ret = 0; if (ret) goto out; *proto = ac->protocol; if (pref_mode && fallb_mode) { *pref_mode = ac->preferred_mode; *fallb_mode = ac->fallback_mode; } out: mutex_unlock(&ac->mutex); return ret; } EXPORT_SYMBOL(__ceph_auth_get_authorizer); void ceph_auth_destroy_authorizer(struct ceph_authorizer *a) { a->destroy(a); } EXPORT_SYMBOL(ceph_auth_destroy_authorizer); int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, struct ceph_authorizer *a, void *challenge_buf, int challenge_buf_len) { int ret = 0; mutex_lock(&ac->mutex); if (ac->ops && ac->ops->add_authorizer_challenge) ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf, challenge_buf_len); mutex_unlock(&ac->mutex); return ret; } EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge); int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a, void *reply, int reply_len, u8 *session_key, int *session_key_len, u8 *con_secret, int *con_secret_len) { int ret = 0; mutex_lock(&ac->mutex); if (ac->ops && ac->ops->verify_authorizer_reply) ret = ac->ops->verify_authorizer_reply(ac, a, reply, reply_len, session_key, session_key_len, con_secret, con_secret_len); mutex_unlock(&ac->mutex); return ret; } EXPORT_SYMBOL(ceph_auth_verify_authorizer_reply); void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type) { mutex_lock(&ac->mutex); if (ac->ops && ac->ops->invalidate_authorizer) ac->ops->invalidate_authorizer(ac, peer_type); mutex_unlock(&ac->mutex); } EXPORT_SYMBOL(ceph_auth_invalidate_authorizer); /* * msgr2 authentication */ static bool contains(const int *arr, int cnt, int val) { int i; for (i = 0; i < cnt; i++) { if (arr[i] == val) return true; } return false; } static int encode_con_modes(void **p, void *end, int pref_mode, int fallb_mode) { WARN_ON(pref_mode == CEPH_CON_MODE_UNKNOWN); if (fallb_mode != CEPH_CON_MODE_UNKNOWN) { ceph_encode_32_safe(p, end, 2, e_range); ceph_encode_32_safe(p, end, pref_mode, e_range); ceph_encode_32_safe(p, end, fallb_mode, e_range); } else { ceph_encode_32_safe(p, end, 1, e_range); ceph_encode_32_safe(p, end, pref_mode, e_range); } return 0; e_range: return -ERANGE; } /* * Similar to ceph_auth_build_hello(). */ int ceph_auth_get_request(struct ceph_auth_client *ac, void *buf, int buf_len) { int proto = ac->key ? CEPH_AUTH_CEPHX : CEPH_AUTH_NONE; void *end = buf + buf_len; void *lenp; void *p; int ret; mutex_lock(&ac->mutex); if (ac->protocol == CEPH_AUTH_UNKNOWN) { ret = init_protocol(ac, proto); if (ret) { pr_err("auth protocol '%s' init failed: %d\n", ceph_auth_proto_name(proto), ret); goto out; } } else { WARN_ON(ac->protocol != proto); ac->ops->reset(ac); } p = buf; ceph_encode_32_safe(&p, end, ac->protocol, e_range); ret = encode_con_modes(&p, end, ac->preferred_mode, ac->fallback_mode); if (ret) goto out; lenp = p; p += 4; /* space for len */ ceph_encode_8_safe(&p, end, CEPH_AUTH_MODE_MON, e_range); ret = ceph_auth_entity_name_encode(ac->name, &p, end); if (ret) goto out; ceph_encode_64_safe(&p, end, ac->global_id, e_range); ceph_encode_32(&lenp, p - lenp - 4); ret = p - buf; out: mutex_unlock(&ac->mutex); return ret; e_range: ret = -ERANGE; goto out; } int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply, int reply_len, void *buf, int buf_len) { int ret; mutex_lock(&ac->mutex); ret = ac->ops->handle_reply(ac, 0, reply, reply + reply_len, NULL, NULL, NULL, NULL); if (ret == -EAGAIN) ret = build_request(ac, false, buf, buf_len); else WARN_ON(ret >= 0); mutex_unlock(&ac->mutex); return ret; } int ceph_auth_handle_reply_done(struct ceph_auth_client *ac, u64 global_id, void *reply, int reply_len, u8 *session_key, int *session_key_len, u8 *con_secret, int *con_secret_len) { int ret; mutex_lock(&ac->mutex); ret = ac->ops->handle_reply(ac, global_id, reply, reply + reply_len, session_key, session_key_len, con_secret, con_secret_len); WARN_ON(ret == -EAGAIN || ret > 0); mutex_unlock(&ac->mutex); return ret; } bool ceph_auth_handle_bad_method(struct ceph_auth_client *ac, int used_proto, int result, const int *allowed_protos, int proto_cnt, const int *allowed_modes, int mode_cnt) { mutex_lock(&ac->mutex); WARN_ON(used_proto != ac->protocol); if (result == -EOPNOTSUPP) { if (!contains(allowed_protos, proto_cnt, ac->protocol)) { pr_err("auth protocol '%s' not allowed\n", ceph_auth_proto_name(ac->protocol)); goto not_allowed; } if (!contains(allowed_modes, mode_cnt, ac->preferred_mode) && (ac->fallback_mode == CEPH_CON_MODE_UNKNOWN || !contains(allowed_modes, mode_cnt, ac->fallback_mode))) { pr_err("preferred mode '%s' not allowed\n", ceph_con_mode_name(ac->preferred_mode)); if (ac->fallback_mode == CEPH_CON_MODE_UNKNOWN) pr_err("no fallback mode\n"); else pr_err("fallback mode '%s' not allowed\n", ceph_con_mode_name(ac->fallback_mode)); goto not_allowed; } } WARN_ON(result == -EOPNOTSUPP || result >= 0); pr_err("auth protocol '%s' msgr authentication failed: %d\n", ceph_auth_proto_name(ac->protocol), result); mutex_unlock(&ac->mutex); return true; not_allowed: mutex_unlock(&ac->mutex); return false; } int ceph_auth_get_authorizer(struct ceph_auth_client *ac, struct ceph_auth_handshake *auth, int peer_type, void *buf, int *buf_len) { void *end = buf + *buf_len; int pref_mode, fallb_mode; int proto; void *p; int ret; ret = __ceph_auth_get_authorizer(ac, auth, peer_type, true, &proto, &pref_mode, &fallb_mode); if (ret) return ret; p = buf; ceph_encode_32_safe(&p, end, proto, e_range); ret = encode_con_modes(&p, end, pref_mode, fallb_mode); if (ret) return ret; ceph_encode_32_safe(&p, end, auth->authorizer_buf_len, e_range); *buf_len = p - buf; return 0; e_range: return -ERANGE; } EXPORT_SYMBOL(ceph_auth_get_authorizer); int ceph_auth_handle_svc_reply_more(struct ceph_auth_client *ac, struct ceph_auth_handshake *auth, void *reply, int reply_len, void *buf, int *buf_len) { void *end = buf + *buf_len; void *p; int ret; ret = ceph_auth_add_authorizer_challenge(ac, auth->authorizer, reply, reply_len); if (ret) return ret; p = buf; ceph_encode_32_safe(&p, end, auth->authorizer_buf_len, e_range); *buf_len = p - buf; return 0; e_range: return -ERANGE; } EXPORT_SYMBOL(ceph_auth_handle_svc_reply_more); int ceph_auth_handle_svc_reply_done(struct ceph_auth_client *ac, struct ceph_auth_handshake *auth, void *reply, int reply_len, u8 *session_key, int *session_key_len, u8 *con_secret, int *con_secret_len) { return ceph_auth_verify_authorizer_reply(ac, auth->authorizer, reply, reply_len, session_key, session_key_len, con_secret, con_secret_len); } EXPORT_SYMBOL(ceph_auth_handle_svc_reply_done); bool ceph_auth_handle_bad_authorizer(struct ceph_auth_client *ac, int peer_type, int used_proto, int result, const int *allowed_protos, int proto_cnt, const int *allowed_modes, int mode_cnt) { mutex_lock(&ac->mutex); WARN_ON(used_proto != ac->protocol); if (result == -EOPNOTSUPP) { if (!contains(allowed_protos, proto_cnt, ac->protocol)) { pr_err("auth protocol '%s' not allowed by %s\n", ceph_auth_proto_name(ac->protocol), ceph_entity_type_name(peer_type)); goto not_allowed; } if (!contains(allowed_modes, mode_cnt, ac->preferred_mode) && (ac->fallback_mode == CEPH_CON_MODE_UNKNOWN || !contains(allowed_modes, mode_cnt, ac->fallback_mode))) { pr_err("preferred mode '%s' not allowed by %s\n", ceph_con_mode_name(ac->preferred_mode), ceph_entity_type_name(peer_type)); if (ac->fallback_mode == CEPH_CON_MODE_UNKNOWN) pr_err("no fallback mode\n"); else pr_err("fallback mode '%s' not allowed by %s\n", ceph_con_mode_name(ac->fallback_mode), ceph_entity_type_name(peer_type)); goto not_allowed; } } WARN_ON(result == -EOPNOTSUPP || result >= 0); pr_err("auth protocol '%s' authorization to %s failed: %d\n", ceph_auth_proto_name(ac->protocol), ceph_entity_type_name(peer_type), result); if (ac->ops->invalidate_authorizer) ac->ops->invalidate_authorizer(ac, peer_type); mutex_unlock(&ac->mutex); return true; not_allowed: mutex_unlock(&ac->mutex); return false; } EXPORT_SYMBOL(ceph_auth_handle_bad_authorizer); |
| 165 161 165 165 165 163 164 71 71 71 163 142 141 142 163 70 157 154 157 154 149 155 155 143 154 131 130 131 130 131 131 131 131 128 130 129 131 131 129 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 | /* * Copyright (C) 2018 Intel Corp. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Rob Clark <robdclark@gmail.com> * Daniel Vetter <daniel.vetter@ffwll.ch> */ #include <drm/drm_atomic.h> #include <drm/drm_atomic_state_helper.h> #include <drm/drm_blend.h> #include <drm/drm_bridge.h> #include <drm/drm_connector.h> #include <drm/drm_crtc.h> #include <drm/drm_device.h> #include <drm/drm_framebuffer.h> #include <drm/drm_plane.h> #include <drm/drm_print.h> #include <drm/drm_vblank.h> #include <drm/drm_writeback.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/dma-fence.h> /** * DOC: atomic state reset and initialization * * Both the drm core and the atomic helpers assume that there is always the full * and correct atomic software state for all connectors, CRTCs and planes * available. Which is a bit a problem on driver load and also after system * suspend. One way to solve this is to have a hardware state read-out * infrastructure which reconstructs the full software state (e.g. the i915 * driver). * * The simpler solution is to just reset the software state to everything off, * which is easiest to do by calling drm_mode_config_reset(). To facilitate this * the atomic helpers provide default reset implementations for all hooks. * * On the upside the precise state tracking of atomic simplifies system suspend * and resume a lot. For drivers using drm_mode_config_reset() a complete recipe * is implemented in drm_atomic_helper_suspend() and drm_atomic_helper_resume(). * For other drivers the building blocks are split out, see the documentation * for these functions. */ /** * __drm_atomic_helper_crtc_state_reset - reset the CRTC state * @crtc_state: atomic CRTC state, must not be NULL * @crtc: CRTC object, must not be NULL * * Initializes the newly allocated @crtc_state with default * values. This is useful for drivers that subclass the CRTC state. */ void __drm_atomic_helper_crtc_state_reset(struct drm_crtc_state *crtc_state, struct drm_crtc *crtc) { crtc_state->crtc = crtc; } EXPORT_SYMBOL(__drm_atomic_helper_crtc_state_reset); /** * __drm_atomic_helper_crtc_reset - reset state on CRTC * @crtc: drm CRTC * @crtc_state: CRTC state to assign * * Initializes the newly allocated @crtc_state and assigns it to * the &drm_crtc->state pointer of @crtc, usually required when * initializing the drivers or when called from the &drm_crtc_funcs.reset * hook. * * This is useful for drivers that subclass the CRTC state. */ void __drm_atomic_helper_crtc_reset(struct drm_crtc *crtc, struct drm_crtc_state *crtc_state) { if (crtc_state) __drm_atomic_helper_crtc_state_reset(crtc_state, crtc); if (drm_dev_has_vblank(crtc->dev)) drm_crtc_vblank_reset(crtc); crtc->state = crtc_state; } EXPORT_SYMBOL(__drm_atomic_helper_crtc_reset); /** * drm_atomic_helper_crtc_reset - default &drm_crtc_funcs.reset hook for CRTCs * @crtc: drm CRTC * * Resets the atomic state for @crtc by freeing the state pointer (which might * be NULL, e.g. at driver load time) and allocating a new empty state object. */ void drm_atomic_helper_crtc_reset(struct drm_crtc *crtc) { struct drm_crtc_state *crtc_state = kzalloc(sizeof(*crtc->state), GFP_KERNEL); if (crtc->state) crtc->funcs->atomic_destroy_state(crtc, crtc->state); __drm_atomic_helper_crtc_reset(crtc, crtc_state); } EXPORT_SYMBOL(drm_atomic_helper_crtc_reset); /** * __drm_atomic_helper_crtc_duplicate_state - copy atomic CRTC state * @crtc: CRTC object * @state: atomic CRTC state * * Copies atomic state from a CRTC's current state and resets inferred values. * This is useful for drivers that subclass the CRTC state. */ void __drm_atomic_helper_crtc_duplicate_state(struct drm_crtc *crtc, struct drm_crtc_state *state) { memcpy(state, crtc->state, sizeof(*state)); if (state->mode_blob) drm_property_blob_get(state->mode_blob); if (state->degamma_lut) drm_property_blob_get(state->degamma_lut); if (state->ctm) drm_property_blob_get(state->ctm); if (state->gamma_lut) drm_property_blob_get(state->gamma_lut); state->mode_changed = false; state->active_changed = false; state->planes_changed = false; state->connectors_changed = false; state->color_mgmt_changed = false; state->zpos_changed = false; state->commit = NULL; state->event = NULL; state->async_flip = false; /* Self refresh should be canceled when a new update is available */ state->active = drm_atomic_crtc_effectively_active(state); state->self_refresh_active = false; } EXPORT_SYMBOL(__drm_atomic_helper_crtc_duplicate_state); /** * drm_atomic_helper_crtc_duplicate_state - default state duplicate hook * @crtc: drm CRTC * * Default CRTC state duplicate hook for drivers which don't have their own * subclassed CRTC state structure. */ struct drm_crtc_state * drm_atomic_helper_crtc_duplicate_state(struct drm_crtc *crtc) { struct drm_crtc_state *state; if (WARN_ON(!crtc->state)) return NULL; state = kmalloc(sizeof(*state), GFP_KERNEL); if (state) __drm_atomic_helper_crtc_duplicate_state(crtc, state); return state; } EXPORT_SYMBOL(drm_atomic_helper_crtc_duplicate_state); /** * __drm_atomic_helper_crtc_destroy_state - release CRTC state * @state: CRTC state object to release * * Releases all resources stored in the CRTC state without actually freeing * the memory of the CRTC state. This is useful for drivers that subclass the * CRTC state. */ void __drm_atomic_helper_crtc_destroy_state(struct drm_crtc_state *state) { if (state->commit) { /* * In the event that a non-blocking commit returns * -ERESTARTSYS before the commit_tail work is queued, we will * have an extra reference to the commit object. Release it, if * the event has not been consumed by the worker. * * state->event may be freed, so we can't directly look at * state->event->base.completion. */ if (state->event && state->commit->abort_completion) drm_crtc_commit_put(state->commit); kfree(state->commit->event); state->commit->event = NULL; drm_crtc_commit_put(state->commit); } drm_property_blob_put(state->mode_blob); drm_property_blob_put(state->degamma_lut); drm_property_blob_put(state->ctm); drm_property_blob_put(state->gamma_lut); } EXPORT_SYMBOL(__drm_atomic_helper_crtc_destroy_state); /** * drm_atomic_helper_crtc_destroy_state - default state destroy hook * @crtc: drm CRTC * @state: CRTC state object to release * * Default CRTC state destroy hook for drivers which don't have their own * subclassed CRTC state structure. */ void drm_atomic_helper_crtc_destroy_state(struct drm_crtc *crtc, struct drm_crtc_state *state) { __drm_atomic_helper_crtc_destroy_state(state); kfree(state); } EXPORT_SYMBOL(drm_atomic_helper_crtc_destroy_state); /** * __drm_atomic_helper_plane_state_reset - resets plane state to default values * @plane_state: atomic plane state, must not be NULL * @plane: plane object, must not be NULL * * Initializes the newly allocated @plane_state with default * values. This is useful for drivers that subclass the CRTC state. */ void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state, struct drm_plane *plane) { u64 val; plane_state->plane = plane; plane_state->rotation = DRM_MODE_ROTATE_0; plane_state->alpha = DRM_BLEND_ALPHA_OPAQUE; plane_state->pixel_blend_mode = DRM_MODE_BLEND_PREMULTI; if (plane->color_encoding_property) { if (!drm_object_property_get_default_value(&plane->base, plane->color_encoding_property, &val)) plane_state->color_encoding = val; } if (plane->color_range_property) { if (!drm_object_property_get_default_value(&plane->base, plane->color_range_property, &val)) plane_state->color_range = val; } if (plane->color_pipeline_property) { /* default is always NULL, i.e., bypass */ plane_state->color_pipeline = NULL; } if (plane->zpos_property) { if (!drm_object_property_get_default_value(&plane->base, plane->zpos_property, &val)) { plane_state->zpos = val; plane_state->normalized_zpos = val; } } if (plane->hotspot_x_property) { if (!drm_object_property_get_default_value(&plane->base, plane->hotspot_x_property, &val)) plane_state->hotspot_x = val; } if (plane->hotspot_y_property) { if (!drm_object_property_get_default_value(&plane->base, plane->hotspot_y_property, &val)) plane_state->hotspot_y = val; } } EXPORT_SYMBOL(__drm_atomic_helper_plane_state_reset); /** * __drm_atomic_helper_plane_reset - reset state on plane * @plane: drm plane * @plane_state: plane state to assign * * Initializes the newly allocated @plane_state and assigns it to * the &drm_crtc->state pointer of @plane, usually required when * initializing the drivers or when called from the &drm_plane_funcs.reset * hook. * * This is useful for drivers that subclass the plane state. */ void __drm_atomic_helper_plane_reset(struct drm_plane *plane, struct drm_plane_state *plane_state) { if (plane_state) __drm_atomic_helper_plane_state_reset(plane_state, plane); plane->state = plane_state; } EXPORT_SYMBOL(__drm_atomic_helper_plane_reset); /** * drm_atomic_helper_plane_reset - default &drm_plane_funcs.reset hook for planes * @plane: drm plane * * Resets the atomic state for @plane by freeing the state pointer (which might * be NULL, e.g. at driver load time) and allocating a new empty state object. */ void drm_atomic_helper_plane_reset(struct drm_plane *plane) { if (plane->state) __drm_atomic_helper_plane_destroy_state(plane->state); kfree(plane->state); plane->state = kzalloc(sizeof(*plane->state), GFP_KERNEL); if (plane->state) __drm_atomic_helper_plane_reset(plane, plane->state); } EXPORT_SYMBOL(drm_atomic_helper_plane_reset); /** * __drm_atomic_helper_plane_duplicate_state - copy atomic plane state * @plane: plane object * @state: atomic plane state * * Copies atomic state from a plane's current state. This is useful for * drivers that subclass the plane state. */ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane, struct drm_plane_state *state) { memcpy(state, plane->state, sizeof(*state)); if (state->fb) drm_framebuffer_get(state->fb); state->fence = NULL; state->commit = NULL; state->fb_damage_clips = NULL; state->color_mgmt_changed = false; } EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state); /** * drm_atomic_helper_plane_duplicate_state - default state duplicate hook * @plane: drm plane * * Default plane state duplicate hook for drivers which don't have their own * subclassed plane state structure. */ struct drm_plane_state * drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane) { struct drm_plane_state *state; if (WARN_ON(!plane->state)) return NULL; state = kmalloc(sizeof(*state), GFP_KERNEL); if (state) __drm_atomic_helper_plane_duplicate_state(plane, state); return state; } EXPORT_SYMBOL(drm_atomic_helper_plane_duplicate_state); /** * __drm_atomic_helper_plane_destroy_state - release plane state * @state: plane state object to release * * Releases all resources stored in the plane state without actually freeing * the memory of the plane state. This is useful for drivers that subclass the * plane state. */ void __drm_atomic_helper_plane_destroy_state(struct drm_plane_state *state) { if (state->fb) drm_framebuffer_put(state->fb); if (state->fence) dma_fence_put(state->fence); if (state->commit) drm_crtc_commit_put(state->commit); drm_property_blob_put(state->fb_damage_clips); } EXPORT_SYMBOL(__drm_atomic_helper_plane_destroy_state); /** * drm_atomic_helper_plane_destroy_state - default state destroy hook * @plane: drm plane * @state: plane state object to release * * Default plane state destroy hook for drivers which don't have their own * subclassed plane state structure. */ void drm_atomic_helper_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { __drm_atomic_helper_plane_destroy_state(state); kfree(state); } EXPORT_SYMBOL(drm_atomic_helper_plane_destroy_state); /** * __drm_atomic_helper_connector_state_reset - reset the connector state * @conn_state: atomic connector state, must not be NULL * @connector: connectotr object, must not be NULL * * Initializes the newly allocated @conn_state with default * values. This is useful for drivers that subclass the connector state. */ void __drm_atomic_helper_connector_state_reset(struct drm_connector_state *conn_state, struct drm_connector *connector) { conn_state->connector = connector; } EXPORT_SYMBOL(__drm_atomic_helper_connector_state_reset); /** * __drm_atomic_helper_connector_reset - reset state on connector * @connector: drm connector * @conn_state: connector state to assign * * Initializes the newly allocated @conn_state and assigns it to * the &drm_connector->state pointer of @connector, usually required when * initializing the drivers or when called from the &drm_connector_funcs.reset * hook. * * This is useful for drivers that subclass the connector state. */ void __drm_atomic_helper_connector_reset(struct drm_connector *connector, struct drm_connector_state *conn_state) { if (conn_state) __drm_atomic_helper_connector_state_reset(conn_state, connector); connector->state = conn_state; } EXPORT_SYMBOL(__drm_atomic_helper_connector_reset); /** * drm_atomic_helper_connector_reset - default &drm_connector_funcs.reset hook for connectors * @connector: drm connector * * Resets the atomic state for @connector by freeing the state pointer (which * might be NULL, e.g. at driver load time) and allocating a new empty state * object. */ void drm_atomic_helper_connector_reset(struct drm_connector *connector) { struct drm_connector_state *conn_state = kzalloc(sizeof(*conn_state), GFP_KERNEL); if (connector->state) __drm_atomic_helper_connector_destroy_state(connector->state); kfree(connector->state); __drm_atomic_helper_connector_reset(connector, conn_state); } EXPORT_SYMBOL(drm_atomic_helper_connector_reset); /** * drm_atomic_helper_connector_tv_margins_reset - Resets TV connector properties * @connector: DRM connector * * Resets the TV-related properties attached to a connector. */ void drm_atomic_helper_connector_tv_margins_reset(struct drm_connector *connector) { struct drm_cmdline_mode *cmdline = &connector->cmdline_mode; struct drm_connector_state *state = connector->state; state->tv.margins.left = cmdline->tv_margins.left; state->tv.margins.right = cmdline->tv_margins.right; state->tv.margins.top = cmdline->tv_margins.top; state->tv.margins.bottom = cmdline->tv_margins.bottom; } EXPORT_SYMBOL(drm_atomic_helper_connector_tv_margins_reset); /** * drm_atomic_helper_connector_tv_reset - Resets Analog TV connector properties * @connector: DRM connector * * Resets the analog TV properties attached to a connector */ void drm_atomic_helper_connector_tv_reset(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct drm_cmdline_mode *cmdline = &connector->cmdline_mode; struct drm_connector_state *state = connector->state; struct drm_property *prop; uint64_t val; prop = dev->mode_config.tv_mode_property; if (prop) if (!drm_object_property_get_default_value(&connector->base, prop, &val)) state->tv.mode = val; if (cmdline->tv_mode_specified) state->tv.mode = cmdline->tv_mode; prop = dev->mode_config.tv_select_subconnector_property; if (prop) if (!drm_object_property_get_default_value(&connector->base, prop, &val)) state->tv.select_subconnector = val; prop = dev->mode_config.tv_subconnector_property; if (prop) if (!drm_object_property_get_default_value(&connector->base, prop, &val)) state->tv.subconnector = val; prop = dev->mode_config.tv_brightness_property; if (prop) if (!drm_object_property_get_default_value(&connector->base, prop, &val)) state->tv.brightness = val; prop = dev->mode_config.tv_contrast_property; if (prop) if (!drm_object_property_get_default_value(&connector->base, prop, &val)) state->tv.contrast = val; prop = dev->mode_config.tv_flicker_reduction_property; if (prop) if (!drm_object_property_get_default_value(&connector->base, prop, &val)) state->tv.flicker_reduction = val; prop = dev->mode_config.tv_overscan_property; if (prop) if (!drm_object_property_get_default_value(&connector->base, prop, &val)) state->tv.overscan = val; prop = dev->mode_config.tv_saturation_property; if (prop) if (!drm_object_property_get_default_value(&connector->base, prop, &val)) state->tv.saturation = val; prop = dev->mode_config.tv_hue_property; if (prop) if (!drm_object_property_get_default_value(&connector->base, prop, &val)) state->tv.hue = val; drm_atomic_helper_connector_tv_margins_reset(connector); } EXPORT_SYMBOL(drm_atomic_helper_connector_tv_reset); /** * drm_atomic_helper_connector_tv_check - Validate an analog TV connector state * @connector: DRM Connector * @state: the DRM State object * * Checks the state object to see if the requested state is valid for an * analog TV connector. * * Return: * %0 for success, a negative error code on error. */ int drm_atomic_helper_connector_tv_check(struct drm_connector *connector, struct drm_atomic_state *state) { struct drm_connector_state *old_conn_state = drm_atomic_get_old_connector_state(state, connector); struct drm_connector_state *new_conn_state = drm_atomic_get_new_connector_state(state, connector); struct drm_crtc_state *crtc_state; struct drm_crtc *crtc; crtc = new_conn_state->crtc; if (!crtc) return 0; crtc_state = drm_atomic_get_new_crtc_state(state, crtc); if (!crtc_state) return -EINVAL; if (old_conn_state->tv.mode != new_conn_state->tv.mode) crtc_state->mode_changed = true; if (old_conn_state->tv.margins.left != new_conn_state->tv.margins.left || old_conn_state->tv.margins.right != new_conn_state->tv.margins.right || old_conn_state->tv.margins.top != new_conn_state->tv.margins.top || old_conn_state->tv.margins.bottom != new_conn_state->tv.margins.bottom || old_conn_state->tv.mode != new_conn_state->tv.mode || old_conn_state->tv.brightness != new_conn_state->tv.brightness || old_conn_state->tv.contrast != new_conn_state->tv.contrast || old_conn_state->tv.flicker_reduction != new_conn_state->tv.flicker_reduction || old_conn_state->tv.overscan != new_conn_state->tv.overscan || old_conn_state->tv.saturation != new_conn_state->tv.saturation || old_conn_state->tv.hue != new_conn_state->tv.hue) crtc_state->connectors_changed = true; return 0; } EXPORT_SYMBOL(drm_atomic_helper_connector_tv_check); /** * __drm_atomic_helper_connector_duplicate_state - copy atomic connector state * @connector: connector object * @state: atomic connector state * * Copies atomic state from a connector's current state. This is useful for * drivers that subclass the connector state. */ void __drm_atomic_helper_connector_duplicate_state(struct drm_connector *connector, struct drm_connector_state *state) { memcpy(state, connector->state, sizeof(*state)); if (state->crtc) drm_connector_get(connector); state->commit = NULL; if (state->hdr_output_metadata) drm_property_blob_get(state->hdr_output_metadata); /* Don't copy over a writeback job, they are used only once */ state->writeback_job = NULL; } EXPORT_SYMBOL(__drm_atomic_helper_connector_duplicate_state); /** * drm_atomic_helper_connector_duplicate_state - default state duplicate hook * @connector: drm connector * * Default connector state duplicate hook for drivers which don't have their own * subclassed connector state structure. */ struct drm_connector_state * drm_atomic_helper_connector_duplicate_state(struct drm_connector *connector) { struct drm_connector_state *state; if (WARN_ON(!connector->state)) return NULL; state = kmalloc(sizeof(*state), GFP_KERNEL); if (state) __drm_atomic_helper_connector_duplicate_state(connector, state); return state; } EXPORT_SYMBOL(drm_atomic_helper_connector_duplicate_state); /** * __drm_atomic_helper_connector_destroy_state - release connector state * @state: connector state object to release * * Releases all resources stored in the connector state without actually * freeing the memory of the connector state. This is useful for drivers that * subclass the connector state. */ void __drm_atomic_helper_connector_destroy_state(struct drm_connector_state *state) { if (state->crtc) drm_connector_put(state->connector); if (state->commit) drm_crtc_commit_put(state->commit); if (state->writeback_job) drm_writeback_cleanup_job(state->writeback_job); drm_property_blob_put(state->hdr_output_metadata); } EXPORT_SYMBOL(__drm_atomic_helper_connector_destroy_state); /** * drm_atomic_helper_connector_destroy_state - default state destroy hook * @connector: drm connector * @state: connector state object to release * * Default connector state destroy hook for drivers which don't have their own * subclassed connector state structure. */ void drm_atomic_helper_connector_destroy_state(struct drm_connector *connector, struct drm_connector_state *state) { __drm_atomic_helper_connector_destroy_state(state); kfree(state); } EXPORT_SYMBOL(drm_atomic_helper_connector_destroy_state); /** * __drm_atomic_helper_private_obj_duplicate_state - copy atomic private state * @obj: CRTC object * @state: new private object state * * Copies atomic state from a private objects's current state and resets inferred values. * This is useful for drivers that subclass the private state. */ void __drm_atomic_helper_private_obj_duplicate_state(struct drm_private_obj *obj, struct drm_private_state *state) { memcpy(state, obj->state, sizeof(*state)); } EXPORT_SYMBOL(__drm_atomic_helper_private_obj_duplicate_state); /** * __drm_atomic_helper_bridge_duplicate_state() - Copy atomic bridge state * @bridge: bridge object * @state: atomic bridge state * * Copies atomic state from a bridge's current state and resets inferred values. * This is useful for drivers that subclass the bridge state. */ void __drm_atomic_helper_bridge_duplicate_state(struct drm_bridge *bridge, struct drm_bridge_state *state) { __drm_atomic_helper_private_obj_duplicate_state(&bridge->base, &state->base); state->bridge = bridge; } EXPORT_SYMBOL(__drm_atomic_helper_bridge_duplicate_state); /** * drm_atomic_helper_bridge_duplicate_state() - Duplicate a bridge state object * @bridge: bridge object * * Allocates a new bridge state and initializes it with the current bridge * state values. This helper is meant to be used as a bridge * &drm_bridge_funcs.atomic_duplicate_state hook for bridges that don't * subclass the bridge state. */ struct drm_bridge_state * drm_atomic_helper_bridge_duplicate_state(struct drm_bridge *bridge) { struct drm_bridge_state *new; if (WARN_ON(!bridge->base.state)) return NULL; new = kzalloc(sizeof(*new), GFP_KERNEL); if (new) __drm_atomic_helper_bridge_duplicate_state(bridge, new); return new; } EXPORT_SYMBOL(drm_atomic_helper_bridge_duplicate_state); /** * drm_atomic_helper_bridge_destroy_state() - Destroy a bridge state object * @bridge: the bridge this state refers to * @state: bridge state to destroy * * Destroys a bridge state previously created by * &drm_atomic_helper_bridge_reset() or * &drm_atomic_helper_bridge_duplicate_state(). This helper is meant to be * used as a bridge &drm_bridge_funcs.atomic_destroy_state hook for bridges * that don't subclass the bridge state. */ void drm_atomic_helper_bridge_destroy_state(struct drm_bridge *bridge, struct drm_bridge_state *state) { kfree(state); } EXPORT_SYMBOL(drm_atomic_helper_bridge_destroy_state); /** * __drm_atomic_helper_bridge_reset() - Initialize a bridge state to its * default * @bridge: the bridge this state refers to * @state: bridge state to initialize * * Initializes the bridge state to default values. This is meant to be called * by the bridge &drm_bridge_funcs.atomic_reset hook for bridges that subclass * the bridge state. */ void __drm_atomic_helper_bridge_reset(struct drm_bridge *bridge, struct drm_bridge_state *state) { memset(state, 0, sizeof(*state)); state->bridge = bridge; } EXPORT_SYMBOL(__drm_atomic_helper_bridge_reset); /** * drm_atomic_helper_bridge_reset() - Allocate and initialize a bridge state * to its default * @bridge: the bridge this state refers to * * Allocates the bridge state and initializes it to default values. This helper * is meant to be used as a bridge &drm_bridge_funcs.atomic_reset hook for * bridges that don't subclass the bridge state. */ struct drm_bridge_state * drm_atomic_helper_bridge_reset(struct drm_bridge *bridge) { struct drm_bridge_state *bridge_state; bridge_state = kzalloc(sizeof(*bridge_state), GFP_KERNEL); if (!bridge_state) return ERR_PTR(-ENOMEM); __drm_atomic_helper_bridge_reset(bridge, bridge_state); return bridge_state; } EXPORT_SYMBOL(drm_atomic_helper_bridge_reset); |
| 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * acpi.h - ACPI Interface * * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> */ #ifndef _LINUX_ACPI_H #define _LINUX_ACPI_H #include <linux/cleanup.h> #include <linux/errno.h> #include <linux/ioport.h> /* for struct resource */ #include <linux/resource_ext.h> #include <linux/device.h> #include <linux/mod_devicetable.h> #include <linux/property.h> #include <linux/uuid.h> #include <linux/node.h> struct irq_domain; struct irq_domain_ops; #ifndef _LINUX #define _LINUX #endif #include <acpi/acpi.h> #include <acpi/acpi_numa.h> #ifdef CONFIG_ACPI #include <linux/list.h> #include <linux/dynamic_debug.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/fw_table.h> #include <acpi/acpi_bus.h> #include <acpi/acpi_drivers.h> #include <acpi/acpi_io.h> #include <asm/acpi.h> #ifdef CONFIG_ACPI_TABLE_LIB #define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, "ACPI") #define __init_or_acpilib #define __initdata_or_acpilib #else #define EXPORT_SYMBOL_ACPI_LIB(x) #define __init_or_acpilib __init #define __initdata_or_acpilib __initdata #endif static inline acpi_handle acpi_device_handle(struct acpi_device *adev) { return adev ? adev->handle : NULL; } #define ACPI_COMPANION(dev) to_acpi_device_node((dev)->fwnode) #define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? \ acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) #define ACPI_HANDLE_FWNODE(fwnode) \ acpi_device_handle(to_acpi_device_node(fwnode)) static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) { struct fwnode_handle *fwnode; fwnode = kzalloc(sizeof(struct fwnode_handle), GFP_KERNEL); if (!fwnode) return NULL; fwnode_init(fwnode, &acpi_static_fwnode_ops); return fwnode; } static inline void acpi_free_fwnode_static(struct fwnode_handle *fwnode) { if (WARN_ON(!is_acpi_static_node(fwnode))) return; kfree(fwnode); } static inline bool has_acpi_companion(struct device *dev) { return is_acpi_device_node(dev->fwnode); } static inline void acpi_preset_companion(struct device *dev, struct acpi_device *parent, u64 addr) { ACPI_COMPANION_SET(dev, acpi_find_child_device(parent, addr, false)); } static inline const char *acpi_dev_name(struct acpi_device *adev) { return dev_name(&adev->dev); } struct device *acpi_get_first_physical_node(struct acpi_device *adev); enum acpi_irq_model_id { ACPI_IRQ_MODEL_PIC = 0, ACPI_IRQ_MODEL_IOAPIC, ACPI_IRQ_MODEL_IOSAPIC, ACPI_IRQ_MODEL_PLATFORM, ACPI_IRQ_MODEL_GIC, ACPI_IRQ_MODEL_LPIC, ACPI_IRQ_MODEL_RINTC, ACPI_IRQ_MODEL_COUNT }; extern enum acpi_irq_model_id acpi_irq_model; enum acpi_interrupt_id { ACPI_INTERRUPT_PMI = 1, ACPI_INTERRUPT_INIT, ACPI_INTERRUPT_CPEI, ACPI_INTERRUPT_COUNT }; #define ACPI_SPACE_MEM 0 enum acpi_address_range_id { ACPI_ADDRESS_RANGE_MEMORY = 1, ACPI_ADDRESS_RANGE_RESERVED = 2, ACPI_ADDRESS_RANGE_ACPI = 3, ACPI_ADDRESS_RANGE_NVS = 4, ACPI_ADDRESS_RANGE_COUNT }; /* Table Handlers */ typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); /* Debugger support */ struct acpi_debugger_ops { int (*create_thread)(acpi_osd_exec_callback function, void *context); ssize_t (*write_log)(const char *msg); ssize_t (*read_cmd)(char *buffer, size_t length); int (*wait_command_ready)(bool single_step, char *buffer, size_t length); int (*notify_command_complete)(void); }; struct acpi_debugger { const struct acpi_debugger_ops *ops; struct module *owner; struct mutex lock; }; #ifdef CONFIG_ACPI_DEBUGGER int __init acpi_debugger_init(void); int acpi_register_debugger(struct module *owner, const struct acpi_debugger_ops *ops); void acpi_unregister_debugger(const struct acpi_debugger_ops *ops); int acpi_debugger_create_thread(acpi_osd_exec_callback function, void *context); ssize_t acpi_debugger_write_log(const char *msg); ssize_t acpi_debugger_read_cmd(char *buffer, size_t buffer_length); int acpi_debugger_wait_command_ready(void); int acpi_debugger_notify_command_complete(void); #else static inline int acpi_debugger_init(void) { return -ENODEV; } static inline int acpi_register_debugger(struct module *owner, const struct acpi_debugger_ops *ops) { return -ENODEV; } static inline void acpi_unregister_debugger(const struct acpi_debugger_ops *ops) { } static inline int acpi_debugger_create_thread(acpi_osd_exec_callback function, void *context) { return -ENODEV; } static inline int acpi_debugger_write_log(const char *msg) { return -ENODEV; } static inline int acpi_debugger_read_cmd(char *buffer, u32 buffer_length) { return -ENODEV; } static inline int acpi_debugger_wait_command_ready(void) { return -ENODEV; } static inline int acpi_debugger_notify_command_complete(void) { return -ENODEV; } #endif #define BAD_MADT_ENTRY(entry, end) ( \ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) void __iomem *__acpi_map_table(unsigned long phys, unsigned long size); void __acpi_unmap_table(void __iomem *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); void acpi_boot_table_prepare (void); void acpi_boot_table_init (void); int acpi_mps_check (void); int acpi_numa_init (void); int acpi_locate_initial_tables (void); void acpi_reserve_initial_tables (void); void acpi_table_init_complete (void); int acpi_table_init (void); static inline struct acpi_table_header *acpi_get_table_pointer(char *signature, u32 instance) { struct acpi_table_header *table; int status = acpi_get_table(signature, instance, &table); if (ACPI_FAILURE(status)) return ERR_PTR(-ENOENT); return table; } DEFINE_FREE(acpi_put_table, struct acpi_table_header *, if (!IS_ERR_OR_NULL(_T)) acpi_put_table(_T)) int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init_or_acpilib acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, acpi_tbl_entry_handler handler, unsigned int max_entries); int __init_or_acpilib acpi_table_parse_entries_array(char *id, unsigned long table_size, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries); int acpi_table_parse_madt(enum acpi_madt_type id, acpi_tbl_entry_handler handler, unsigned int max_entries); int __init_or_acpilib acpi_table_parse_cedt(enum acpi_cedt_type id, acpi_tbl_entry_handler_arg handler_arg, void *arg); int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else static inline void acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) { } #endif void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); #if defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH) void acpi_arch_dma_setup(struct device *dev); #else static inline void acpi_arch_dma_setup(struct device *dev) { } #endif #ifdef CONFIG_ARM64 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa); #else static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } #endif #ifdef CONFIG_RISCV void acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa); #else static inline void acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa) { } #endif #ifndef PHYS_CPUID_INVALID typedef u32 phys_cpuid_t; #define PHYS_CPUID_INVALID (phys_cpuid_t)(-1) #endif static inline bool invalid_logical_cpuid(u32 cpuid) { return (int)cpuid < 0; } static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) { return phys_id == PHYS_CPUID_INVALID; } int __init acpi_get_madt_revision(void); /* Validate the processor object's proc_id */ bool acpi_duplicate_processor_id(int proc_id); /* Processor _CTS control */ struct acpi_processor_power; #ifdef CONFIG_ACPI_PROCESSOR_CSTATE bool acpi_processor_claim_cst_control(void); int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, struct acpi_processor_power *info); #else static inline bool acpi_processor_claim_cst_control(void) { return false; } static inline int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, struct acpi_processor_power *info) { return -ENODEV; } #endif #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu); int acpi_unmap_cpu(int cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ acpi_handle acpi_get_processor_handle(int cpu); #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base); int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base); int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base); void acpi_irq_stats_init(void); extern u32 acpi_irq_handled; extern u32 acpi_irq_not_handled; extern unsigned int acpi_sci_irq; extern bool acpi_no_s5; #define INVALID_ACPI_IRQ ((unsigned)-1) static inline bool acpi_sci_irq_valid(void) { return acpi_sci_irq != INVALID_ACPI_IRQ; } extern int sbf_port; int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); typedef struct fwnode_handle *(*acpi_gsi_domain_disp_fn)(u32); void acpi_set_irq_model(enum acpi_irq_model_id model, acpi_gsi_domain_disp_fn fn); acpi_gsi_domain_disp_fn acpi_get_gsi_dispatcher(void); void acpi_set_gsi_to_irq_fallback(u32 (*)(u32)); struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags, unsigned int size, struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data); #ifdef CONFIG_X86_IO_APIC extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); #else static inline int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) { return -1; } #endif /* * This function undoes the effect of one call to acpi_register_gsi(). * If this matches the last registration, any IRQ resources for gsi * are freed. */ void acpi_unregister_gsi (u32 gsi); struct pci_dev; struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin); int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); bool acpi_isa_irq_available(int irq); #ifdef CONFIG_PCI void acpi_penalize_sci_irq(int irq, int trigger, int polarity); #else static inline void acpi_penalize_sci_irq(int irq, int trigger, int polarity) { } #endif void acpi_pci_irq_disable (struct pci_dev *dev); extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); extern int ec_transaction(u8 command, const u8 *wdata, unsigned wdata_len, u8 *rdata, unsigned rdata_len); extern acpi_handle ec_get_handle(void); extern bool acpi_is_pnp_device(struct acpi_device *); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) typedef void (*wmi_notify_handler) (union acpi_object *data, void *context); int wmi_instance_count(const char *guid); extern acpi_status wmi_evaluate_method(const char *guid, u8 instance, u32 method_id, const struct acpi_buffer *in, struct acpi_buffer *out); extern acpi_status wmi_query_block(const char *guid, u8 instance, struct acpi_buffer *out); extern acpi_status wmi_set_block(const char *guid, u8 instance, const struct acpi_buffer *in); extern acpi_status wmi_install_notify_handler(const char *guid, wmi_notify_handler handler, void *data); extern acpi_status wmi_remove_notify_handler(const char *guid); extern bool wmi_has_guid(const char *guid); extern char *wmi_get_acpi_device_uid(const char *guid); #endif /* CONFIG_ACPI_WMI */ #define ACPI_VIDEO_OUTPUT_SWITCHING 0x0001 #define ACPI_VIDEO_DEVICE_POSTING 0x0002 #define ACPI_VIDEO_ROM_AVAILABLE 0x0004 #define ACPI_VIDEO_BACKLIGHT 0x0008 #define ACPI_VIDEO_BACKLIGHT_FORCE_VENDOR 0x0010 #define ACPI_VIDEO_BACKLIGHT_FORCE_VIDEO 0x0020 #define ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VENDOR 0x0040 #define ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VIDEO 0x0080 #define ACPI_VIDEO_BACKLIGHT_DMI_VENDOR 0x0100 #define ACPI_VIDEO_BACKLIGHT_DMI_VIDEO 0x0200 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR 0x0400 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO 0x0800 extern char acpi_video_backlight_string[]; extern long acpi_is_video_device(acpi_handle handle); extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); #ifdef CONFIG_ACPI_THERMAL_LIB int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp); int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp); int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp); int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); #endif #ifdef CONFIG_ACPI_HMAT int acpi_get_genport_coordinates(u32 uid, struct access_coordinate *coord); #else static inline int acpi_get_genport_coordinates(u32 uid, struct access_coordinate *coord) { return -EOPNOTSUPP; } #endif #ifdef CONFIG_ACPI_NUMA int acpi_map_pxm_to_node(int pxm); int acpi_get_node(acpi_handle handle); /** * pxm_to_online_node - Map proximity ID to online node * @pxm: ACPI proximity ID * * This is similar to pxm_to_node(), but always returns an online * node. When the mapped node from a given proximity ID is offline, it * looks up the node distance table and returns the nearest online node. * * ACPI device drivers, which are called after the NUMA initialization has * completed in the kernel, can call this interface to obtain their device * NUMA topology from ACPI tables. Such drivers do not have to deal with * offline nodes. A node may be offline when SRAT memory entry does not exist, * or NUMA is disabled, ex. "numa=off" on x86. */ static inline int pxm_to_online_node(int pxm) { int node = pxm_to_node(pxm); return numa_map_to_online_node(node); } #else static inline int pxm_to_online_node(int pxm) { return 0; } static inline int acpi_map_pxm_to_node(int pxm) { return 0; } static inline int acpi_get_node(acpi_handle handle) { return 0; } #endif extern int pnpacpi_disabled; #define PXM_INVAL (-1) bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res); bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res); bool acpi_dev_resource_address_space(struct acpi_resource *ares, struct resource_win *win); bool acpi_dev_resource_ext_address_space(struct acpi_resource *ares, struct resource_win *win); unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable, u8 wake_capable); unsigned int acpi_dev_get_irq_type(int triggering, int polarity); bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, struct resource *res); void acpi_dev_free_resource_list(struct list_head *list); int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, int (*preproc)(struct acpi_resource *, void *), void *preproc_data); int acpi_dev_get_dma_resources(struct acpi_device *adev, struct list_head *list); int acpi_dev_get_memory_resources(struct acpi_device *adev, struct list_head *list); int acpi_dev_filter_resource_type(struct acpi_resource *ares, unsigned long types); static inline int acpi_dev_filter_resource_type_cb(struct acpi_resource *ares, void *arg) { return acpi_dev_filter_resource_type(ares, (unsigned long)arg); } struct acpi_device *acpi_resource_consumer(struct resource *res); int acpi_check_resource_conflict(const struct resource *res); int acpi_check_region(resource_size_t start, resource_size_t n, const char *name); int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION extern int acpi_check_s4_hw_signature; #endif #ifdef CONFIG_PM_SLEEP void __init acpi_old_suspend_ordering(void); void __init acpi_nvs_nosave(void); void __init acpi_nvs_nosave_s3(void); void __init acpi_sleep_no_blacklist(void); #endif /* CONFIG_PM_SLEEP */ int acpi_register_wakeup_handler( int wake_irq, bool (*wakeup)(void *context), void *context); void acpi_unregister_wakeup_handler( bool (*wakeup)(void *context), void *context); struct acpi_osc_context { char *uuid_str; /* UUID string */ int rev; struct acpi_buffer cap; /* list of DWORD capabilities */ struct acpi_buffer ret; /* free by caller if success */ }; acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); /* Number of _OSC capability DWORDS depends on bridge type */ #define OSC_PCI_CAPABILITY_DWORDS 3 #define OSC_CXL_CAPABILITY_DWORDS 5 /* Indexes into _OSC Capabilities Buffer (DWORDs 2 to 5 are device-specific) */ #define OSC_QUERY_DWORD 0 /* DWORD 1 */ #define OSC_SUPPORT_DWORD 1 /* DWORD 2 */ #define OSC_CONTROL_DWORD 2 /* DWORD 3 */ #define OSC_EXT_SUPPORT_DWORD 3 /* DWORD 4 */ #define OSC_EXT_CONTROL_DWORD 4 /* DWORD 5 */ /* _OSC Capabilities DWORD 1: Query/Control and Error Returns (generic) */ #define OSC_QUERY_ENABLE 0x00000001 /* input */ #define OSC_REQUEST_ERROR 0x00000002 /* return */ #define OSC_INVALID_UUID_ERROR 0x00000004 /* return */ #define OSC_INVALID_REVISION_ERROR 0x00000008 /* return */ #define OSC_CAPABILITIES_MASK_ERROR 0x00000010 /* return */ /* Platform-Wide Capabilities _OSC: Capabilities DWORD 2: Support Field */ #define OSC_SB_PAD_SUPPORT 0x00000001 #define OSC_SB_PPC_OST_SUPPORT 0x00000002 #define OSC_SB_PR3_SUPPORT 0x00000004 #define OSC_SB_HOTPLUG_OST_SUPPORT 0x00000008 #define OSC_SB_APEI_SUPPORT 0x00000010 #define OSC_SB_CPC_SUPPORT 0x00000020 #define OSC_SB_CPCV2_SUPPORT 0x00000040 #define OSC_SB_PCLPI_SUPPORT 0x00000080 #define OSC_SB_OSLPI_SUPPORT 0x00000100 #define OSC_SB_FAST_THERMAL_SAMPLING_SUPPORT 0x00000200 #define OSC_SB_OVER_16_PSTATES_SUPPORT 0x00000400 #define OSC_SB_GED_SUPPORT 0x00000800 #define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT 0x00001000 #define OSC_SB_IRQ_RESOURCE_SOURCE_SUPPORT 0x00002000 #define OSC_SB_CPC_FLEXIBLE_ADR_SPACE 0x00004000 #define OSC_SB_GENERIC_INITIATOR_SUPPORT 0x00020000 #define OSC_SB_NATIVE_USB4_SUPPORT 0x00040000 #define OSC_SB_BATTERY_CHARGE_LIMITING_SUPPORT 0x00080000 #define OSC_SB_PRM_SUPPORT 0x00200000 #define OSC_SB_FFH_OPR_SUPPORT 0x00400000 extern bool osc_sb_apei_support_acked; extern bool osc_pc_lpi_support_confirmed; extern bool osc_sb_native_usb4_support_confirmed; extern bool osc_sb_cppc2_support_acked; extern bool osc_cpc_flexible_adr_space_confirmed; /* USB4 Capabilities */ #define OSC_USB_USB3_TUNNELING 0x00000001 #define OSC_USB_DP_TUNNELING 0x00000002 #define OSC_USB_PCIE_TUNNELING 0x00000004 #define OSC_USB_XDOMAIN 0x00000008 extern u32 osc_sb_native_usb4_control; /* PCI Host Bridge _OSC: Capabilities DWORD 2: Support Field */ #define OSC_PCI_EXT_CONFIG_SUPPORT 0x00000001 #define OSC_PCI_ASPM_SUPPORT 0x00000002 #define OSC_PCI_CLOCK_PM_SUPPORT 0x00000004 #define OSC_PCI_SEGMENT_GROUPS_SUPPORT 0x00000008 #define OSC_PCI_MSI_SUPPORT 0x00000010 #define OSC_PCI_EDR_SUPPORT 0x00000080 #define OSC_PCI_HPX_TYPE_3_SUPPORT 0x00000100 /* PCI Host Bridge _OSC: Capabilities DWORD 3: Control Field */ #define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 0x00000001 #define OSC_PCI_SHPC_NATIVE_HP_CONTROL 0x00000002 #define OSC_PCI_EXPRESS_PME_CONTROL 0x00000004 #define OSC_PCI_EXPRESS_AER_CONTROL 0x00000008 #define OSC_PCI_EXPRESS_CAPABILITY_CONTROL 0x00000010 #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 #define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 /* CXL _OSC: Capabilities DWORD 4: Support Field */ #define OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT 0x00000001 #define OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT 0x00000002 #define OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT 0x00000004 #define OSC_CXL_NATIVE_HP_SUPPORT 0x00000008 /* CXL _OSC: Capabilities DWORD 5: Control Field */ #define OSC_CXL_ERROR_REPORTING_CONTROL 0x00000001 static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) { u32 *ret = context->ret.pointer; return ret[OSC_CONTROL_DWORD]; } static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) { u32 *ret = context->ret.pointer; return ret[OSC_EXT_CONTROL_DWORD]; } #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 #define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 #define ACPI_GSB_ACCESS_ATTRIB_WORD 0x00000008 #define ACPI_GSB_ACCESS_ATTRIB_BLOCK 0x0000000A #define ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE 0x0000000B #define ACPI_GSB_ACCESS_ATTRIB_WORD_CALL 0x0000000C #define ACPI_GSB_ACCESS_ATTRIB_BLOCK_CALL 0x0000000D #define ACPI_GSB_ACCESS_ATTRIB_RAW_BYTES 0x0000000E #define ACPI_GSB_ACCESS_ATTRIB_RAW_PROCESS 0x0000000F /* Enable _OST when all relevant hotplug operations are enabled */ #if defined(CONFIG_ACPI_HOTPLUG_CPU) && \ defined(CONFIG_ACPI_HOTPLUG_MEMORY) && \ defined(CONFIG_ACPI_CONTAINER) #define ACPI_HOTPLUG_OST #endif /* _OST Source Event Code (OSPM Action) */ #define ACPI_OST_EC_OSPM_SHUTDOWN 0x100 #define ACPI_OST_EC_OSPM_EJECT 0x103 #define ACPI_OST_EC_OSPM_INSERTION 0x200 /* _OST General Processing Status Code */ #define ACPI_OST_SC_SUCCESS 0x0 #define ACPI_OST_SC_NON_SPECIFIC_FAILURE 0x1 #define ACPI_OST_SC_UNRECOGNIZED_NOTIFY 0x2 /* _OST OS Shutdown Processing (0x100) Status Code */ #define ACPI_OST_SC_OS_SHUTDOWN_DENIED 0x80 #define ACPI_OST_SC_OS_SHUTDOWN_IN_PROGRESS 0x81 #define ACPI_OST_SC_OS_SHUTDOWN_COMPLETED 0x82 #define ACPI_OST_SC_OS_SHUTDOWN_NOT_SUPPORTED 0x83 /* _OST Ejection Request (0x3, 0x103) Status Code */ #define ACPI_OST_SC_EJECT_NOT_SUPPORTED 0x80 #define ACPI_OST_SC_DEVICE_IN_USE 0x81 #define ACPI_OST_SC_DEVICE_BUSY 0x82 #define ACPI_OST_SC_EJECT_DEPENDENCY_BUSY 0x83 #define ACPI_OST_SC_EJECT_IN_PROGRESS 0x84 /* _OST Insertion Request (0x200) Status Code */ #define ACPI_OST_SC_INSERT_IN_PROGRESS 0x80 #define ACPI_OST_SC_DRIVER_LOAD_FAILURE 0x81 #define ACPI_OST_SC_INSERT_NOT_SUPPORTED 0x82 enum acpi_predicate { all_versions, less_than_or_equal, equal, greater_than_or_equal, }; /* Table must be terminted by a NULL entry */ struct acpi_platform_list { char oem_id[ACPI_OEM_ID_SIZE+1]; char oem_table_id[ACPI_OEM_TABLE_ID_SIZE+1]; u32 oem_revision; char *table; enum acpi_predicate pred; char *reason; u32 data; }; int acpi_match_platform_list(const struct acpi_platform_list *plat); extern void acpi_early_init(void); extern void acpi_subsystem_init(void); extern int acpi_nvs_register(__u64 start, __u64 size); extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), void *data); const struct acpi_device_id *acpi_match_acpi_device(const struct acpi_device_id *ids, const struct acpi_device *adev); const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, const struct device *dev); const void *acpi_device_get_match_data(const struct device *dev); extern bool acpi_driver_match_device(struct device *dev, const struct device_driver *drv); int acpi_device_uevent_modalias(const struct device *, struct kobj_uevent_env *); int acpi_device_modalias(struct device *, char *, int); struct platform_device *acpi_create_platform_device(struct acpi_device *, const struct property_entry *); #define ACPI_PTR(_ptr) (_ptr) static inline void acpi_device_set_enumerated(struct acpi_device *adev) { adev->flags.visited = true; } static inline void acpi_device_clear_enumerated(struct acpi_device *adev) { adev->flags.visited = false; } enum acpi_reconfig_event { ACPI_RECONFIG_DEVICE_ADD = 0, ACPI_RECONFIG_DEVICE_REMOVE, }; int acpi_reconfig_notifier_register(struct notifier_block *nb); int acpi_reconfig_notifier_unregister(struct notifier_block *nb); #ifdef CONFIG_ACPI_GTDT int acpi_gtdt_init(struct acpi_table_header *table, int *platform_timer_count); int acpi_gtdt_map_ppi(int type); bool acpi_gtdt_c3stop(int type); #endif #ifndef ACPI_HAVE_ARCH_SET_ROOT_POINTER static __always_inline void acpi_arch_set_root_pointer(u64 addr) { } #endif #ifndef ACPI_HAVE_ARCH_GET_ROOT_POINTER static __always_inline u64 acpi_arch_get_root_pointer(void) { return 0; } #endif int acpi_get_local_u64_address(acpi_handle handle, u64 *addr); int acpi_get_local_address(acpi_handle handle, u32 *addr); const char *acpi_get_subsystem_id(acpi_handle handle); #ifdef CONFIG_ACPI_MRRM int acpi_mrrm_max_mem_region(void); #endif #else /* !CONFIG_ACPI */ #define acpi_disabled 1 #define ACPI_COMPANION(dev) (NULL) #define ACPI_COMPANION_SET(dev, adev) do { } while (0) #define ACPI_HANDLE(dev) (NULL) #define ACPI_HANDLE_FWNODE(fwnode) (NULL) /* Get rid of the -Wunused-variable for adev */ #define acpi_dev_uid_match(adev, uid2) (adev && false) #define acpi_dev_hid_uid_match(adev, hid2, uid2) (adev && false) struct fwnode_handle; static inline bool acpi_dev_found(const char *hid) { return false; } static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) { return false; } struct acpi_device; static inline int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer) { return -ENODEV; } static inline struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) { return NULL; } static inline bool acpi_reduced_hardware(void) { return false; } static inline void acpi_dev_put(struct acpi_device *adev) {} static inline bool is_acpi_node(const struct fwnode_handle *fwnode) { return false; } static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode) { return false; } static inline struct acpi_device *to_acpi_device_node(const struct fwnode_handle *fwnode) { return NULL; } static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode) { return false; } static inline struct acpi_data_node *to_acpi_data_node(const struct fwnode_handle *fwnode) { return NULL; } static inline bool acpi_data_node_match(const struct fwnode_handle *fwnode, const char *name) { return false; } static inline struct fwnode_handle *acpi_fwnode_handle(struct acpi_device *adev) { return NULL; } static inline acpi_handle acpi_device_handle(struct acpi_device *adev) { return NULL; } static inline bool has_acpi_companion(struct device *dev) { return false; } static inline void acpi_preset_companion(struct device *dev, struct acpi_device *parent, u64 addr) { } static inline const char *acpi_dev_name(struct acpi_device *adev) { return NULL; } static inline struct device *acpi_get_first_physical_node(struct acpi_device *adev) { return NULL; } static inline void acpi_early_init(void) { } static inline void acpi_subsystem_init(void) { } static inline int early_acpi_boot_init(void) { return 0; } static inline int acpi_boot_init(void) { return 0; } static inline void acpi_boot_table_prepare(void) { } static inline void acpi_boot_table_init(void) { } static inline int acpi_mps_check(void) { return 0; } static inline int acpi_check_resource_conflict(struct resource *res) { return 0; } static inline int acpi_check_region(resource_size_t start, resource_size_t n, const char *name) { return 0; } struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) { return -ENODEV; } static inline int acpi_nvs_register(__u64 start, __u64 size) { return 0; } static inline int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), void *data) { return 0; } struct acpi_device_id; static inline const struct acpi_device_id *acpi_match_acpi_device( const struct acpi_device_id *ids, const struct acpi_device *adev) { return NULL; } static inline const struct acpi_device_id *acpi_match_device( const struct acpi_device_id *ids, const struct device *dev) { return NULL; } static inline const void *acpi_device_get_match_data(const struct device *dev) { return NULL; } static inline bool acpi_driver_match_device(struct device *dev, const struct device_driver *drv) { return false; } static inline bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs) { return false; } static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4) { return NULL; } static inline union acpi_object *acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4, acpi_object_type type) { return NULL; } static inline int acpi_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env *env) { return -ENODEV; } static inline int acpi_device_modalias(struct device *dev, char *buf, int size) { return -ENODEV; } static inline struct platform_device * acpi_create_platform_device(struct acpi_device *adev, const struct property_entry *properties) { return NULL; } static inline bool acpi_dma_supported(const struct acpi_device *adev) { return false; } static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) { return DEV_DMA_NOT_SUPPORTED; } static inline int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map) { return -ENODEV; } static inline int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) { return 0; } static inline int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id) { return 0; } #define ACPI_PTR(_ptr) (NULL) static inline void acpi_device_set_enumerated(struct acpi_device *adev) { } static inline void acpi_device_clear_enumerated(struct acpi_device *adev) { } static inline int acpi_reconfig_notifier_register(struct notifier_block *nb) { return -EINVAL; } static inline int acpi_reconfig_notifier_unregister(struct notifier_block *nb) { return -EINVAL; } static inline struct acpi_device *acpi_resource_consumer(struct resource *res) { return NULL; } static inline int acpi_get_local_address(acpi_handle handle, u32 *addr) { return -ENODEV; } static inline const char *acpi_get_subsystem_id(acpi_handle handle) { return ERR_PTR(-ENODEV); } static inline int acpi_register_wakeup_handler(int wake_irq, bool (*wakeup)(void *context), void *context) { return -ENXIO; } static inline void acpi_unregister_wakeup_handler( bool (*wakeup)(void *context), void *context) { } struct acpi_osc_context; static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) { return 0; } static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) { return 0; } static inline bool acpi_sleep_state_supported(u8 sleep_state) { return false; } static inline acpi_handle acpi_get_processor_handle(int cpu) { return NULL; } static inline int acpi_mrrm_max_mem_region(void) { return 1; } #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HMAT int hmat_get_extended_linear_cache_size(struct resource *backing_res, int nid, resource_size_t *size); #else static inline int hmat_get_extended_linear_cache_size(struct resource *backing_res, int nid, resource_size_t *size) { return -EOPNOTSUPP; } #endif extern void arch_post_acpi_subsys_init(void); #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_ioapic_add(acpi_handle root); #else static inline int acpi_ioapic_add(acpi_handle root) { return 0; } #endif #ifdef CONFIG_ACPI void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state, u32 pm1a_ctrl, u32 pm1b_ctrl)); acpi_status acpi_os_prepare_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, u32 val_a, u32 val_b)); acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b); struct acpi_s2idle_dev_ops { struct list_head list_node; void (*prepare)(void); void (*check)(void); void (*restore)(void); }; #if defined(CONFIG_SUSPEND) && defined(CONFIG_X86) int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg); void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg); #else /* CONFIG_SUSPEND && CONFIG_X86 */ static inline int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg) { return -ENODEV; } static inline void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg) { } #endif /* CONFIG_SUSPEND && CONFIG_X86 */ void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif #if defined(CONFIG_ACPI) && defined(CONFIG_PM) int acpi_dev_suspend(struct device *dev, bool wakeup); int acpi_dev_resume(struct device *dev); int acpi_subsys_runtime_suspend(struct device *dev); int acpi_subsys_runtime_resume(struct device *dev); int acpi_dev_pm_attach(struct device *dev, bool power_on); bool acpi_storage_d3(struct device *dev); bool acpi_dev_state_d0(struct device *dev); #else static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; } static inline int acpi_dev_pm_attach(struct device *dev, bool power_on) { return 0; } static inline bool acpi_storage_d3(struct device *dev) { return false; } static inline bool acpi_dev_state_d0(struct device *dev) { return true; } #endif #if defined(CONFIG_ACPI) && defined(CONFIG_PM_SLEEP) int acpi_subsys_prepare(struct device *dev); void acpi_subsys_complete(struct device *dev); int acpi_subsys_suspend_late(struct device *dev); int acpi_subsys_suspend_noirq(struct device *dev); int acpi_subsys_suspend(struct device *dev); int acpi_subsys_freeze(struct device *dev); int acpi_subsys_poweroff(struct device *dev); int acpi_subsys_restore_early(struct device *dev); #else static inline int acpi_subsys_prepare(struct device *dev) { return 0; } static inline void acpi_subsys_complete(struct device *dev) {} static inline int acpi_subsys_suspend_late(struct device *dev) { return 0; } static inline int acpi_subsys_suspend_noirq(struct device *dev) { return 0; } static inline int acpi_subsys_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_freeze(struct device *dev) { return 0; } static inline int acpi_subsys_poweroff(struct device *dev) { return 0; } static inline int acpi_subsys_restore_early(struct device *dev) { return 0; } #endif #if defined(CONFIG_ACPI_EC) && defined(CONFIG_PM_SLEEP) void acpi_ec_mark_gpe_for_wake(void); void acpi_ec_set_gpe_wake_mask(u8 action); #else static inline void acpi_ec_mark_gpe_for_wake(void) {} static inline void acpi_ec_set_gpe_wake_mask(u8 action) {} #endif #ifdef CONFIG_ACPI char *acpi_handle_path(acpi_handle handle); __printf(3, 4) void acpi_handle_printk(const char *level, acpi_handle handle, const char *fmt, ...); void acpi_evaluation_failure_warn(acpi_handle handle, const char *name, acpi_status status); #else /* !CONFIG_ACPI */ static inline __printf(3, 4) void acpi_handle_printk(const char *level, void *handle, const char *fmt, ...) {} static inline void acpi_evaluation_failure_warn(acpi_handle handle, const char *name, acpi_status status) {} #endif /* !CONFIG_ACPI */ #if defined(CONFIG_ACPI) && defined(CONFIG_DYNAMIC_DEBUG) __printf(3, 4) void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const char *fmt, ...); #endif /* * acpi_handle_<level>: Print message with ACPI prefix and object path * * These interfaces acquire the global namespace mutex to obtain an object * path. In interrupt context, it shows the object path as <n/a>. */ #define acpi_handle_emerg(handle, fmt, ...) \ acpi_handle_printk(KERN_EMERG, handle, fmt, ##__VA_ARGS__) #define acpi_handle_alert(handle, fmt, ...) \ acpi_handle_printk(KERN_ALERT, handle, fmt, ##__VA_ARGS__) #define acpi_handle_crit(handle, fmt, ...) \ acpi_handle_printk(KERN_CRIT, handle, fmt, ##__VA_ARGS__) #define acpi_handle_err(handle, fmt, ...) \ acpi_handle_printk(KERN_ERR, handle, fmt, ##__VA_ARGS__) #define acpi_handle_warn(handle, fmt, ...) \ acpi_handle_printk(KERN_WARNING, handle, fmt, ##__VA_ARGS__) #define acpi_handle_notice(handle, fmt, ...) \ acpi_handle_printk(KERN_NOTICE, handle, fmt, ##__VA_ARGS__) #define acpi_handle_info(handle, fmt, ...) \ acpi_handle_printk(KERN_INFO, handle, fmt, ##__VA_ARGS__) #if defined(DEBUG) #define acpi_handle_debug(handle, fmt, ...) \ acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__) #else #if defined(CONFIG_DYNAMIC_DEBUG) #define acpi_handle_debug(handle, fmt, ...) \ _dynamic_func_call(fmt, __acpi_handle_debug, \ handle, pr_fmt(fmt), ##__VA_ARGS__) #else #define acpi_handle_debug(handle, fmt, ...) \ ({ \ if (0) \ acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__); \ 0; \ }) #endif #endif #if defined(CONFIG_ACPI) && defined(CONFIG_GPIOLIB) bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); bool acpi_gpio_get_io_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *con_id, int index, bool *wake_capable); #else static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { return false; } static inline bool acpi_gpio_get_io_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { return false; } static inline int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *con_id, int index, bool *wake_capable) { return -ENXIO; } #endif static inline int acpi_dev_gpio_irq_wake_get(struct acpi_device *adev, int index, bool *wake_capable) { return acpi_dev_gpio_irq_wake_get_by(adev, NULL, index, wake_capable); } static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *con_id, int index) { return acpi_dev_gpio_irq_wake_get_by(adev, con_id, index, NULL); } static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) { return acpi_dev_gpio_irq_wake_get_by(adev, NULL, index, NULL); } /* Device properties */ #ifdef CONFIG_ACPI int acpi_dev_get_property(const struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj); int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, size_t num_args, struct fwnode_reference_args *args); static inline int acpi_node_get_property_reference( const struct fwnode_handle *fwnode, const char *name, size_t index, struct fwnode_reference_args *args) { return __acpi_node_get_property_reference(fwnode, name, index, NR_FWNODE_REFERENCE_ARGS, args); } static inline bool acpi_dev_has_props(const struct acpi_device *adev) { return !list_empty(&adev->data.properties); } struct acpi_device_properties * acpi_data_add_props(struct acpi_device_data *data, const guid_t *guid, union acpi_object *properties); int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, void **valptr); struct acpi_probe_entry; typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *, struct acpi_probe_entry *); #define ACPI_TABLE_ID_LEN 5 /** * struct acpi_probe_entry - boot-time probing entry * @id: ACPI table name * @type: Optional subtable type to match * (if @id contains subtables) * @subtable_valid: Optional callback to check the validity of * the subtable * @probe_table: Callback to the driver being probed when table * match is successful * @probe_subtbl: Callback to the driver being probed when table and * subtable match (and optional callback is successful) * @driver_data: Sideband data provided back to the driver */ struct acpi_probe_entry { __u8 id[ACPI_TABLE_ID_LEN]; __u8 type; acpi_probe_entry_validate_subtbl subtable_valid; union { acpi_tbl_table_handler probe_table; acpi_tbl_entry_handler probe_subtbl; }; kernel_ulong_t driver_data; }; void arch_sort_irqchip_probe(struct acpi_probe_entry *ap_head, int nr); #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, \ valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ __used __section("__" #table "_acpi_probe_table") = { \ .id = table_id, \ .type = subtable, \ .subtable_valid = valid, \ .probe_table = fn, \ .driver_data = data, \ } #define ACPI_DECLARE_SUBTABLE_PROBE_ENTRY(table, name, table_id, \ subtable, valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ __used __section("__" #table "_acpi_probe_table") = { \ .id = table_id, \ .type = subtable, \ .subtable_valid = valid, \ .probe_subtbl = fn, \ .driver_data = data, \ } #define ACPI_PROBE_TABLE(name) __##name##_acpi_probe_table #define ACPI_PROBE_TABLE_END(name) __##name##_acpi_probe_table_end int __acpi_probe_device_table(struct acpi_probe_entry *start, int nr); #define acpi_probe_device_table(t) \ ({ \ extern struct acpi_probe_entry ACPI_PROBE_TABLE(t), \ ACPI_PROBE_TABLE_END(t); \ __acpi_probe_device_table(&ACPI_PROBE_TABLE(t), \ (&ACPI_PROBE_TABLE_END(t) - \ &ACPI_PROBE_TABLE(t))); \ }) #else static inline int acpi_dev_get_property(struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj) { return -ENXIO; } static inline int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, size_t num_args, struct fwnode_reference_args *args) { return -ENXIO; } static inline int acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, struct fwnode_reference_args *args) { return -ENXIO; } static inline int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, void **valptr) { return -ENXIO; } static inline struct fwnode_handle * acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { return ERR_PTR(-ENXIO); } static inline int acpi_graph_get_remote_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle **remote, struct fwnode_handle **port, struct fwnode_handle **endpoint) { return -ENXIO; } #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \ static const void * __acpi_table_##name[] \ __attribute__((unused)) \ = { (void *) table_id, \ (void *) subtable, \ (void *) valid, \ (void *) fn, \ (void *) data } #define acpi_probe_device_table(t) ({ int __r = 0; __r;}) #endif #ifdef CONFIG_ACPI_TABLE_UPGRADE void acpi_table_upgrade(void); #else static inline void acpi_table_upgrade(void) { } #endif #if defined(CONFIG_ACPI) && defined(CONFIG_ACPI_WATCHDOG) extern bool acpi_has_watchdog(void); #else static inline bool acpi_has_watchdog(void) { return false; } #endif #ifdef CONFIG_ACPI_SPCR_TABLE extern bool qdf2400_e44_present; int acpi_parse_spcr(bool enable_earlycon, bool enable_console); #else static inline int acpi_parse_spcr(bool enable_earlycon, bool enable_console) { return -ENODEV; } #endif #if IS_ENABLED(CONFIG_ACPI_GENERIC_GSI) int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res); const struct cpumask *acpi_irq_get_affinity(acpi_handle handle, unsigned int index); #else static inline int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res) { return -EINVAL; } static inline const struct cpumask *acpi_irq_get_affinity(acpi_handle handle, unsigned int index) { return NULL; } #endif #ifdef CONFIG_ACPI_LPIT int lpit_read_residency_count_address(u64 *address); #else static inline int lpit_read_residency_count_address(u64 *address) { return -EINVAL; } #endif #ifdef CONFIG_ACPI_PROCESSOR_IDLE #ifndef arch_get_idle_state_flags static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) { return 0; } #endif #endif /* CONFIG_ACPI_PROCESSOR_IDLE */ #ifdef CONFIG_ACPI_PPTT int acpi_pptt_cpu_is_thread(unsigned int cpu); int find_acpi_cpu_topology(unsigned int cpu, int level); int find_acpi_cpu_topology_cluster(unsigned int cpu); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus); int find_acpi_cache_level_from_id(u32 cache_id); int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { return -EINVAL; } static inline int find_acpi_cpu_topology(unsigned int cpu, int level) { return -EINVAL; } static inline int find_acpi_cpu_topology_cluster(unsigned int cpu) { return -EINVAL; } static inline int find_acpi_cpu_topology_package(unsigned int cpu) { return -EINVAL; } static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) { return -EINVAL; } static inline void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus) { } static inline int find_acpi_cache_level_from_id(u32 cache_id) { return -ENOENT; } static inline int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus) { return -ENOENT; } #endif void acpi_arch_init(void); #ifdef CONFIG_ACPI_PCC void acpi_init_pcc(void); #else static inline void acpi_init_pcc(void) { } #endif #ifdef CONFIG_ACPI_FFH void acpi_init_ffh(void); extern int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt); extern int acpi_ffh_address_space_arch_handler(acpi_integer *value, void *region_context); #else static inline void acpi_init_ffh(void) { } #endif #ifdef CONFIG_ACPI extern void acpi_device_notify(struct device *dev); extern void acpi_device_notify_remove(struct device *dev); #else static inline void acpi_device_notify(struct device *dev) { } static inline void acpi_device_notify_remove(struct device *dev) { } #endif static inline void acpi_use_parent_companion(struct device *dev) { ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent)); } #ifdef CONFIG_ACPI_NUMA bool acpi_node_backed_by_real_pxm(int nid); #else static inline bool acpi_node_backed_by_real_pxm(int nid) { return false; } #endif #endif /*_LINUX_ACPI_H*/ |
| 8 8 8 12 13 13 13 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 | // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org) */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <net/netrom.h> static void nr_heartbeat_expiry(struct timer_list *); static void nr_t1timer_expiry(struct timer_list *); static void nr_t2timer_expiry(struct timer_list *); static void nr_t4timer_expiry(struct timer_list *); static void nr_idletimer_expiry(struct timer_list *); void nr_init_timers(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); timer_setup(&nr->t1timer, nr_t1timer_expiry, 0); timer_setup(&nr->t2timer, nr_t2timer_expiry, 0); timer_setup(&nr->t4timer, nr_t4timer_expiry, 0); timer_setup(&nr->idletimer, nr_idletimer_expiry, 0); /* initialized by sock_init_data */ sk->sk_timer.function = nr_heartbeat_expiry; } void nr_start_t1timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1); } void nr_start_t2timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2); } void nr_start_t4timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4); } void nr_start_idletimer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); if (nr->idle > 0) sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle); } void nr_start_heartbeat(struct sock *sk) { sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ); } void nr_stop_t1timer(struct sock *sk) { sk_stop_timer(sk, &nr_sk(sk)->t1timer); } void nr_stop_t2timer(struct sock *sk) { sk_stop_timer(sk, &nr_sk(sk)->t2timer); } void nr_stop_t4timer(struct sock *sk) { sk_stop_timer(sk, &nr_sk(sk)->t4timer); } void nr_stop_idletimer(struct sock *sk) { sk_stop_timer(sk, &nr_sk(sk)->idletimer); } void nr_stop_heartbeat(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); } int nr_t1timer_running(struct sock *sk) { return timer_pending(&nr_sk(sk)->t1timer); } static void nr_heartbeat_expiry(struct timer_list *t) { struct sock *sk = timer_container_of(sk, t, sk_timer); struct nr_sock *nr = nr_sk(sk); bh_lock_sock(sk); switch (nr->state) { case NR_STATE_0: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { if (sk->sk_state == TCP_LISTEN) sock_hold(sk); bh_unlock_sock(sk); nr_destroy_socket(sk); goto out; } break; case NR_STATE_3: /* * Check for the state of the receive buffer. */ if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) && (nr->condition & NR_COND_OWN_RX_BUSY)) { nr->condition &= ~NR_COND_OWN_RX_BUSY; nr->condition &= ~NR_COND_ACK_PENDING; nr->vl = nr->vr; nr_write_internal(sk, NR_INFOACK); break; } break; } nr_start_heartbeat(sk); bh_unlock_sock(sk); out: sock_put(sk); } static void nr_t2timer_expiry(struct timer_list *t) { struct nr_sock *nr = timer_container_of(nr, t, t2timer); struct sock *sk = &nr->sock; bh_lock_sock(sk); if (nr->condition & NR_COND_ACK_PENDING) { nr->condition &= ~NR_COND_ACK_PENDING; nr_enquiry_response(sk); } bh_unlock_sock(sk); sock_put(sk); } static void nr_t4timer_expiry(struct timer_list *t) { struct nr_sock *nr = timer_container_of(nr, t, t4timer); struct sock *sk = &nr->sock; bh_lock_sock(sk); nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY; bh_unlock_sock(sk); sock_put(sk); } static void nr_idletimer_expiry(struct timer_list *t) { struct nr_sock *nr = timer_container_of(nr, t, idletimer); struct sock *sk = &nr->sock; bh_lock_sock(sk); nr_clear_queues(sk); nr->n2count = 0; nr_write_internal(sk, NR_DISCREQ); nr->state = NR_STATE_2; nr_start_t1timer(sk); nr_stop_t2timer(sk); nr_stop_t4timer(sk); sk->sk_state = TCP_CLOSE; sk->sk_err = 0; sk->sk_shutdown |= SEND_SHUTDOWN; if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); } bh_unlock_sock(sk); sock_put(sk); } static void nr_t1timer_expiry(struct timer_list *t) { struct nr_sock *nr = timer_container_of(nr, t, t1timer); struct sock *sk = &nr->sock; bh_lock_sock(sk); switch (nr->state) { case NR_STATE_1: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); goto out; } else { nr->n2count++; nr_write_internal(sk, NR_CONNREQ); } break; case NR_STATE_2: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); goto out; } else { nr->n2count++; nr_write_internal(sk, NR_DISCREQ); } break; case NR_STATE_3: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); goto out; } else { nr->n2count++; nr_requeue_frames(sk); } break; } nr_start_t1timer(sk); out: bh_unlock_sock(sk); sock_put(sk); } |
| 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | // SPDX-License-Identifier: GPL-2.0-only /* * Scalar fixed time AES core transform * * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> */ #include <crypto/aes.h> #include <crypto/algapi.h> #include <linux/module.h> static int aesti_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); return aes_expandkey(ctx, in_key, key_len); } static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); unsigned long flags; /* * Temporarily disable interrupts to avoid races where cachelines are * evicted when the CPU is interrupted to do something else. */ local_irq_save(flags); aes_encrypt(ctx, out, in); local_irq_restore(flags); } static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); unsigned long flags; /* * Temporarily disable interrupts to avoid races where cachelines are * evicted when the CPU is interrupted to do something else. */ local_irq_save(flags); aes_decrypt(ctx, out, in); local_irq_restore(flags); } static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_driver_name = "aes-fixed-time", .cra_priority = 100 + 1, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), .cra_module = THIS_MODULE, .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE, .cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE, .cra_cipher.cia_setkey = aesti_set_key, .cra_cipher.cia_encrypt = aesti_encrypt, .cra_cipher.cia_decrypt = aesti_decrypt }; static int __init aes_init(void) { return crypto_register_alg(&aes_alg); } static void __exit aes_fini(void) { crypto_unregister_alg(&aes_alg); } module_init(aes_init); module_exit(aes_fini); MODULE_DESCRIPTION("Generic fixed time AES"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); |
| 26 26 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * This file is part of the Linux kernel. * * Copyright (c) 2011-2014, Intel Corporation * Authors: Fenghua Yu <fenghua.yu@intel.com>, * H. Peter Anvin <hpa@linux.intel.com> */ #ifndef ASM_X86_ARCHRANDOM_H #define ASM_X86_ARCHRANDOM_H #include <asm/processor.h> #include <asm/cpufeature.h> #define RDRAND_RETRY_LOOPS 10 /* Unconditional execution of RDRAND and RDSEED */ static inline bool __must_check rdrand_long(unsigned long *v) { bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; do { asm volatile("rdrand %[out]" : "=@ccc" (ok), [out] "=r" (*v)); if (ok) return true; } while (--retry); return false; } static inline bool __must_check rdseed_long(unsigned long *v) { bool ok; asm volatile("rdseed %[out]" : "=@ccc" (ok), [out] "=r" (*v)); return ok; } /* * These are the generic interfaces; they must not be declared if the * stubs in <linux/random.h> are to be invoked. */ static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs) { return max_longs && static_cpu_has(X86_FEATURE_RDRAND) && rdrand_long(v) ? 1 : 0; } static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs) { return max_longs && static_cpu_has(X86_FEATURE_RDSEED) && rdseed_long(v) ? 1 : 0; } #ifndef CONFIG_UML void x86_init_rdrand(struct cpuinfo_x86 *c); #endif #endif /* ASM_X86_ARCHRANDOM_H */ |
| 367 366 368 369 358 370 253 247 232 252 250 233 357 357 114 1 354 252 253 219 218 253 253 368 341 340 211 211 209 127 128 30 20 20 128 367 369 357 356 3 3 3 368 50 17 17 17 1 16 16 369 369 26 368 80 69 64 64 3 369 366 367 369 368 81 369 368 369 367 368 14 14 14 14 14 14 253 14 252 13 14 14 14 14 14 14 14 64 255 80 79 69 255 44 253 3 253 255 247 246 253 253 3 3 254 14 252 14 253 251 201 115 195 195 369 369 368 82 14 14 369 369 368 251 255 255 255 253 47 2 1 252 252 216 254 59 254 59 369 251 251 367 366 80 81 21 81 81 73 47 32 30 8 80 79 80 3 369 354 368 355 13 107 40 75 40 1 40 40 27 19 18 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 | // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * * This file is part of the SCTP kernel implementation * * These functions handle output processing. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Jon Grimm <jgrimm@austin.ibm.com> * Sridhar Samudrala <sri@us.ibm.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/kernel.h> #include <linux/wait.h> #include <linux/time.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/init.h> #include <linux/slab.h> #include <net/inet_ecn.h> #include <net/ip.h> #include <net/icmp.h> #include <net/net_namespace.h> #include <linux/socket.h> /* for sa_family_t */ #include <net/sock.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/checksum.h> /* Forward declarations for private helpers. */ static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet, struct sctp_chunk *chunk); static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet, struct sctp_chunk *chunk); static void sctp_packet_append_data(struct sctp_packet *packet, struct sctp_chunk *chunk); static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet, struct sctp_chunk *chunk, u16 chunk_len); static void sctp_packet_reset(struct sctp_packet *packet) { /* sctp_packet_transmit() relies on this to reset size to the * current overhead after sending packets. */ packet->size = packet->overhead; packet->has_cookie_echo = 0; packet->has_sack = 0; packet->has_data = 0; packet->has_auth = 0; packet->ipfragok = 0; packet->auth = NULL; } /* Config a packet. * This appears to be a followup set of initializations. */ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, int ecn_capable) { struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; struct sctp_sock *sp = NULL; struct sock *sk; pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); packet->vtag = vtag; /* do the following jobs only once for a flush schedule */ if (!sctp_packet_empty(packet)) return; /* set packet max_size with pathmtu, then calculate overhead */ packet->max_size = tp->pathmtu; if (asoc) { sk = asoc->base.sk; sp = sctp_sk(sk); } packet->overhead = sctp_mtu_payload(sp, 0, 0); packet->size = packet->overhead; if (!asoc) return; /* update dst or transport pathmtu if in need */ if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sp); if (asoc->param_flags & SPP_PMTUD_ENABLE) sctp_assoc_sync_pmtu(asoc); } else if (!sctp_transport_pl_enabled(tp) && asoc->param_flags & SPP_PMTUD_ENABLE) { if (!sctp_transport_pmtu_check(tp)) sctp_assoc_sync_pmtu(asoc); } if (asoc->pmtu_pending) { if (asoc->param_flags & SPP_PMTUD_ENABLE) sctp_assoc_sync_pmtu(asoc); asoc->pmtu_pending = 0; } /* If there a is a prepend chunk stick it on the list before * any other chunks get appended. */ if (ecn_capable) { struct sctp_chunk *chunk = sctp_get_ecne_prepend(asoc); if (chunk) sctp_packet_append_chunk(packet, chunk); } if (!tp->dst) return; /* set packet max_size with gso_max_size if gso is enabled*/ rcu_read_lock(); if (__sk_dst_get(sk) != tp->dst) { dst_hold(tp->dst); sk_setup_caps(sk, tp->dst); } packet->max_size = sk_can_gso(sk) ? min(READ_ONCE(tp->dst->dev->gso_max_size), GSO_LEGACY_MAX_SIZE) : asoc->pathmtu; rcu_read_unlock(); } /* Initialize the packet structure. */ void sctp_packet_init(struct sctp_packet *packet, struct sctp_transport *transport, __u16 sport, __u16 dport) { pr_debug("%s: packet:%p transport:%p\n", __func__, packet, transport); packet->transport = transport; packet->source_port = sport; packet->destination_port = dport; INIT_LIST_HEAD(&packet->chunk_list); /* The overhead will be calculated by sctp_packet_config() */ packet->overhead = 0; sctp_packet_reset(packet); packet->vtag = 0; } /* Free a packet. */ void sctp_packet_free(struct sctp_packet *packet) { struct sctp_chunk *chunk, *tmp; pr_debug("%s: packet:%p\n", __func__, packet); list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); sctp_chunk_free(chunk); } } /* This routine tries to append the chunk to the offered packet. If adding * the chunk causes the packet to exceed the path MTU and COOKIE_ECHO chunk * is not present in the packet, it transmits the input packet. * Data can be bundled with a packet containing a COOKIE_ECHO chunk as long * as it can fit in the packet, but any more data that does not fit in this * packet can be sent only after receiving the COOKIE_ACK. */ enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet, struct sctp_chunk *chunk, int one_packet, gfp_t gfp) { enum sctp_xmit retval; pr_debug("%s: packet:%p size:%zu chunk:%p size:%d\n", __func__, packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1); switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { case SCTP_XMIT_PMTU_FULL: if (!packet->has_cookie_echo) { int error = 0; error = sctp_packet_transmit(packet, gfp); if (error < 0) chunk->skb->sk->sk_err = -error; /* If we have an empty packet, then we can NOT ever * return PMTU_FULL. */ if (!one_packet) retval = sctp_packet_append_chunk(packet, chunk); } break; case SCTP_XMIT_RWND_FULL: case SCTP_XMIT_OK: case SCTP_XMIT_DELAY: break; } return retval; } /* Try to bundle a pad chunk into a packet with a heartbeat chunk for PLPMTUTD probe */ static enum sctp_xmit sctp_packet_bundle_pad(struct sctp_packet *pkt, struct sctp_chunk *chunk) { struct sctp_transport *t = pkt->transport; struct sctp_chunk *pad; int overhead = 0; if (!chunk->pmtu_probe) return SCTP_XMIT_OK; /* calculate the Padding Data size for the pad chunk */ overhead += sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr); overhead += sizeof(struct sctp_sender_hb_info) + sizeof(struct sctp_pad_chunk); pad = sctp_make_pad(t->asoc, t->pl.probe_size - overhead); if (!pad) return SCTP_XMIT_DELAY; list_add_tail(&pad->list, &pkt->chunk_list); pkt->size += SCTP_PAD4(ntohs(pad->chunk_hdr->length)); chunk->transport = t; return SCTP_XMIT_OK; } /* Try to bundle an auth chunk into the packet. */ static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt, struct sctp_chunk *chunk) { struct sctp_association *asoc = pkt->transport->asoc; enum sctp_xmit retval = SCTP_XMIT_OK; struct sctp_chunk *auth; /* if we don't have an association, we can't do authentication */ if (!asoc) return retval; /* See if this is an auth chunk we are bundling or if * auth is already bundled. */ if (chunk->chunk_hdr->type == SCTP_CID_AUTH || pkt->has_auth) return retval; /* if the peer did not request this chunk to be authenticated, * don't do it */ if (!chunk->auth) return retval; auth = sctp_make_auth(asoc, chunk->shkey->key_id); if (!auth) return retval; auth->shkey = chunk->shkey; sctp_auth_shkey_hold(auth->shkey); retval = __sctp_packet_append_chunk(pkt, auth); if (retval != SCTP_XMIT_OK) sctp_chunk_free(auth); return retval; } /* Try to bundle a SACK with the packet. */ static enum sctp_xmit sctp_packet_bundle_sack(struct sctp_packet *pkt, struct sctp_chunk *chunk) { enum sctp_xmit retval = SCTP_XMIT_OK; /* If sending DATA and haven't aleady bundled a SACK, try to * bundle one in to the packet. */ if (sctp_chunk_is_data(chunk) && !pkt->has_sack && !pkt->has_cookie_echo) { struct sctp_association *asoc; struct timer_list *timer; asoc = pkt->transport->asoc; timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; /* If the SACK timer is running, we have a pending SACK */ if (timer_pending(timer)) { struct sctp_chunk *sack; if (pkt->transport->sack_generation != pkt->transport->asoc->peer.sack_generation) return retval; asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (sack) { retval = __sctp_packet_append_chunk(pkt, sack); if (retval != SCTP_XMIT_OK) { sctp_chunk_free(sack); goto out; } SCTP_INC_STATS(asoc->base.net, SCTP_MIB_OUTCTRLCHUNKS); asoc->stats.octrlchunks++; asoc->peer.sack_needed = 0; if (timer_delete(timer)) sctp_association_put(asoc); } } } out: return retval; } /* Append a chunk to the offered packet reporting back any inability to do * so. */ static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet, struct sctp_chunk *chunk) { __u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length)); enum sctp_xmit retval = SCTP_XMIT_OK; /* Check to see if this chunk will fit into the packet */ retval = sctp_packet_will_fit(packet, chunk, chunk_len); if (retval != SCTP_XMIT_OK) goto finish; /* We believe that this chunk is OK to add to the packet */ switch (chunk->chunk_hdr->type) { case SCTP_CID_DATA: case SCTP_CID_I_DATA: /* Account for the data being in the packet */ sctp_packet_append_data(packet, chunk); /* Disallow SACK bundling after DATA. */ packet->has_sack = 1; /* Disallow AUTH bundling after DATA */ packet->has_auth = 1; /* Let it be knows that packet has DATA in it */ packet->has_data = 1; /* timestamp the chunk for rtx purposes */ chunk->sent_at = jiffies; /* Mainly used for prsctp RTX policy */ chunk->sent_count++; break; case SCTP_CID_COOKIE_ECHO: packet->has_cookie_echo = 1; break; case SCTP_CID_SACK: packet->has_sack = 1; if (chunk->asoc) chunk->asoc->stats.osacks++; break; case SCTP_CID_AUTH: packet->has_auth = 1; packet->auth = chunk; break; } /* It is OK to send this chunk. */ list_add_tail(&chunk->list, &packet->chunk_list); packet->size += chunk_len; chunk->transport = packet->transport; finish: return retval; } /* Append a chunk to the offered packet reporting back any inability to do * so. */ enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet, struct sctp_chunk *chunk) { enum sctp_xmit retval = SCTP_XMIT_OK; pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); /* Data chunks are special. Before seeing what else we can * bundle into this packet, check to see if we are allowed to * send this DATA. */ if (sctp_chunk_is_data(chunk)) { retval = sctp_packet_can_append_data(packet, chunk); if (retval != SCTP_XMIT_OK) goto finish; } /* Try to bundle AUTH chunk */ retval = sctp_packet_bundle_auth(packet, chunk); if (retval != SCTP_XMIT_OK) goto finish; /* Try to bundle SACK chunk */ retval = sctp_packet_bundle_sack(packet, chunk); if (retval != SCTP_XMIT_OK) goto finish; retval = __sctp_packet_append_chunk(packet, chunk); if (retval != SCTP_XMIT_OK) goto finish; retval = sctp_packet_bundle_pad(packet, chunk); finish: return retval; } static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb) { if (SCTP_OUTPUT_CB(head)->last == head) skb_shinfo(head)->frag_list = skb; else SCTP_OUTPUT_CB(head)->last->next = skb; SCTP_OUTPUT_CB(head)->last = skb; head->truesize += skb->truesize; head->data_len += skb->len; head->len += skb->len; refcount_add(skb->truesize, &head->sk->sk_wmem_alloc); __skb_header_release(skb); } static int sctp_packet_pack(struct sctp_packet *packet, struct sk_buff *head, int gso, gfp_t gfp) { struct sctp_transport *tp = packet->transport; struct sctp_auth_chunk *auth = NULL; struct sctp_chunk *chunk, *tmp; int pkt_count = 0, pkt_size; struct sock *sk = head->sk; struct sk_buff *nskb; int auth_len = 0; if (gso) { skb_shinfo(head)->gso_type = sk->sk_gso_type; SCTP_OUTPUT_CB(head)->last = head; } else { nskb = head; pkt_size = packet->size; goto merge; } do { /* calculate the pkt_size and alloc nskb */ pkt_size = packet->overhead; list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { int padded = SCTP_PAD4(chunk->skb->len); if (chunk == packet->auth) auth_len = padded; else if (auth_len + padded + packet->overhead > tp->pathmtu) return 0; else if (pkt_size + padded > tp->pathmtu) break; pkt_size += padded; } nskb = alloc_skb(pkt_size + MAX_HEADER, gfp); if (!nskb) return 0; skb_reserve(nskb, packet->overhead + MAX_HEADER); merge: /* merge chunks into nskb and append nskb into head list */ pkt_size -= packet->overhead; list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { int padding; list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { if (!sctp_chunk_retransmitted(chunk) && !tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } } padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len; if (padding) skb_put_zero(chunk->skb, padding); if (chunk == packet->auth) auth = (struct sctp_auth_chunk *) skb_tail_pointer(nskb); skb_put_data(nskb, chunk->skb->data, chunk->skb->len); pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n", chunk, sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), chunk->has_tsn ? "TSN" : "No TSN", chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0, ntohs(chunk->chunk_hdr->length), chunk->skb->len, chunk->rtt_in_progress); pkt_size -= SCTP_PAD4(chunk->skb->len); if (!sctp_chunk_is_data(chunk) && chunk != packet->auth) sctp_chunk_free(chunk); if (!pkt_size) break; } if (auth) { sctp_auth_calculate_hmac(tp->asoc, nskb, auth, packet->auth->shkey, gfp); /* free auth if no more chunks, or add it back */ if (list_empty(&packet->chunk_list)) sctp_chunk_free(packet->auth); else list_add(&packet->auth->list, &packet->chunk_list); } if (gso) sctp_packet_gso_append(head, nskb); pkt_count++; } while (!list_empty(&packet->chunk_list)); if (gso) { memset(head->cb, 0, max(sizeof(struct inet_skb_parm), sizeof(struct inet6_skb_parm))); skb_shinfo(head)->gso_segs = pkt_count; skb_shinfo(head)->gso_size = GSO_BY_FRAGS; goto chksum; } if (sctp_checksum_disable) return 1; if (!(tp->dst->dev->features & NETIF_F_SCTP_CRC) || dst_xfrm(tp->dst) || packet->ipfragok || tp->encap_port) { struct sctphdr *sh = (struct sctphdr *)skb_transport_header(head); sh->checksum = sctp_compute_cksum(head, 0); } else { chksum: head->ip_summed = CHECKSUM_PARTIAL; head->csum_not_inet = 1; head->csum_start = skb_transport_header(head) - head->head; head->csum_offset = offsetof(struct sctphdr, checksum); } return pkt_count; } /* All packets are sent to the network through this function from * sctp_outq_tail(). * * The return value is always 0 for now. */ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) { struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; struct sctp_chunk *chunk, *tmp; int pkt_count, gso = 0; struct sk_buff *head; struct sctphdr *sh; struct sock *sk; pr_debug("%s: packet:%p\n", __func__, packet); if (list_empty(&packet->chunk_list)) return 0; chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); sk = chunk->skb->sk; if (packet->size > tp->pathmtu && !packet->ipfragok && !chunk->pmtu_probe) { if (tp->pl.state == SCTP_PL_ERROR) { /* do IP fragmentation if in Error state */ packet->ipfragok = 1; } else { if (!sk_can_gso(sk)) { /* check gso */ pr_err_once("Trying to GSO but underlying device doesn't support it."); goto out; } gso = 1; } } /* alloc head skb */ head = alloc_skb((gso ? packet->overhead : packet->size) + MAX_HEADER, gfp); if (!head) goto out; skb_reserve(head, packet->overhead + MAX_HEADER); skb_set_owner_w(head, sk); /* set sctp header */ sh = skb_push(head, sizeof(struct sctphdr)); skb_reset_transport_header(head); sh->source = htons(packet->source_port); sh->dest = htons(packet->destination_port); sh->vtag = htonl(packet->vtag); sh->checksum = 0; /* drop packet if no dst */ if (!tp->dst) { IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); kfree_skb(head); goto out; } /* pack up chunks */ pkt_count = sctp_packet_pack(packet, head, gso, gfp); if (!pkt_count) { kfree_skb(head); goto out; } pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len); /* start autoclose timer */ if (packet->has_data && sctp_state(asoc, ESTABLISHED) && asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { struct timer_list *timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; unsigned long timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; if (!mod_timer(timer, jiffies + timeout)) sctp_association_hold(asoc); } /* sctp xmit */ tp->af_specific->ecn_capable(sk); if (asoc) { asoc->stats.opackets += pkt_count; if (asoc->peer.last_sent_to != tp) asoc->peer.last_sent_to = tp; } head->ignore_df = packet->ipfragok; if (tp->dst_pending_confirm) skb_set_dst_pending_confirm(head, 1); /* neighbour should be confirmed on successful transmission or * positive error */ if (tp->af_specific->sctp_xmit(head, tp) >= 0 && tp->dst_pending_confirm) tp->dst_pending_confirm = 0; out: list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (!sctp_chunk_is_data(chunk)) sctp_chunk_free(chunk); } sctp_packet_reset(packet); return 0; } /******************************************************************** * 2nd Level Abstractions ********************************************************************/ /* This private function check to see if a chunk can be added */ static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet, struct sctp_chunk *chunk) { size_t datasize, rwnd, inflight, flight_size; struct sctp_transport *transport = packet->transport; struct sctp_association *asoc = transport->asoc; struct sctp_outq *q = &asoc->outqueue; /* RFC 2960 6.1 Transmission of DATA Chunks * * A) At any given time, the data sender MUST NOT transmit new data to * any destination transport address if its peer's rwnd indicates * that the peer has no buffer space (i.e. rwnd is 0, see Section * 6.2.1). However, regardless of the value of rwnd (including if it * is 0), the data sender can always have one DATA chunk in flight to * the receiver if allowed by cwnd (see rule B below). This rule * allows the sender to probe for a change in rwnd that the sender * missed due to the SACK having been lost in transit from the data * receiver to the data sender. */ rwnd = asoc->peer.rwnd; inflight = q->outstanding_bytes; flight_size = transport->flight_size; datasize = sctp_data_size(chunk); if (datasize > rwnd && inflight > 0) /* We have (at least) one data chunk in flight, * so we can't fall back to rule 6.1 B). */ return SCTP_XMIT_RWND_FULL; /* RFC 2960 6.1 Transmission of DATA Chunks * * B) At any given time, the sender MUST NOT transmit new data * to a given transport address if it has cwnd or more bytes * of data outstanding to that transport address. */ /* RFC 7.2.4 & the Implementers Guide 2.8. * * 3) ... * When a Fast Retransmit is being performed the sender SHOULD * ignore the value of cwnd and SHOULD NOT delay retransmission. */ if (chunk->fast_retransmit != SCTP_NEED_FRTX && flight_size >= transport->cwnd) return SCTP_XMIT_RWND_FULL; /* Nagle's algorithm to solve small-packet problem: * Inhibit the sending of new chunks when new outgoing data arrives * if any previously transmitted data on the connection remains * unacknowledged. */ if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) && !asoc->force_delay) /* Nothing unacked */ return SCTP_XMIT_OK; if (!sctp_packet_empty(packet)) /* Append to packet */ return SCTP_XMIT_OK; if (!sctp_state(asoc, ESTABLISHED)) return SCTP_XMIT_OK; /* Check whether this chunk and all the rest of pending data will fit * or delay in hopes of bundling a full sized packet. */ if (chunk->skb->len + q->out_qlen > transport->pathmtu - packet->overhead - sctp_datachk_len(&chunk->asoc->stream) - 4) /* Enough data queued to fill a packet */ return SCTP_XMIT_OK; /* Don't delay large message writes that may have been fragmented */ if (!chunk->msg->can_delay) return SCTP_XMIT_OK; /* Defer until all data acked or packet full */ return SCTP_XMIT_DELAY; } /* This private function does management things when adding DATA chunk */ static void sctp_packet_append_data(struct sctp_packet *packet, struct sctp_chunk *chunk) { struct sctp_transport *transport = packet->transport; size_t datasize = sctp_data_size(chunk); struct sctp_association *asoc = transport->asoc; u32 rwnd = asoc->peer.rwnd; /* Keep track of how many bytes are in flight over this transport. */ transport->flight_size += datasize; /* Keep track of how many bytes are in flight to the receiver. */ asoc->outqueue.outstanding_bytes += datasize; /* Update our view of the receiver's rwnd. */ if (datasize < rwnd) rwnd -= datasize; else rwnd = 0; asoc->peer.rwnd = rwnd; sctp_chunk_assign_tsn(chunk); asoc->stream.si->assign_number(chunk); } static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet, struct sctp_chunk *chunk, u16 chunk_len) { enum sctp_xmit retval = SCTP_XMIT_OK; size_t psize, pmtu, maxsize; /* Don't bundle in this packet if this chunk's auth key doesn't * match other chunks already enqueued on this packet. Also, * don't bundle the chunk with auth key if other chunks in this * packet don't have auth key. */ if ((packet->auth && chunk->shkey != packet->auth->shkey) || (!packet->auth && chunk->shkey && chunk->chunk_hdr->type != SCTP_CID_AUTH)) return SCTP_XMIT_PMTU_FULL; psize = packet->size; if (packet->transport->asoc) pmtu = packet->transport->asoc->pathmtu; else pmtu = packet->transport->pathmtu; /* Decide if we need to fragment or resubmit later. */ if (psize + chunk_len > pmtu) { /* It's OK to fragment at IP level if any one of the following * is true: * 1. The packet is empty (meaning this chunk is greater * the MTU) * 2. The packet doesn't have any data in it yet and data * requires authentication. */ if (sctp_packet_empty(packet) || (!packet->has_data && chunk->auth)) { /* We no longer do re-fragmentation. * Just fragment at the IP layer, if we * actually hit this condition */ packet->ipfragok = 1; goto out; } /* Similarly, if this chunk was built before a PMTU * reduction, we have to fragment it at IP level now. So * if the packet already contains something, we need to * flush. */ maxsize = pmtu - packet->overhead; if (packet->auth) maxsize -= SCTP_PAD4(packet->auth->skb->len); if (chunk_len > maxsize) retval = SCTP_XMIT_PMTU_FULL; /* It is also okay to fragment if the chunk we are * adding is a control chunk, but only if current packet * is not a GSO one otherwise it causes fragmentation of * a large frame. So in this case we allow the * fragmentation by forcing it to be in a new packet. */ if (!sctp_chunk_is_data(chunk) && packet->has_data) retval = SCTP_XMIT_PMTU_FULL; if (psize + chunk_len > packet->max_size) /* Hit GSO/PMTU limit, gotta flush */ retval = SCTP_XMIT_PMTU_FULL; if (!packet->transport->burst_limited && psize + chunk_len > (packet->transport->cwnd >> 1)) /* Do not allow a single GSO packet to use more * than half of cwnd. */ retval = SCTP_XMIT_PMTU_FULL; if (packet->transport->burst_limited && psize + chunk_len > (packet->transport->burst_limited >> 1)) /* Do not allow a single GSO packet to use more * than half of original cwnd. */ retval = SCTP_XMIT_PMTU_FULL; /* Otherwise it will fit in the GSO packet */ } out: return retval; } |
| 1 1 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API. * * ARIA Cipher Algorithm. * * Documentation of ARIA can be found in RFC 5794. * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com> * * Information for ARIA * http://210.104.33.10/ARIA/index-e.html (English) * http://seed.kisa.or.kr/ (Korean) * * Public domain version is distributed above. */ #include <crypto/aria.h> #include <linux/unaligned.h> static const u32 key_rc[20] = { 0x517cc1b7, 0x27220a94, 0xfe13abe8, 0xfa9a6ee0, 0x6db14acc, 0x9e21c820, 0xff28b1d5, 0xef5de2b0, 0xdb92371d, 0x2126e970, 0x03249775, 0x04e8c90e, 0x517cc1b7, 0x27220a94, 0xfe13abe8, 0xfa9a6ee0, 0x6db14acc, 0x9e21c820, 0xff28b1d5, 0xef5de2b0 }; static void aria_set_encrypt_key(struct aria_ctx *ctx, const u8 *in_key, unsigned int key_len) { u32 w0[4], w1[4], w2[4], w3[4]; u32 reg0, reg1, reg2, reg3; const u32 *ck; int rkidx = 0; ck = &key_rc[(key_len - 16) / 2]; w0[0] = get_unaligned_be32(&in_key[0]); w0[1] = get_unaligned_be32(&in_key[4]); w0[2] = get_unaligned_be32(&in_key[8]); w0[3] = get_unaligned_be32(&in_key[12]); reg0 = w0[0] ^ ck[0]; reg1 = w0[1] ^ ck[1]; reg2 = w0[2] ^ ck[2]; reg3 = w0[3] ^ ck[3]; aria_subst_diff_odd(®0, ®1, ®2, ®3); if (key_len > 16) { w1[0] = get_unaligned_be32(&in_key[16]); w1[1] = get_unaligned_be32(&in_key[20]); if (key_len > 24) { w1[2] = get_unaligned_be32(&in_key[24]); w1[3] = get_unaligned_be32(&in_key[28]); } else { w1[2] = 0; w1[3] = 0; } } else { w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; } w1[0] ^= reg0; w1[1] ^= reg1; w1[2] ^= reg2; w1[3] ^= reg3; reg0 = w1[0]; reg1 = w1[1]; reg2 = w1[2]; reg3 = w1[3]; reg0 ^= ck[4]; reg1 ^= ck[5]; reg2 ^= ck[6]; reg3 ^= ck[7]; aria_subst_diff_even(®0, ®1, ®2, ®3); reg0 ^= w0[0]; reg1 ^= w0[1]; reg2 ^= w0[2]; reg3 ^= w0[3]; w2[0] = reg0; w2[1] = reg1; w2[2] = reg2; w2[3] = reg3; reg0 ^= ck[8]; reg1 ^= ck[9]; reg2 ^= ck[10]; reg3 ^= ck[11]; aria_subst_diff_odd(®0, ®1, ®2, ®3); w3[0] = reg0 ^ w1[0]; w3[1] = reg1 ^ w1[1]; w3[2] = reg2 ^ w1[2]; w3[3] = reg3 ^ w1[3]; aria_gsrk(ctx->enc_key[rkidx], w0, w1, 19); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w1, w2, 19); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w2, w3, 19); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w3, w0, 19); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w0, w1, 31); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w1, w2, 31); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w2, w3, 31); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w3, w0, 31); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w0, w1, 67); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w1, w2, 67); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w2, w3, 67); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w3, w0, 67); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w0, w1, 97); if (key_len > 16) { rkidx++; aria_gsrk(ctx->enc_key[rkidx], w1, w2, 97); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w2, w3, 97); if (key_len > 24) { rkidx++; aria_gsrk(ctx->enc_key[rkidx], w3, w0, 97); rkidx++; aria_gsrk(ctx->enc_key[rkidx], w0, w1, 109); } } } static void aria_set_decrypt_key(struct aria_ctx *ctx) { int i; for (i = 0; i < 4; i++) { ctx->dec_key[0][i] = ctx->enc_key[ctx->rounds][i]; ctx->dec_key[ctx->rounds][i] = ctx->enc_key[0][i]; } for (i = 1; i < ctx->rounds; i++) { ctx->dec_key[i][0] = aria_m(ctx->enc_key[ctx->rounds - i][0]); ctx->dec_key[i][1] = aria_m(ctx->enc_key[ctx->rounds - i][1]); ctx->dec_key[i][2] = aria_m(ctx->enc_key[ctx->rounds - i][2]); ctx->dec_key[i][3] = aria_m(ctx->enc_key[ctx->rounds - i][3]); aria_diff_word(&ctx->dec_key[i][0], &ctx->dec_key[i][1], &ctx->dec_key[i][2], &ctx->dec_key[i][3]); aria_diff_byte(&ctx->dec_key[i][1], &ctx->dec_key[i][2], &ctx->dec_key[i][3]); aria_diff_word(&ctx->dec_key[i][0], &ctx->dec_key[i][1], &ctx->dec_key[i][2], &ctx->dec_key[i][3]); } } int aria_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct aria_ctx *ctx = crypto_tfm_ctx(tfm); if (key_len != 16 && key_len != 24 && key_len != 32) return -EINVAL; BUILD_BUG_ON(sizeof(ctx->enc_key) != 272); BUILD_BUG_ON(sizeof(ctx->dec_key) != 272); BUILD_BUG_ON(sizeof(int) != sizeof(ctx->rounds)); ctx->key_length = key_len; ctx->rounds = (key_len + 32) / 4; aria_set_encrypt_key(ctx, in_key, key_len); aria_set_decrypt_key(ctx); return 0; } EXPORT_SYMBOL_GPL(aria_set_key); static void __aria_crypt(struct aria_ctx *ctx, u8 *out, const u8 *in, u32 key[][ARIA_RD_KEY_WORDS]) { u32 reg0, reg1, reg2, reg3; int rounds, rkidx = 0; rounds = ctx->rounds; reg0 = get_unaligned_be32(&in[0]); reg1 = get_unaligned_be32(&in[4]); reg2 = get_unaligned_be32(&in[8]); reg3 = get_unaligned_be32(&in[12]); aria_add_round_key(key[rkidx], ®0, ®1, ®2, ®3); rkidx++; aria_subst_diff_odd(®0, ®1, ®2, ®3); aria_add_round_key(key[rkidx], ®0, ®1, ®2, ®3); rkidx++; while ((rounds -= 2) > 0) { aria_subst_diff_even(®0, ®1, ®2, ®3); aria_add_round_key(key[rkidx], ®0, ®1, ®2, ®3); rkidx++; aria_subst_diff_odd(®0, ®1, ®2, ®3); aria_add_round_key(key[rkidx], ®0, ®1, ®2, ®3); rkidx++; } reg0 = key[rkidx][0] ^ make_u32((u8)(x1[get_u8(reg0, 0)]), (u8)(x2[get_u8(reg0, 1)] >> 8), (u8)(s1[get_u8(reg0, 2)]), (u8)(s2[get_u8(reg0, 3)])); reg1 = key[rkidx][1] ^ make_u32((u8)(x1[get_u8(reg1, 0)]), (u8)(x2[get_u8(reg1, 1)] >> 8), (u8)(s1[get_u8(reg1, 2)]), (u8)(s2[get_u8(reg1, 3)])); reg2 = key[rkidx][2] ^ make_u32((u8)(x1[get_u8(reg2, 0)]), (u8)(x2[get_u8(reg2, 1)] >> 8), (u8)(s1[get_u8(reg2, 2)]), (u8)(s2[get_u8(reg2, 3)])); reg3 = key[rkidx][3] ^ make_u32((u8)(x1[get_u8(reg3, 0)]), (u8)(x2[get_u8(reg3, 1)] >> 8), (u8)(s1[get_u8(reg3, 2)]), (u8)(s2[get_u8(reg3, 3)])); put_unaligned_be32(reg0, &out[0]); put_unaligned_be32(reg1, &out[4]); put_unaligned_be32(reg2, &out[8]); put_unaligned_be32(reg3, &out[12]); } void aria_encrypt(void *_ctx, u8 *out, const u8 *in) { struct aria_ctx *ctx = (struct aria_ctx *)_ctx; __aria_crypt(ctx, out, in, ctx->enc_key); } EXPORT_SYMBOL_GPL(aria_encrypt); void aria_decrypt(void *_ctx, u8 *out, const u8 *in) { struct aria_ctx *ctx = (struct aria_ctx *)_ctx; __aria_crypt(ctx, out, in, ctx->dec_key); } EXPORT_SYMBOL_GPL(aria_decrypt); static void __aria_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct aria_ctx *ctx = crypto_tfm_ctx(tfm); __aria_crypt(ctx, out, in, ctx->enc_key); } static void __aria_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct aria_ctx *ctx = crypto_tfm_ctx(tfm); __aria_crypt(ctx, out, in, ctx->dec_key); } static struct crypto_alg aria_alg = { .cra_name = "aria", .cra_driver_name = "aria-generic", .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = ARIA_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aria_ctx), .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = ARIA_MIN_KEY_SIZE, .cia_max_keysize = ARIA_MAX_KEY_SIZE, .cia_setkey = aria_set_key, .cia_encrypt = __aria_encrypt, .cia_decrypt = __aria_decrypt } } }; static int __init aria_init(void) { return crypto_register_alg(&aria_alg); } static void __exit aria_fini(void) { crypto_unregister_alg(&aria_alg); } module_init(aria_init); module_exit(aria_fini); MODULE_DESCRIPTION("ARIA Cipher Algorithm"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Taehee Yoo <ap420073@gmail.com>"); MODULE_ALIAS_CRYPTO("aria"); MODULE_ALIAS_CRYPTO("aria-generic"); |
| 8 8 8 7 7 8 9 9 1 8 9 3 3 3 2 10 10 10 16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 | /* * algif_rng: User-space interface for random number generators * * This file provides the user-space API for random number generators. * * Copyright (C) 2014, Stephan Mueller <smueller@chronox.de> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, and the entire permission notice in its entirety, * including the disclaimer of warranties. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * ALTERNATIVELY, this product may be distributed under the terms of * the GNU General Public License, in which case the provisions of the GPL2 * are required INSTEAD OF the above restrictions. (This clause is * necessary due to a potential bad interaction between the GPL and * the restrictions contained in a BSD-style copyright.) * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ #include <linux/capability.h> #include <linux/module.h> #include <crypto/rng.h> #include <linux/random.h> #include <crypto/if_alg.h> #include <linux/net.h> #include <net/sock.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stephan Mueller <smueller@chronox.de>"); MODULE_DESCRIPTION("User-space interface for random number generators"); struct rng_ctx { #define MAXSIZE 128 unsigned int len; struct crypto_rng *drng; u8 *addtl; size_t addtl_len; }; struct rng_parent_ctx { struct crypto_rng *drng; u8 *entropy; }; static void rng_reset_addtl(struct rng_ctx *ctx) { kfree_sensitive(ctx->addtl); ctx->addtl = NULL; ctx->addtl_len = 0; } static int _rng_recvmsg(struct crypto_rng *drng, struct msghdr *msg, size_t len, u8 *addtl, size_t addtl_len) { int err = 0; int genlen = 0; u8 result[MAXSIZE]; if (len == 0) return 0; if (len > MAXSIZE) len = MAXSIZE; /* * although not strictly needed, this is a precaution against coding * errors */ memset(result, 0, len); /* * The enforcement of a proper seeding of an RNG is done within an * RNG implementation. Some RNGs (DRBG, krng) do not need specific * seeding as they automatically seed. The X9.31 DRNG will return * an error if it was not seeded properly. */ genlen = crypto_rng_generate(drng, addtl, addtl_len, result, len); if (genlen < 0) return genlen; err = memcpy_to_msg(msg, result, len); memzero_explicit(result, len); return err ? err : len; } static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct rng_ctx *ctx = ask->private; return _rng_recvmsg(ctx->drng, msg, len, NULL, 0); } static int rng_test_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct rng_ctx *ctx = ask->private; int ret; lock_sock(sock->sk); ret = _rng_recvmsg(ctx->drng, msg, len, ctx->addtl, ctx->addtl_len); rng_reset_addtl(ctx); release_sock(sock->sk); return ret; } static int rng_test_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { int err; struct alg_sock *ask = alg_sk(sock->sk); struct rng_ctx *ctx = ask->private; lock_sock(sock->sk); if (len > MAXSIZE) { err = -EMSGSIZE; goto unlock; } rng_reset_addtl(ctx); ctx->addtl = kmalloc(len, GFP_KERNEL); if (!ctx->addtl) { err = -ENOMEM; goto unlock; } err = memcpy_from_msg(ctx->addtl, msg, len); if (err) { rng_reset_addtl(ctx); goto unlock; } ctx->addtl_len = len; unlock: release_sock(sock->sk); return err ? err : len; } static struct proto_ops algif_rng_ops = { .family = PF_ALG, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .getname = sock_no_getname, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .mmap = sock_no_mmap, .bind = sock_no_bind, .accept = sock_no_accept, .sendmsg = sock_no_sendmsg, .release = af_alg_release, .recvmsg = rng_recvmsg, }; static struct proto_ops __maybe_unused algif_rng_test_ops = { .family = PF_ALG, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .getname = sock_no_getname, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .mmap = sock_no_mmap, .bind = sock_no_bind, .accept = sock_no_accept, .release = af_alg_release, .recvmsg = rng_test_recvmsg, .sendmsg = rng_test_sendmsg, }; static void *rng_bind(const char *name, u32 type, u32 mask) { struct rng_parent_ctx *pctx; struct crypto_rng *rng; pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); if (!pctx) return ERR_PTR(-ENOMEM); rng = crypto_alloc_rng(name, type, mask); if (IS_ERR(rng)) { kfree(pctx); return ERR_CAST(rng); } pctx->drng = rng; return pctx; } static void rng_release(void *private) { struct rng_parent_ctx *pctx = private; if (unlikely(!pctx)) return; crypto_free_rng(pctx->drng); kfree_sensitive(pctx->entropy); kfree_sensitive(pctx); } static void rng_sock_destruct(struct sock *sk) { struct alg_sock *ask = alg_sk(sk); struct rng_ctx *ctx = ask->private; rng_reset_addtl(ctx); sock_kfree_s(sk, ctx, ctx->len); af_alg_release_parent(sk); } static int rng_accept_parent(void *private, struct sock *sk) { struct rng_ctx *ctx; struct rng_parent_ctx *pctx = private; struct alg_sock *ask = alg_sk(sk); unsigned int len = sizeof(*ctx); ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) return -ENOMEM; memset(ctx, 0, len); ctx->len = len; /* * No seeding done at that point -- if multiple accepts are * done on one RNG instance, each resulting FD points to the same * state of the RNG. */ ctx->drng = pctx->drng; ask->private = ctx; sk->sk_destruct = rng_sock_destruct; /* * Non NULL pctx->entropy means that CAVP test has been initiated on * this socket, replace proto_ops algif_rng_ops with algif_rng_test_ops. */ if (IS_ENABLED(CONFIG_CRYPTO_USER_API_RNG_CAVP) && pctx->entropy) sk->sk_socket->ops = &algif_rng_test_ops; return 0; } static int rng_setkey(void *private, const u8 *seed, unsigned int seedlen) { struct rng_parent_ctx *pctx = private; /* * Check whether seedlen is of sufficient size is done in RNG * implementations. */ return crypto_rng_reset(pctx->drng, seed, seedlen); } static int __maybe_unused rng_setentropy(void *private, sockptr_t entropy, unsigned int len) { struct rng_parent_ctx *pctx = private; u8 *kentropy = NULL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (pctx->entropy) return -EINVAL; if (len > MAXSIZE) return -EMSGSIZE; if (len) { kentropy = memdup_sockptr(entropy, len); if (IS_ERR(kentropy)) return PTR_ERR(kentropy); } crypto_rng_alg(pctx->drng)->set_ent(pctx->drng, kentropy, len); /* * Since rng doesn't perform any memory management for the entropy * buffer, save kentropy pointer to pctx now to free it after use. */ pctx->entropy = kentropy; return 0; } static const struct af_alg_type algif_type_rng = { .bind = rng_bind, .release = rng_release, .accept = rng_accept_parent, .setkey = rng_setkey, #ifdef CONFIG_CRYPTO_USER_API_RNG_CAVP .setentropy = rng_setentropy, #endif .ops = &algif_rng_ops, .name = "rng", .owner = THIS_MODULE }; static int __init rng_init(void) { return af_alg_register_type(&algif_type_rng); } static void __exit rng_exit(void) { int err = af_alg_unregister_type(&algif_type_rng); BUG_ON(err); } module_init(rng_init); module_exit(rng_exit); |
| 1 1 1 1 1 1 1 1 1 1 1 1 3 2 1 3 3 3 3 1 1 1 2 2 2 1 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_skbprio.c SKB Priority Queue. * * Authors: Nishanth Devarajan, <ndev2021@gmail.com> * Cody Doucette, <doucette@bu.edu> * original idea by Michel Machado, Cody Doucette, and Qiaobin Fu */ #include <linux/string.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> #include <net/sch_generic.h> #include <net/inet_ecn.h> /* SKB Priority Queue * ================================= * * Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes * packets according to their skb->priority field. Under congestion, * Skbprio drops already-enqueued lower priority packets to make space * available for higher priority packets; it was conceived as a solution * for denial-of-service defenses that need to route packets with different * priorities as a mean to overcome DoS attacks. */ struct skbprio_sched_data { /* Queue state. */ struct sk_buff_head qdiscs[SKBPRIO_MAX_PRIORITY]; struct gnet_stats_queue qstats[SKBPRIO_MAX_PRIORITY]; u16 highest_prio; u16 lowest_prio; }; static u16 calc_new_high_prio(const struct skbprio_sched_data *q) { int prio; for (prio = q->highest_prio - 1; prio >= q->lowest_prio; prio--) { if (!skb_queue_empty(&q->qdiscs[prio])) return prio; } /* SKB queue is empty, return 0 (default highest priority setting). */ return 0; } static u16 calc_new_low_prio(const struct skbprio_sched_data *q) { int prio; for (prio = q->lowest_prio + 1; prio <= q->highest_prio; prio++) { if (!skb_queue_empty(&q->qdiscs[prio])) return prio; } /* SKB queue is empty, return SKBPRIO_MAX_PRIORITY - 1 * (default lowest priority setting). */ return SKBPRIO_MAX_PRIORITY - 1; } static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1; struct skbprio_sched_data *q = qdisc_priv(sch); struct sk_buff_head *qdisc; struct sk_buff_head *lp_qdisc; struct sk_buff *to_drop; u16 prio, lp; /* Obtain the priority of @skb. */ prio = min(skb->priority, max_priority); qdisc = &q->qdiscs[prio]; /* sch->limit can change under us from skbprio_change() */ if (sch->q.qlen < READ_ONCE(sch->limit)) { __skb_queue_tail(qdisc, skb); qdisc_qstats_backlog_inc(sch, skb); q->qstats[prio].backlog += qdisc_pkt_len(skb); /* Check to update highest and lowest priorities. */ if (prio > q->highest_prio) q->highest_prio = prio; if (prio < q->lowest_prio) q->lowest_prio = prio; sch->q.qlen++; return NET_XMIT_SUCCESS; } /* If this packet has the lowest priority, drop it. */ lp = q->lowest_prio; if (prio <= lp) { q->qstats[prio].drops++; q->qstats[prio].overlimits++; return qdisc_drop(skb, sch, to_free); } __skb_queue_tail(qdisc, skb); qdisc_qstats_backlog_inc(sch, skb); q->qstats[prio].backlog += qdisc_pkt_len(skb); /* Drop the packet at the tail of the lowest priority qdisc. */ lp_qdisc = &q->qdiscs[lp]; to_drop = __skb_dequeue_tail(lp_qdisc); BUG_ON(!to_drop); qdisc_qstats_backlog_dec(sch, to_drop); qdisc_drop(to_drop, sch, to_free); q->qstats[lp].backlog -= qdisc_pkt_len(to_drop); q->qstats[lp].drops++; q->qstats[lp].overlimits++; /* Check to update highest and lowest priorities. */ if (skb_queue_empty(lp_qdisc)) { if (q->lowest_prio == q->highest_prio) { q->lowest_prio = prio; q->highest_prio = prio; } else { q->lowest_prio = calc_new_low_prio(q); } } if (prio > q->highest_prio) q->highest_prio = prio; return NET_XMIT_CN; } static struct sk_buff *skbprio_dequeue(struct Qdisc *sch) { struct skbprio_sched_data *q = qdisc_priv(sch); struct sk_buff_head *hpq = &q->qdiscs[q->highest_prio]; struct sk_buff *skb = __skb_dequeue(hpq); if (unlikely(!skb)) return NULL; sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); q->qstats[q->highest_prio].backlog -= qdisc_pkt_len(skb); /* Update highest priority field. */ if (skb_queue_empty(hpq)) { if (q->lowest_prio == q->highest_prio) { q->highest_prio = 0; q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1; } else { q->highest_prio = calc_new_high_prio(q); } } return skb; } static int skbprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct tc_skbprio_qopt *ctl = nla_data(opt); if (opt->nla_len != nla_attr_size(sizeof(*ctl))) return -EINVAL; WRITE_ONCE(sch->limit, ctl->limit); return 0; } static int skbprio_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct skbprio_sched_data *q = qdisc_priv(sch); int prio; /* Initialise all queues, one for each possible priority. */ for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++) __skb_queue_head_init(&q->qdiscs[prio]); memset(&q->qstats, 0, sizeof(q->qstats)); q->highest_prio = 0; q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1; sch->limit = 64; if (!opt) return 0; return skbprio_change(sch, opt, extack); } static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct tc_skbprio_qopt opt; opt.limit = READ_ONCE(sch->limit); if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) return -1; return skb->len; } static void skbprio_reset(struct Qdisc *sch) { struct skbprio_sched_data *q = qdisc_priv(sch); int prio; for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++) __skb_queue_purge(&q->qdiscs[prio]); memset(&q->qstats, 0, sizeof(q->qstats)); q->highest_prio = 0; q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1; } static void skbprio_destroy(struct Qdisc *sch) { struct skbprio_sched_data *q = qdisc_priv(sch); int prio; for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++) __skb_queue_purge(&q->qdiscs[prio]); } static struct Qdisc *skbprio_leaf(struct Qdisc *sch, unsigned long arg) { return NULL; } static unsigned long skbprio_find(struct Qdisc *sch, u32 classid) { return 0; } static int skbprio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { tcm->tcm_handle |= TC_H_MIN(cl); return 0; } static int skbprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct gnet_dump *d) { struct skbprio_sched_data *q = qdisc_priv(sch); if (gnet_stats_copy_queue(d, NULL, &q->qstats[cl - 1], q->qstats[cl - 1].qlen) < 0) return -1; return 0; } static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) { unsigned int i; if (arg->stop) return; for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) { if (!tc_qdisc_stats_dump(sch, i + 1, arg)) break; } } static const struct Qdisc_class_ops skbprio_class_ops = { .leaf = skbprio_leaf, .find = skbprio_find, .dump = skbprio_dump_class, .dump_stats = skbprio_dump_class_stats, .walk = skbprio_walk, }; static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = { .cl_ops = &skbprio_class_ops, .id = "skbprio", .priv_size = sizeof(struct skbprio_sched_data), .enqueue = skbprio_enqueue, .dequeue = skbprio_dequeue, .peek = qdisc_peek_dequeued, .init = skbprio_init, .reset = skbprio_reset, .change = skbprio_change, .dump = skbprio_dump, .destroy = skbprio_destroy, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("skbprio"); static int __init skbprio_module_init(void) { return register_qdisc(&skbprio_qdisc_ops); } static void __exit skbprio_module_exit(void) { unregister_qdisc(&skbprio_qdisc_ops); } module_init(skbprio_module_init) module_exit(skbprio_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SKB priority based scheduling qdisc"); |
| 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | /* SPDX-License-Identifier: GPL-2.0 */ /* * BSD Process Accounting for Linux - Definitions * * Author: Marco van Wieringen (mvw@planets.elm.net) * * This header file contains the definitions needed to implement * BSD-style process accounting. The kernel accounting code and all * user-level programs that try to do something useful with the * process accounting log must include this file. * * Copyright (C) 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V. * */ #ifndef _LINUX_ACCT_H #define _LINUX_ACCT_H #include <uapi/linux/acct.h> #ifdef CONFIG_BSD_PROCESS_ACCT struct pid_namespace; extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); extern void acct_exit_ns(struct pid_namespace *); #else #define acct_collect(x,y) do { } while (0) #define acct_process() do { } while (0) #define acct_exit_ns(ns) do { } while (0) #endif /* * ACCT_VERSION numbers as yet defined: * 0: old format (until 2.6.7) with 16 bit uid/gid * 1: extended variant (binary compatible on M68K) * 2: extended variant (binary compatible on everything except M68K) * 3: new binary incompatible format (64 bytes) * 4: new binary incompatible format (128 bytes) * 5: new binary incompatible format (128 bytes, second half) * */ #undef ACCT_VERSION #undef AHZ #ifdef CONFIG_BSD_PROCESS_ACCT_V3 #define ACCT_VERSION 3 #define AHZ 100 typedef struct acct_v3 acct_t; #else #ifdef CONFIG_M68K #define ACCT_VERSION 1 #else #define ACCT_VERSION 2 #endif #define AHZ (USER_HZ) typedef struct acct acct_t; #endif #include <linux/jiffies.h> /* * Yet another set of HZ to *HZ helper functions. * See <linux/jiffies.h> for the original. */ static inline u32 jiffies_to_AHZ(unsigned long x) { #if (TICK_NSEC % (NSEC_PER_SEC / AHZ)) == 0 # if HZ < AHZ return x * (AHZ / HZ); # else return x / (HZ / AHZ); # endif #else u64 tmp = (u64)x * TICK_NSEC; do_div(tmp, (NSEC_PER_SEC / AHZ)); return (long)tmp; #endif } static inline u64 nsec_to_AHZ(u64 x) { #if (NSEC_PER_SEC % AHZ) == 0 do_div(x, (NSEC_PER_SEC / AHZ)); #elif (AHZ % 512) == 0 x *= AHZ/512; do_div(x, (NSEC_PER_SEC / 512)); #else /* * max relative error 5.7e-8 (1.8s per year) for AHZ <= 1024, * overflow after 64.99 years. * exact for AHZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ... */ x *= 9; do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (AHZ/2)) / AHZ)); #endif return x; } #endif /* _LINUX_ACCT_H */ |
| 7 7 7 6 7 1 1 1 1 1 1 1 1 1 13 5 8 2 13 13 7 7 1 1 1 1 1 13 24 13 13 1 1 1 24 21 20 20 19 18 17 16 15 14 9 14 9 9 4 21 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2022 Pablo Neira Ayuso <pablo@netfilter.org> */ #include <linux/kernel.h> #include <linux/if_vlan.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_meta.h> #include <net/netfilter/nf_tables_offload.h> #include <linux/tcp.h> #include <linux/udp.h> #include <net/gre.h> #include <net/geneve.h> #include <net/ip.h> #include <linux/icmpv6.h> #include <linux/ip.h> #include <linux/ipv6.h> struct nft_inner_tun_ctx_locked { struct nft_inner_tun_ctx ctx; local_lock_t bh_lock; }; static DEFINE_PER_CPU(struct nft_inner_tun_ctx_locked, nft_pcpu_tun_ctx) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; /* Same layout as nft_expr but it embeds the private expression data area. */ struct __nft_expr { const struct nft_expr_ops *ops; union { struct nft_payload payload; struct nft_meta meta; } __attribute__((aligned(__alignof__(u64)))); }; enum { NFT_INNER_EXPR_PAYLOAD, NFT_INNER_EXPR_META, }; struct nft_inner { u8 flags; u8 hdrsize; u8 type; u8 expr_type; struct __nft_expr expr; }; static int nft_inner_parse_l2l3(const struct nft_inner *priv, const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *ctx, u32 off) { __be16 llproto, outer_llproto; u32 nhoff, thoff; if (priv->flags & NFT_INNER_LL) { struct vlan_ethhdr *veth, _veth; struct ethhdr *eth, _eth; u32 hdrsize; eth = skb_header_pointer(pkt->skb, off, sizeof(_eth), &_eth); if (!eth) return -1; switch (eth->h_proto) { case htons(ETH_P_IP): case htons(ETH_P_IPV6): llproto = eth->h_proto; hdrsize = sizeof(_eth); break; case htons(ETH_P_8021Q): veth = skb_header_pointer(pkt->skb, off, sizeof(_veth), &_veth); if (!veth) return -1; outer_llproto = veth->h_vlan_encapsulated_proto; llproto = veth->h_vlan_proto; hdrsize = sizeof(_veth); break; default: return -1; } ctx->inner_lloff = off; ctx->flags |= NFT_PAYLOAD_CTX_INNER_LL; off += hdrsize; } else { struct iphdr *iph; u32 _version; iph = skb_header_pointer(pkt->skb, off, sizeof(_version), &_version); if (!iph) return -1; switch (iph->version) { case 4: llproto = htons(ETH_P_IP); break; case 6: llproto = htons(ETH_P_IPV6); break; default: return -1; } } ctx->llproto = llproto; if (llproto == htons(ETH_P_8021Q)) llproto = outer_llproto; nhoff = off; switch (llproto) { case htons(ETH_P_IP): { struct iphdr *iph, _iph; iph = skb_header_pointer(pkt->skb, nhoff, sizeof(_iph), &_iph); if (!iph) return -1; if (iph->ihl < 5 || iph->version != 4) return -1; ctx->inner_nhoff = nhoff; ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH; thoff = nhoff + (iph->ihl * 4); if ((ntohs(iph->frag_off) & IP_OFFSET) == 0) { ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; ctx->inner_thoff = thoff; ctx->l4proto = iph->protocol; } } break; case htons(ETH_P_IPV6): { struct ipv6hdr *ip6h, _ip6h; int fh_flags = IP6_FH_F_AUTH; unsigned short fragoff; int l4proto; ip6h = skb_header_pointer(pkt->skb, nhoff, sizeof(_ip6h), &_ip6h); if (!ip6h) return -1; if (ip6h->version != 6) return -1; ctx->inner_nhoff = nhoff; ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH; thoff = nhoff; l4proto = ipv6_find_hdr(pkt->skb, &thoff, -1, &fragoff, &fh_flags); if (l4proto < 0 || thoff > U16_MAX) return -1; if (fragoff == 0) { thoff = nhoff + sizeof(_ip6h); ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; ctx->inner_thoff = thoff; ctx->l4proto = l4proto; } } break; default: return -1; } return 0; } static int nft_inner_parse_tunhdr(const struct nft_inner *priv, const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *ctx, u32 *off) { if (pkt->tprot == IPPROTO_GRE) { ctx->inner_tunoff = pkt->thoff; ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN; return 0; } if (pkt->tprot != IPPROTO_UDP) return -1; ctx->inner_tunoff = *off; ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN; *off += priv->hdrsize; switch (priv->type) { case NFT_INNER_GENEVE: { struct genevehdr *gnvh, _gnvh; gnvh = skb_header_pointer(pkt->skb, pkt->inneroff, sizeof(_gnvh), &_gnvh); if (!gnvh) return -1; *off += gnvh->opt_len * 4; } break; default: break; } return 0; } static int nft_inner_parse(const struct nft_inner *priv, struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *tun_ctx) { u32 off = pkt->inneroff; if (priv->flags & NFT_INNER_HDRSIZE && nft_inner_parse_tunhdr(priv, pkt, tun_ctx, &off) < 0) return -1; if (priv->flags & (NFT_INNER_LL | NFT_INNER_NH)) { if (nft_inner_parse_l2l3(priv, pkt, tun_ctx, off) < 0) return -1; } else if (priv->flags & NFT_INNER_TH) { tun_ctx->inner_thoff = off; tun_ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; } tun_ctx->type = priv->type; tun_ctx->cookie = (unsigned long)pkt->skb; pkt->flags |= NFT_PKTINFO_INNER_FULL; return 0; } static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *tun_ctx) { struct nft_inner_tun_ctx *this_cpu_tun_ctx; local_bh_disable(); local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx); if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) { local_bh_enable(); local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); return false; } *tun_ctx = *this_cpu_tun_ctx; local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); local_bh_enable(); return true; } static void nft_inner_save_tun_ctx(const struct nft_pktinfo *pkt, const struct nft_inner_tun_ctx *tun_ctx) { struct nft_inner_tun_ctx *this_cpu_tun_ctx; local_bh_disable(); local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx); if (this_cpu_tun_ctx->cookie != tun_ctx->cookie) *this_cpu_tun_ctx = *tun_ctx; local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); local_bh_enable(); } static bool nft_inner_parse_needed(const struct nft_inner *priv, const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *tun_ctx) { if (!(pkt->flags & NFT_PKTINFO_INNER_FULL)) return true; if (!nft_inner_restore_tun_ctx(pkt, tun_ctx)) return true; if (priv->type != tun_ctx->type) return true; return false; } static void nft_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_inner *priv = nft_expr_priv(expr); struct nft_inner_tun_ctx tun_ctx = {}; if (nft_payload_inner_offset(pkt) < 0) goto err; if (nft_inner_parse_needed(priv, pkt, &tun_ctx) && nft_inner_parse(priv, (struct nft_pktinfo *)pkt, &tun_ctx) < 0) goto err; switch (priv->expr_type) { case NFT_INNER_EXPR_PAYLOAD: nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx); break; case NFT_INNER_EXPR_META: nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx); break; default: WARN_ON_ONCE(1); goto err; } nft_inner_save_tun_ctx(pkt, &tun_ctx); return; err: regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_inner_policy[NFTA_INNER_MAX + 1] = { [NFTA_INNER_NUM] = { .type = NLA_U32 }, [NFTA_INNER_FLAGS] = { .type = NLA_U32 }, [NFTA_INNER_HDRSIZE] = { .type = NLA_U32 }, [NFTA_INNER_TYPE] = { .type = NLA_U32 }, [NFTA_INNER_EXPR] = { .type = NLA_NESTED }, }; struct nft_expr_info { const struct nft_expr_ops *ops; const struct nlattr *attr; struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; }; static int nft_inner_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_inner *priv = nft_expr_priv(expr); u32 flags, hdrsize, type, num; struct nft_expr_info expr_info; int err; if (!tb[NFTA_INNER_FLAGS] || !tb[NFTA_INNER_NUM] || !tb[NFTA_INNER_HDRSIZE] || !tb[NFTA_INNER_TYPE] || !tb[NFTA_INNER_EXPR]) return -EINVAL; flags = ntohl(nla_get_be32(tb[NFTA_INNER_FLAGS])); if (flags & ~NFT_INNER_MASK) return -EOPNOTSUPP; num = ntohl(nla_get_be32(tb[NFTA_INNER_NUM])); if (num != 0) return -EOPNOTSUPP; hdrsize = ntohl(nla_get_be32(tb[NFTA_INNER_HDRSIZE])); type = ntohl(nla_get_be32(tb[NFTA_INNER_TYPE])); if (type > U8_MAX) return -EINVAL; if (flags & NFT_INNER_HDRSIZE) { if (hdrsize == 0 || hdrsize > 64) return -EOPNOTSUPP; } priv->flags = flags; priv->hdrsize = hdrsize; priv->type = type; err = nft_expr_inner_parse(ctx, tb[NFTA_INNER_EXPR], &expr_info); if (err < 0) return err; priv->expr.ops = expr_info.ops; if (!strcmp(expr_info.ops->type->name, "payload")) priv->expr_type = NFT_INNER_EXPR_PAYLOAD; else if (!strcmp(expr_info.ops->type->name, "meta")) priv->expr_type = NFT_INNER_EXPR_META; else return -EINVAL; err = expr_info.ops->init(ctx, (struct nft_expr *)&priv->expr, (const struct nlattr * const*)expr_info.tb); if (err < 0) return err; return 0; } static int nft_inner_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_inner *priv = nft_expr_priv(expr); if (nla_put_be32(skb, NFTA_INNER_NUM, htonl(0)) || nla_put_be32(skb, NFTA_INNER_TYPE, htonl(priv->type)) || nla_put_be32(skb, NFTA_INNER_FLAGS, htonl(priv->flags)) || nla_put_be32(skb, NFTA_INNER_HDRSIZE, htonl(priv->hdrsize))) goto nla_put_failure; if (nft_expr_dump(skb, NFTA_INNER_EXPR, (struct nft_expr *)&priv->expr, reset) < 0) goto nla_put_failure; return 0; nla_put_failure: return -1; } static const struct nft_expr_ops nft_inner_ops = { .type = &nft_inner_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_inner)), .eval = nft_inner_eval, .init = nft_inner_init, .dump = nft_inner_dump, }; struct nft_expr_type nft_inner_type __read_mostly = { .name = "inner", .ops = &nft_inner_ops, .policy = nft_inner_policy, .maxattr = NFTA_INNER_MAX, .owner = THIS_MODULE, }; |
| 81 81 4 49 4 79 79 79 79 4 58 58 2 2 2 2 2 2 2 9 9 8 8 8 8 8 9 9 9 9 9 5 2 2 5 4 4 4 4 4 4 4 4 1 4 4 4 1 25 25 25 25 25 21 20 21 21 21 25 25 25 24 25 24 4 4 1 4 4 4 4 1 1 1 4 4 4 4 1 1 1 1 1 9 4 4 4 2 2 4 15 15 15 15 15 49 49 49 49 48 30 30 30 30 30 30 30 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 1 10 10 10 10 2 2 2 1 1 1 1 1 1 1 1 1 1 8 4 3 2 5 8 4 4 4 4 4 4 25 25 4 4 4 4 4 2 2 2 4 4 1 1 1 1 8 8 8 44 44 44 44 4 1 4 4 4 4 4 4 24 24 24 24 24 24 24 24 24 24 24 4 4 4 16 24 20 20 20 20 24 24 6 6 6 24 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 | // SPDX-License-Identifier: GPL-2.0+ /* * Driver core for serial ports * * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. * * Copyright 1999 ARM Limited * Copyright (C) 2000-2001 Deep Blue Solutions Ltd. */ #include <linux/module.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/init.h> #include <linux/console.h> #include <linux/gpio/consumer.h> #include <linux/kernel.h> #include <linux/of.h> #include <linux/pm_runtime.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/device.h> #include <linux/serial.h> /* for serial_state and serial_icounter_struct */ #include <linux/serial_core.h> #include <linux/sysrq.h> #include <linux/delay.h> #include <linux/mutex.h> #include <linux/math64.h> #include <linux/security.h> #include <linux/irq.h> #include <linux/uaccess.h> #include "serial_base.h" /* * This is used to lock changes in serial line configuration. */ static DEFINE_MUTEX(port_mutex); /* * lockdep: port->lock is initialized in two places, but we * want only one lock-class: */ static struct lock_class_key port_lock_key; #define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8) /* * Max time with active RTS before/after data is sent. */ #define RS485_MAX_RTS_DELAY 100 /* msecs */ static void uart_change_pm(struct uart_state *state, enum uart_pm_state pm_state); static void uart_port_shutdown(struct tty_port *port); static int uart_dcd_enabled(struct uart_port *uport) { return !!(uport->status & UPSTAT_DCD_ENABLE); } static inline struct uart_port *uart_port_ref(struct uart_state *state) { if (atomic_add_unless(&state->refcount, 1, 0)) return state->uart_port; return NULL; } static inline void uart_port_deref(struct uart_port *uport) { if (atomic_dec_and_test(&uport->state->refcount)) wake_up(&uport->state->remove_wait); } static inline struct uart_port *uart_port_ref_lock(struct uart_state *state, unsigned long *flags) { struct uart_port *uport = uart_port_ref(state); if (uport) uart_port_lock_irqsave(uport, flags); return uport; } static inline void uart_port_unlock_deref(struct uart_port *uport, unsigned long flags) { if (uport) { uart_port_unlock_irqrestore(uport, flags); uart_port_deref(uport); } } static inline struct uart_port *uart_port_check(struct uart_state *state) { lockdep_assert_held(&state->port.mutex); return state->uart_port; } /** * uart_write_wakeup - schedule write processing * @port: port to be processed * * This routine is used by the interrupt handler to schedule processing in the * software interrupt portion of the driver. A driver is expected to call this * function when the number of characters in the transmit buffer have dropped * below a threshold. * * Locking: @port->lock should be held */ void uart_write_wakeup(struct uart_port *port) { struct uart_state *state = port->state; /* * This means you called this function _after_ the port was * closed. No cookie for you. */ BUG_ON(!state); tty_port_tty_wakeup(&state->port); } EXPORT_SYMBOL(uart_write_wakeup); static void uart_stop(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; struct uart_port *port; unsigned long flags; port = uart_port_ref_lock(state, &flags); if (port) port->ops->stop_tx(port); uart_port_unlock_deref(port, flags); } static void __uart_start(struct uart_state *state) { struct uart_port *port = state->uart_port; struct serial_port_device *port_dev; int err; if (!port || port->flags & UPF_DEAD || uart_tx_stopped(port)) return; port_dev = port->port_dev; /* Increment the runtime PM usage count for the active check below */ err = pm_runtime_get(&port_dev->dev); if (err < 0 && err != -EINPROGRESS) { pm_runtime_put_noidle(&port_dev->dev); return; } /* * Start TX if enabled, and kick runtime PM. If the device is not * enabled, serial_port_runtime_resume() calls start_tx() again * after enabling the device. */ if (!pm_runtime_enabled(port->dev) || pm_runtime_active(&port_dev->dev)) port->ops->start_tx(port); pm_runtime_mark_last_busy(&port_dev->dev); pm_runtime_put_autosuspend(&port_dev->dev); } static void uart_start(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; struct uart_port *port; unsigned long flags; port = uart_port_ref_lock(state, &flags); __uart_start(state); uart_port_unlock_deref(port, flags); } static void uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) { unsigned int old; guard(uart_port_lock_irqsave)(port); old = port->mctrl; port->mctrl = (old & ~clear) | set; if (old != port->mctrl && !(port->rs485.flags & SER_RS485_ENABLED)) port->ops->set_mctrl(port, port->mctrl); } #define uart_set_mctrl(port, set) uart_update_mctrl(port, set, 0) #define uart_clear_mctrl(port, clear) uart_update_mctrl(port, 0, clear) static void uart_port_dtr_rts(struct uart_port *uport, bool active) { if (active) uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS); else uart_clear_mctrl(uport, TIOCM_DTR | TIOCM_RTS); } /* Caller holds port mutex */ static void uart_change_line_settings(struct tty_struct *tty, struct uart_state *state, const struct ktermios *old_termios) { struct uart_port *uport = uart_port_check(state); struct ktermios *termios; bool old_hw_stopped; /* * If we have no tty, termios, or the port does not exist, * then we can't set the parameters for this port. */ if (!tty || uport->type == PORT_UNKNOWN) return; termios = &tty->termios; uport->ops->set_termios(uport, termios, old_termios); /* * Set modem status enables based on termios cflag */ guard(uart_port_lock_irq)(uport); if (termios->c_cflag & CRTSCTS) uport->status |= UPSTAT_CTS_ENABLE; else uport->status &= ~UPSTAT_CTS_ENABLE; if (termios->c_cflag & CLOCAL) uport->status &= ~UPSTAT_DCD_ENABLE; else uport->status |= UPSTAT_DCD_ENABLE; /* reset sw-assisted CTS flow control based on (possibly) new mode */ old_hw_stopped = uport->hw_stopped; uport->hw_stopped = uart_softcts_mode(uport) && !(uport->ops->get_mctrl(uport) & TIOCM_CTS); if (uport->hw_stopped != old_hw_stopped) { if (!old_hw_stopped) uport->ops->stop_tx(uport); else __uart_start(state); } } static int uart_alloc_xmit_buf(struct tty_port *port) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport; unsigned long flags; unsigned long page; /* * Initialise and allocate the transmit and temporary * buffer. */ page = get_zeroed_page(GFP_KERNEL); if (!page) return -ENOMEM; uport = uart_port_ref_lock(state, &flags); if (!state->port.xmit_buf) { state->port.xmit_buf = (unsigned char *)page; kfifo_init(&state->port.xmit_fifo, state->port.xmit_buf, PAGE_SIZE); uart_port_unlock_deref(uport, flags); } else { uart_port_unlock_deref(uport, flags); /* * Do not free() the page under the port lock, see * uart_free_xmit_buf(). */ free_page(page); } return 0; } static void uart_free_xmit_buf(struct tty_port *port) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport; unsigned long flags; char *xmit_buf; /* * Do not free() the transmit buffer page under the port lock since * this can create various circular locking scenarios. For instance, * console driver may need to allocate/free a debug object, which * can end up in printk() recursion. */ uport = uart_port_ref_lock(state, &flags); xmit_buf = port->xmit_buf; port->xmit_buf = NULL; INIT_KFIFO(port->xmit_fifo); uart_port_unlock_deref(uport, flags); free_page((unsigned long)xmit_buf); } /* * Startup the port. This will be called once per open. All calls * will be serialised by the per-port mutex. */ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state, bool init_hw) { struct uart_port *uport = uart_port_check(state); int retval; if (uport->type == PORT_UNKNOWN) return 1; /* * Make sure the device is in D0 state. */ uart_change_pm(state, UART_PM_STATE_ON); retval = uart_alloc_xmit_buf(&state->port); if (retval) return retval; retval = uport->ops->startup(uport); if (retval == 0) { if (uart_console(uport) && uport->cons->cflag) { tty->termios.c_cflag = uport->cons->cflag; tty->termios.c_ispeed = uport->cons->ispeed; tty->termios.c_ospeed = uport->cons->ospeed; uport->cons->cflag = 0; uport->cons->ispeed = 0; uport->cons->ospeed = 0; } /* * Initialise the hardware port settings. */ uart_change_line_settings(tty, state, NULL); /* * Setup the RTS and DTR signals once the * port is open and ready to respond. */ if (init_hw && C_BAUD(tty)) uart_port_dtr_rts(uport, true); } /* * This is to allow setserial on this port. People may want to set * port/irq/type and then reconfigure the port properly if it failed * now. */ if (retval && capable(CAP_SYS_ADMIN)) return 1; return retval; } static int uart_startup(struct tty_struct *tty, struct uart_state *state, bool init_hw) { struct tty_port *port = &state->port; struct uart_port *uport; int retval; if (tty_port_initialized(port)) goto out_base_port_startup; retval = uart_port_startup(tty, state, init_hw); if (retval) { set_bit(TTY_IO_ERROR, &tty->flags); return retval; } out_base_port_startup: uport = uart_port_check(state); if (!uport) return -EIO; serial_base_port_startup(uport); return 0; } /* * This routine will shutdown a serial port; interrupts are disabled, and * DTR is dropped if the hangup on close termio flag is on. Calls to * uart_shutdown are serialised by the per-port semaphore. * * uport == NULL if uart_port has already been removed */ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state) { struct uart_port *uport = uart_port_check(state); struct tty_port *port = &state->port; /* * Set the TTY IO error marker */ if (tty) set_bit(TTY_IO_ERROR, &tty->flags); if (uport) serial_base_port_shutdown(uport); if (tty_port_initialized(port)) { tty_port_set_initialized(port, false); /* * Turn off DTR and RTS early. */ if (uport) { if (uart_console(uport) && tty) { uport->cons->cflag = tty->termios.c_cflag; uport->cons->ispeed = tty->termios.c_ispeed; uport->cons->ospeed = tty->termios.c_ospeed; } if (!tty || C_HUPCL(tty)) uart_port_dtr_rts(uport, false); } uart_port_shutdown(port); } /* * It's possible for shutdown to be called after suspend if we get * a DCD drop (hangup) at just the right time. Clear suspended bit so * we don't try to resume a port that has been shutdown. */ tty_port_set_suspended(port, false); uart_free_xmit_buf(port); } /** * uart_update_timeout - update per-port frame timing information * @port: uart_port structure describing the port * @cflag: termios cflag value * @baud: speed of the port * * Set the @port frame timing information from which the FIFO timeout value is * derived. The @cflag value should reflect the actual hardware settings as * number of bits, parity, stop bits and baud rate is taken into account here. * * Locking: caller is expected to take @port->lock */ void uart_update_timeout(struct uart_port *port, unsigned int cflag, unsigned int baud) { u64 temp = tty_get_frame_size(cflag); temp *= NSEC_PER_SEC; port->frame_time = (unsigned int)DIV64_U64_ROUND_UP(temp, baud); } EXPORT_SYMBOL(uart_update_timeout); /** * uart_get_baud_rate - return baud rate for a particular port * @port: uart_port structure describing the port in question. * @termios: desired termios settings * @old: old termios (or %NULL) * @min: minimum acceptable baud rate * @max: maximum acceptable baud rate * * Decode the termios structure into a numeric baud rate, taking account of the * magic 38400 baud rate (with spd_* flags), and mapping the %B0 rate to 9600 * baud. * * If the new baud rate is invalid, try the @old termios setting. If it's still * invalid, we try 9600 baud. If that is also invalid 0 is returned. * * The @termios structure is updated to reflect the baud rate we're actually * going to be using. Don't do this for the case where B0 is requested ("hang * up"). * * Locking: caller dependent */ unsigned int uart_get_baud_rate(struct uart_port *port, struct ktermios *termios, const struct ktermios *old, unsigned int min, unsigned int max) { unsigned int try; unsigned int baud; unsigned int altbaud; int hung_up = 0; upf_t flags = port->flags & UPF_SPD_MASK; switch (flags) { case UPF_SPD_HI: altbaud = 57600; break; case UPF_SPD_VHI: altbaud = 115200; break; case UPF_SPD_SHI: altbaud = 230400; break; case UPF_SPD_WARP: altbaud = 460800; break; default: altbaud = 38400; break; } for (try = 0; try < 2; try++) { baud = tty_termios_baud_rate(termios); /* * The spd_hi, spd_vhi, spd_shi, spd_warp kludge... * Die! Die! Die! */ if (try == 0 && baud == 38400) baud = altbaud; /* * Special case: B0 rate. */ if (baud == 0) { hung_up = 1; baud = 9600; } if (baud >= min && baud <= max) return baud; /* * Oops, the quotient was zero. Try again with * the old baud rate if possible. */ termios->c_cflag &= ~CBAUD; if (old) { baud = tty_termios_baud_rate(old); if (!hung_up) tty_termios_encode_baud_rate(termios, baud, baud); old = NULL; continue; } /* * As a last resort, if the range cannot be met then clip to * the nearest chip supported rate. */ if (!hung_up) { if (baud <= min) tty_termios_encode_baud_rate(termios, min + 1, min + 1); else tty_termios_encode_baud_rate(termios, max - 1, max - 1); } } return 0; } EXPORT_SYMBOL(uart_get_baud_rate); /** * uart_get_divisor - return uart clock divisor * @port: uart_port structure describing the port * @baud: desired baud rate * * Calculate the divisor (baud_base / baud) for the specified @baud, * appropriately rounded. * * If 38400 baud and custom divisor is selected, return the custom divisor * instead. * * Locking: caller dependent */ unsigned int uart_get_divisor(struct uart_port *port, unsigned int baud) { unsigned int quot; /* * Old custom speed handling. */ if (baud == 38400 && (port->flags & UPF_SPD_MASK) == UPF_SPD_CUST) quot = port->custom_divisor; else quot = DIV_ROUND_CLOSEST(port->uartclk, 16 * baud); return quot; } EXPORT_SYMBOL(uart_get_divisor); static int uart_put_char(struct tty_struct *tty, u8 c) { struct uart_state *state = tty->driver_data; struct uart_port *port; unsigned long flags; int ret = 0; port = uart_port_ref_lock(state, &flags); if (!state->port.xmit_buf) { uart_port_unlock_deref(port, flags); return 0; } if (port) ret = kfifo_put(&state->port.xmit_fifo, c); uart_port_unlock_deref(port, flags); return ret; } static void uart_flush_chars(struct tty_struct *tty) { uart_start(tty); } static ssize_t uart_write(struct tty_struct *tty, const u8 *buf, size_t count) { struct uart_state *state = tty->driver_data; struct uart_port *port; unsigned long flags; int ret = 0; /* * This means you called this function _after_ the port was * closed. No cookie for you. */ if (WARN_ON(!state)) return -EL3HLT; port = uart_port_ref_lock(state, &flags); if (!state->port.xmit_buf) { uart_port_unlock_deref(port, flags); return 0; } if (port) ret = kfifo_in(&state->port.xmit_fifo, buf, count); __uart_start(state); uart_port_unlock_deref(port, flags); return ret; } static unsigned int uart_write_room(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; struct uart_port *port; unsigned long flags; unsigned int ret; port = uart_port_ref_lock(state, &flags); ret = kfifo_avail(&state->port.xmit_fifo); uart_port_unlock_deref(port, flags); return ret; } static unsigned int uart_chars_in_buffer(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; struct uart_port *port; unsigned long flags; unsigned int ret; port = uart_port_ref_lock(state, &flags); ret = kfifo_len(&state->port.xmit_fifo); uart_port_unlock_deref(port, flags); return ret; } static void uart_flush_buffer(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; struct uart_port *port; unsigned long flags; /* * This means you called this function _after_ the port was * closed. No cookie for you. */ if (WARN_ON(!state)) return; pr_debug("uart_flush_buffer(%d) called\n", tty->index); port = uart_port_ref_lock(state, &flags); if (!port) return; kfifo_reset(&state->port.xmit_fifo); if (port->ops->flush_buffer) port->ops->flush_buffer(port); uart_port_unlock_deref(port, flags); tty_port_tty_wakeup(&state->port); } /* * This function performs low-level write of high-priority XON/XOFF * character and accounting for it. * * Requires uart_port to implement .serial_out(). */ void uart_xchar_out(struct uart_port *uport, int offset) { serial_port_out(uport, offset, uport->x_char); uport->icount.tx++; uport->x_char = 0; } EXPORT_SYMBOL_GPL(uart_xchar_out); /* * This function is used to send a high-priority XON/XOFF character to * the device */ static void uart_send_xchar(struct tty_struct *tty, u8 ch) { struct uart_state *state = tty->driver_data; struct uart_port *port; port = uart_port_ref(state); if (!port) return; if (port->ops->send_xchar) port->ops->send_xchar(port, ch); else { guard(uart_port_lock_irqsave)(port); port->x_char = ch; if (ch) port->ops->start_tx(port); } uart_port_deref(port); } static void uart_throttle(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; upstat_t mask = UPSTAT_SYNC_FIFO; struct uart_port *port; port = uart_port_ref(state); if (!port) return; if (I_IXOFF(tty)) mask |= UPSTAT_AUTOXOFF; if (C_CRTSCTS(tty)) mask |= UPSTAT_AUTORTS; if (port->status & mask) { port->ops->throttle(port); mask &= ~port->status; } if (mask & UPSTAT_AUTORTS) uart_clear_mctrl(port, TIOCM_RTS); if (mask & UPSTAT_AUTOXOFF) uart_send_xchar(tty, STOP_CHAR(tty)); uart_port_deref(port); } static void uart_unthrottle(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; upstat_t mask = UPSTAT_SYNC_FIFO; struct uart_port *port; port = uart_port_ref(state); if (!port) return; if (I_IXOFF(tty)) mask |= UPSTAT_AUTOXOFF; if (C_CRTSCTS(tty)) mask |= UPSTAT_AUTORTS; if (port->status & mask) { port->ops->unthrottle(port); mask &= ~port->status; } if (mask & UPSTAT_AUTORTS) uart_set_mctrl(port, TIOCM_RTS); if (mask & UPSTAT_AUTOXOFF) uart_send_xchar(tty, START_CHAR(tty)); uart_port_deref(port); } static int uart_get_info(struct tty_port *port, struct serial_struct *retinfo) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport; /* Initialize structure in case we error out later to prevent any stack info leakage. */ *retinfo = (struct serial_struct){}; /* * Ensure the state we copy is consistent and no hardware changes * occur as we go */ guard(mutex)(&port->mutex); uport = uart_port_check(state); if (!uport) return -ENODEV; retinfo->type = uport->type; retinfo->line = uport->line; retinfo->port = uport->iobase; if (HIGH_BITS_OFFSET) retinfo->port_high = (long) uport->iobase >> HIGH_BITS_OFFSET; retinfo->irq = uport->irq; retinfo->flags = (__force int)uport->flags; retinfo->xmit_fifo_size = uport->fifosize; retinfo->baud_base = uport->uartclk / 16; retinfo->close_delay = jiffies_to_msecs(port->close_delay) / 10; retinfo->closing_wait = port->closing_wait == ASYNC_CLOSING_WAIT_NONE ? ASYNC_CLOSING_WAIT_NONE : jiffies_to_msecs(port->closing_wait) / 10; retinfo->custom_divisor = uport->custom_divisor; retinfo->hub6 = uport->hub6; retinfo->io_type = uport->iotype; retinfo->iomem_reg_shift = uport->regshift; retinfo->iomem_base = (void *)(unsigned long)uport->mapbase; return 0; } static int uart_get_info_user(struct tty_struct *tty, struct serial_struct *ss) { struct uart_state *state = tty->driver_data; struct tty_port *port = &state->port; return uart_get_info(port, ss) < 0 ? -EIO : 0; } static int uart_change_port(struct uart_port *uport, const struct serial_struct *new_info, unsigned long new_port) { unsigned long old_iobase, old_mapbase; unsigned int old_type, old_iotype, old_hub6, old_shift; int retval; old_iobase = uport->iobase; old_mapbase = uport->mapbase; old_type = uport->type; old_hub6 = uport->hub6; old_iotype = uport->iotype; old_shift = uport->regshift; if (old_type != PORT_UNKNOWN && uport->ops->release_port) uport->ops->release_port(uport); uport->iobase = new_port; uport->type = new_info->type; uport->hub6 = new_info->hub6; uport->iotype = new_info->io_type; uport->regshift = new_info->iomem_reg_shift; uport->mapbase = (unsigned long)new_info->iomem_base; if (uport->type == PORT_UNKNOWN || !uport->ops->request_port) return 0; retval = uport->ops->request_port(uport); if (retval == 0) return 0; /* succeeded => done */ /* * If we fail to request resources for the new port, try to restore the * old settings. */ uport->iobase = old_iobase; uport->type = old_type; uport->hub6 = old_hub6; uport->iotype = old_iotype; uport->regshift = old_shift; uport->mapbase = old_mapbase; if (old_type == PORT_UNKNOWN) return retval; retval = uport->ops->request_port(uport); /* If we failed to restore the old settings, we fail like this. */ if (retval) uport->type = PORT_UNKNOWN; /* We failed anyway. */ return -EBUSY; } static int uart_set_info(struct tty_struct *tty, struct tty_port *port, struct uart_state *state, struct serial_struct *new_info) { struct uart_port *uport = uart_port_check(state); unsigned long new_port; unsigned int old_custom_divisor, close_delay, closing_wait; bool change_irq, change_port; upf_t old_flags, new_flags; int retval; if (!uport) return -EIO; new_port = new_info->port; if (HIGH_BITS_OFFSET) new_port += (unsigned long) new_info->port_high << HIGH_BITS_OFFSET; new_info->irq = irq_canonicalize(new_info->irq); close_delay = msecs_to_jiffies(new_info->close_delay * 10); closing_wait = new_info->closing_wait == ASYNC_CLOSING_WAIT_NONE ? ASYNC_CLOSING_WAIT_NONE : msecs_to_jiffies(new_info->closing_wait * 10); change_irq = !(uport->flags & UPF_FIXED_PORT) && new_info->irq != uport->irq; /* * Since changing the 'type' of the port changes its resource * allocations, we should treat type changes the same as * IO port changes. */ change_port = !(uport->flags & UPF_FIXED_PORT) && (new_port != uport->iobase || (unsigned long)new_info->iomem_base != uport->mapbase || new_info->hub6 != uport->hub6 || new_info->io_type != uport->iotype || new_info->iomem_reg_shift != uport->regshift || new_info->type != uport->type); old_flags = uport->flags; new_flags = (__force upf_t)new_info->flags; old_custom_divisor = uport->custom_divisor; if (!(uport->flags & UPF_FIXED_PORT)) { unsigned int uartclk = new_info->baud_base * 16; /* check needs to be done here before other settings made */ if (uartclk == 0) return -EINVAL; } if (!capable(CAP_SYS_ADMIN)) { if (change_irq || change_port || (new_info->baud_base != uport->uartclk / 16) || (close_delay != port->close_delay) || (closing_wait != port->closing_wait) || (new_info->xmit_fifo_size && new_info->xmit_fifo_size != uport->fifosize) || (((new_flags ^ old_flags) & ~UPF_USR_MASK) != 0)) return -EPERM; uport->flags = ((uport->flags & ~UPF_USR_MASK) | (new_flags & UPF_USR_MASK)); uport->custom_divisor = new_info->custom_divisor; goto check_and_exit; } if (change_irq || change_port) { retval = security_locked_down(LOCKDOWN_TIOCSSERIAL); if (retval) return retval; } /* Ask the low level driver to verify the settings. */ if (uport->ops->verify_port) { retval = uport->ops->verify_port(uport, new_info); if (retval) return retval; } if ((new_info->irq >= irq_get_nr_irqs()) || (new_info->irq < 0) || (new_info->baud_base < 9600)) return -EINVAL; if (change_port || change_irq) { /* Make sure that we are the sole user of this port. */ if (tty_port_users(port) > 1) return -EBUSY; /* * We need to shutdown the serial port at the old * port/type/irq combination. */ uart_shutdown(tty, state); } if (change_port) { retval = uart_change_port(uport, new_info, new_port); if (retval) return retval; } if (change_irq) uport->irq = new_info->irq; if (!(uport->flags & UPF_FIXED_PORT)) uport->uartclk = new_info->baud_base * 16; uport->flags = (uport->flags & ~UPF_CHANGE_MASK) | (new_flags & UPF_CHANGE_MASK); uport->custom_divisor = new_info->custom_divisor; port->close_delay = close_delay; port->closing_wait = closing_wait; if (new_info->xmit_fifo_size) uport->fifosize = new_info->xmit_fifo_size; check_and_exit: if (uport->type == PORT_UNKNOWN) return 0; if (tty_port_initialized(port)) { if (((old_flags ^ uport->flags) & UPF_SPD_MASK) || old_custom_divisor != uport->custom_divisor) { /* * If they're setting up a custom divisor or speed, * instead of clearing it, then bitch about it. */ if (uport->flags & UPF_SPD_MASK) { dev_notice_ratelimited(uport->dev, "%s sets custom speed on %s. This is deprecated.\n", current->comm, tty_name(port->tty)); } uart_change_line_settings(tty, state, NULL); } return 0; } retval = uart_startup(tty, state, true); if (retval < 0) return retval; if (retval == 0) tty_port_set_initialized(port, true); return 0; } static int uart_set_info_user(struct tty_struct *tty, struct serial_struct *ss) { struct uart_state *state = tty->driver_data; struct tty_port *port = &state->port; guard(rwsem_write)(&tty->termios_rwsem); /* * This semaphore protects port->count. It is also * very useful to prevent opens. Also, take the * port configuration semaphore to make sure that a * module insertion/removal doesn't change anything * under us. */ guard(mutex)(&port->mutex); return uart_set_info(tty, port, state, ss); } /** * uart_get_lsr_info - get line status register info * @tty: tty associated with the UART * @state: UART being queried * @value: returned modem value */ static int uart_get_lsr_info(struct tty_struct *tty, struct uart_state *state, unsigned int __user *value) { struct uart_port *uport = uart_port_check(state); unsigned int result; result = uport->ops->tx_empty(uport); /* * If we're about to load something into the transmit * register, we'll pretend the transmitter isn't empty to * avoid a race condition (depending on when the transmit * interrupt happens). */ if (uport->x_char || (!kfifo_is_empty(&state->port.xmit_fifo) && !uart_tx_stopped(uport))) result &= ~TIOCSER_TEMT; return put_user(result, value); } static int uart_tiocmget(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; struct tty_port *port = &state->port; struct uart_port *uport; guard(mutex)(&port->mutex); uport = uart_port_check(state); if (!uport || tty_io_error(tty)) return -EIO; guard(uart_port_lock_irq)(uport); return uport->mctrl | uport->ops->get_mctrl(uport); } static int uart_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct uart_state *state = tty->driver_data; struct tty_port *port = &state->port; struct uart_port *uport; guard(mutex)(&port->mutex); uport = uart_port_check(state); if (!uport || tty_io_error(tty)) return -EIO; uart_update_mctrl(uport, set, clear); return 0; } static int uart_break_ctl(struct tty_struct *tty, int break_state) { struct uart_state *state = tty->driver_data; struct tty_port *port = &state->port; struct uart_port *uport; guard(mutex)(&port->mutex); uport = uart_port_check(state); if (!uport) return -EIO; if (uport->type != PORT_UNKNOWN && uport->ops->break_ctl) uport->ops->break_ctl(uport, break_state); return 0; } static int uart_do_autoconfig(struct tty_struct *tty, struct uart_state *state) { struct tty_port *port = &state->port; struct uart_port *uport; int flags, ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; /* * Take the per-port semaphore. This prevents count from * changing, and hence any extra opens of the port while * we're auto-configuring. */ scoped_cond_guard(mutex_intr, return -ERESTARTSYS, &port->mutex) { uport = uart_port_check(state); if (!uport) return -EIO; if (tty_port_users(port) != 1) return -EBUSY; uart_shutdown(tty, state); /* * If we already have a port type configured, * we must release its resources. */ if (uport->type != PORT_UNKNOWN && uport->ops->release_port) uport->ops->release_port(uport); flags = UART_CONFIG_TYPE; if (uport->flags & UPF_AUTO_IRQ) flags |= UART_CONFIG_IRQ; /* * This will claim the ports resources if * a port is found. */ uport->ops->config_port(uport, flags); ret = uart_startup(tty, state, true); if (ret < 0) return ret; if (ret > 0) return 0; tty_port_set_initialized(port, true); } return 0; } static void uart_enable_ms(struct uart_port *uport) { /* * Force modem status interrupts on */ if (uport->ops->enable_ms) uport->ops->enable_ms(uport); } /* * Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change * - mask passed in arg for lines of interest * (use |'ed TIOCM_RNG/DSR/CD/CTS for masking) * Caller should use TIOCGICOUNT to see which one it was * * FIXME: This wants extracting into a common all driver implementation * of TIOCMWAIT using tty_port. */ static int uart_wait_modem_status(struct uart_state *state, unsigned long arg) { struct uart_port *uport; struct tty_port *port = &state->port; DECLARE_WAITQUEUE(wait, current); struct uart_icount cprev, cnow; int ret; /* * note the counters on entry */ uport = uart_port_ref(state); if (!uport) return -EIO; scoped_guard(uart_port_lock_irq, uport) { memcpy(&cprev, &uport->icount, sizeof(struct uart_icount)); uart_enable_ms(uport); } add_wait_queue(&port->delta_msr_wait, &wait); for (;;) { scoped_guard(uart_port_lock_irq, uport) memcpy(&cnow, &uport->icount, sizeof(struct uart_icount)); set_current_state(TASK_INTERRUPTIBLE); if (((arg & TIOCM_RNG) && (cnow.rng != cprev.rng)) || ((arg & TIOCM_DSR) && (cnow.dsr != cprev.dsr)) || ((arg & TIOCM_CD) && (cnow.dcd != cprev.dcd)) || ((arg & TIOCM_CTS) && (cnow.cts != cprev.cts))) { ret = 0; break; } schedule(); /* see if a signal did it */ if (signal_pending(current)) { ret = -ERESTARTSYS; break; } cprev = cnow; } __set_current_state(TASK_RUNNING); remove_wait_queue(&port->delta_msr_wait, &wait); uart_port_deref(uport); return ret; } /* * Get counter of input serial line interrupts (DCD,RI,DSR,CTS) * Return: write counters to the user passed counter struct * NB: both 1->0 and 0->1 transitions are counted except for * RI where only 0->1 is counted. */ static int uart_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount) { struct uart_state *state = tty->driver_data; struct uart_icount cnow; struct uart_port *uport; unsigned long flags; uport = uart_port_ref_lock(state, &flags); if (!uport) return -EIO; memcpy(&cnow, &uport->icount, sizeof(struct uart_icount)); uart_port_unlock_deref(uport, flags); icount->cts = cnow.cts; icount->dsr = cnow.dsr; icount->rng = cnow.rng; icount->dcd = cnow.dcd; icount->rx = cnow.rx; icount->tx = cnow.tx; icount->frame = cnow.frame; icount->overrun = cnow.overrun; icount->parity = cnow.parity; icount->brk = cnow.brk; icount->buf_overrun = cnow.buf_overrun; return 0; } #define SER_RS485_LEGACY_FLAGS (SER_RS485_ENABLED | SER_RS485_RTS_ON_SEND | \ SER_RS485_RTS_AFTER_SEND | SER_RS485_RX_DURING_TX | \ SER_RS485_TERMINATE_BUS) static int uart_check_rs485_flags(struct uart_port *port, struct serial_rs485 *rs485) { u32 flags = rs485->flags; /* Don't return -EINVAL for unsupported legacy flags */ flags &= ~SER_RS485_LEGACY_FLAGS; /* * For any bit outside of the legacy ones that is not supported by * the driver, return -EINVAL. */ if (flags & ~port->rs485_supported.flags) return -EINVAL; /* Asking for address w/o addressing mode? */ if (!(rs485->flags & SER_RS485_ADDRB) && (rs485->flags & (SER_RS485_ADDR_RECV|SER_RS485_ADDR_DEST))) return -EINVAL; /* Address given but not enabled? */ if (!(rs485->flags & SER_RS485_ADDR_RECV) && rs485->addr_recv) return -EINVAL; if (!(rs485->flags & SER_RS485_ADDR_DEST) && rs485->addr_dest) return -EINVAL; return 0; } static void uart_sanitize_serial_rs485_delays(struct uart_port *port, struct serial_rs485 *rs485) { if (!port->rs485_supported.delay_rts_before_send) { if (rs485->delay_rts_before_send) { dev_warn_ratelimited(port->dev, "%s (%u): RTS delay before sending not supported\n", port->name, port->line); } rs485->delay_rts_before_send = 0; } else if (rs485->delay_rts_before_send > RS485_MAX_RTS_DELAY) { rs485->delay_rts_before_send = RS485_MAX_RTS_DELAY; dev_warn_ratelimited(port->dev, "%s (%u): RTS delay before sending clamped to %u ms\n", port->name, port->line, rs485->delay_rts_before_send); } if (!port->rs485_supported.delay_rts_after_send) { if (rs485->delay_rts_after_send) { dev_warn_ratelimited(port->dev, "%s (%u): RTS delay after sending not supported\n", port->name, port->line); } rs485->delay_rts_after_send = 0; } else if (rs485->delay_rts_after_send > RS485_MAX_RTS_DELAY) { rs485->delay_rts_after_send = RS485_MAX_RTS_DELAY; dev_warn_ratelimited(port->dev, "%s (%u): RTS delay after sending clamped to %u ms\n", port->name, port->line, rs485->delay_rts_after_send); } } static void uart_sanitize_serial_rs485(struct uart_port *port, struct serial_rs485 *rs485) { u32 supported_flags = port->rs485_supported.flags; if (!(rs485->flags & SER_RS485_ENABLED)) { memset(rs485, 0, sizeof(*rs485)); return; } /* Clear other RS485 flags but SER_RS485_TERMINATE_BUS and return if enabling RS422 */ if (rs485->flags & SER_RS485_MODE_RS422) { rs485->flags &= (SER_RS485_ENABLED | SER_RS485_MODE_RS422 | SER_RS485_TERMINATE_BUS); return; } rs485->flags &= supported_flags; /* Pick sane settings if the user hasn't */ if (!(rs485->flags & SER_RS485_RTS_ON_SEND) == !(rs485->flags & SER_RS485_RTS_AFTER_SEND)) { if (supported_flags & SER_RS485_RTS_ON_SEND) { rs485->flags |= SER_RS485_RTS_ON_SEND; rs485->flags &= ~SER_RS485_RTS_AFTER_SEND; dev_warn_ratelimited(port->dev, "%s (%u): invalid RTS setting, using RTS_ON_SEND instead\n", port->name, port->line); } else { rs485->flags |= SER_RS485_RTS_AFTER_SEND; rs485->flags &= ~SER_RS485_RTS_ON_SEND; dev_warn_ratelimited(port->dev, "%s (%u): invalid RTS setting, using RTS_AFTER_SEND instead\n", port->name, port->line); } } uart_sanitize_serial_rs485_delays(port, rs485); /* Return clean padding area to userspace */ memset(rs485->padding0, 0, sizeof(rs485->padding0)); memset(rs485->padding1, 0, sizeof(rs485->padding1)); } static void uart_set_rs485_termination(struct uart_port *port, const struct serial_rs485 *rs485) { if (!(rs485->flags & SER_RS485_ENABLED)) return; gpiod_set_value_cansleep(port->rs485_term_gpio, !!(rs485->flags & SER_RS485_TERMINATE_BUS)); } static void uart_set_rs485_rx_during_tx(struct uart_port *port, const struct serial_rs485 *rs485) { if (!(rs485->flags & SER_RS485_ENABLED)) return; gpiod_set_value_cansleep(port->rs485_rx_during_tx_gpio, !!(rs485->flags & SER_RS485_RX_DURING_TX)); } static int uart_rs485_config(struct uart_port *port) { struct serial_rs485 *rs485 = &port->rs485; int ret; if (!(rs485->flags & SER_RS485_ENABLED)) return 0; uart_sanitize_serial_rs485(port, rs485); uart_set_rs485_termination(port, rs485); uart_set_rs485_rx_during_tx(port, rs485); scoped_guard(uart_port_lock_irqsave, port) ret = port->rs485_config(port, NULL, rs485); if (ret) { memset(rs485, 0, sizeof(*rs485)); /* unset GPIOs */ gpiod_set_value_cansleep(port->rs485_term_gpio, 0); gpiod_set_value_cansleep(port->rs485_rx_during_tx_gpio, 0); } return ret; } static int uart_get_rs485_config(struct uart_port *port, struct serial_rs485 __user *rs485) { struct serial_rs485 aux; scoped_guard(uart_port_lock_irqsave, port) aux = port->rs485; if (copy_to_user(rs485, &aux, sizeof(aux))) return -EFAULT; return 0; } static int uart_set_rs485_config(struct tty_struct *tty, struct uart_port *port, struct serial_rs485 __user *rs485_user) { struct serial_rs485 rs485; int ret; if (!(port->rs485_supported.flags & SER_RS485_ENABLED)) return -ENOTTY; if (copy_from_user(&rs485, rs485_user, sizeof(*rs485_user))) return -EFAULT; ret = uart_check_rs485_flags(port, &rs485); if (ret) return ret; uart_sanitize_serial_rs485(port, &rs485); uart_set_rs485_termination(port, &rs485); uart_set_rs485_rx_during_tx(port, &rs485); scoped_guard(uart_port_lock_irqsave, port) { ret = port->rs485_config(port, &tty->termios, &rs485); if (!ret) { port->rs485 = rs485; /* Reset RTS and other mctrl lines when disabling RS485 */ if (!(rs485.flags & SER_RS485_ENABLED)) port->ops->set_mctrl(port, port->mctrl); } } if (ret) { /* restore old GPIO settings */ gpiod_set_value_cansleep(port->rs485_term_gpio, !!(port->rs485.flags & SER_RS485_TERMINATE_BUS)); gpiod_set_value_cansleep(port->rs485_rx_during_tx_gpio, !!(port->rs485.flags & SER_RS485_RX_DURING_TX)); return ret; } if (copy_to_user(rs485_user, &port->rs485, sizeof(port->rs485))) return -EFAULT; return 0; } static int uart_get_iso7816_config(struct uart_port *port, struct serial_iso7816 __user *iso7816) { struct serial_iso7816 aux; if (!port->iso7816_config) return -ENOTTY; scoped_guard(uart_port_lock_irqsave, port) aux = port->iso7816; if (copy_to_user(iso7816, &aux, sizeof(aux))) return -EFAULT; return 0; } static int uart_set_iso7816_config(struct uart_port *port, struct serial_iso7816 __user *iso7816_user) { struct serial_iso7816 iso7816; int i; if (!port->iso7816_config) return -ENOTTY; if (copy_from_user(&iso7816, iso7816_user, sizeof(*iso7816_user))) return -EFAULT; /* * There are 5 words reserved for future use. Check that userspace * doesn't put stuff in there to prevent breakages in the future. */ for (i = 0; i < ARRAY_SIZE(iso7816.reserved); i++) if (iso7816.reserved[i]) return -EINVAL; scoped_guard(uart_port_lock_irqsave, port) { int ret = port->iso7816_config(port, &iso7816); if (ret) return ret; } if (copy_to_user(iso7816_user, &port->iso7816, sizeof(port->iso7816))) return -EFAULT; return 0; } /* * Called via sys_ioctl. We can use spin_lock_irq() here. */ static int uart_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct uart_state *state = tty->driver_data; struct tty_port *port = &state->port; struct uart_port *uport; void __user *uarg = (void __user *)arg; int ret = -ENOIOCTLCMD; /* This ioctl doesn't rely on the hardware to be present. */ if (cmd == TIOCSERCONFIG) { guard(rwsem_write)(&tty->termios_rwsem); return uart_do_autoconfig(tty, state); } if (tty_io_error(tty)) return -EIO; /* This should only be used when the hardware is present. */ if (cmd == TIOCMIWAIT) return uart_wait_modem_status(state, arg); /* rs485_config requires more locking than others */ if (cmd == TIOCSRS485) down_write(&tty->termios_rwsem); scoped_guard(mutex, &port->mutex) { uport = uart_port_check(state); if (!uport || tty_io_error(tty)) { ret = -EIO; break; } /* * All these rely on hardware being present and need to be * protected against the tty being hung up. */ switch (cmd) { case TIOCSERGETLSR: /* Get line status register */ ret = uart_get_lsr_info(tty, state, uarg); break; case TIOCGRS485: ret = uart_get_rs485_config(uport, uarg); break; case TIOCSRS485: ret = uart_set_rs485_config(tty, uport, uarg); break; case TIOCSISO7816: ret = uart_set_iso7816_config(state->uart_port, uarg); break; case TIOCGISO7816: ret = uart_get_iso7816_config(state->uart_port, uarg); break; default: if (uport->ops->ioctl) ret = uport->ops->ioctl(uport, cmd, arg); break; } } if (cmd == TIOCSRS485) up_write(&tty->termios_rwsem); return ret; } static void uart_set_ldisc(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; struct uart_port *uport; struct tty_port *port = &state->port; if (!tty_port_initialized(port)) return; guard(mutex)(&state->port.mutex); uport = uart_port_check(state); if (uport && uport->ops->set_ldisc) uport->ops->set_ldisc(uport, &tty->termios); } static void uart_set_termios(struct tty_struct *tty, const struct ktermios *old_termios) { struct uart_state *state = tty->driver_data; struct uart_port *uport; unsigned int cflag = tty->termios.c_cflag; unsigned int iflag_mask = IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK; bool sw_changed = false; guard(mutex)(&state->port.mutex); uport = uart_port_check(state); if (!uport) return; /* * Drivers doing software flow control also need to know * about changes to these input settings. */ if (uport->flags & UPF_SOFT_FLOW) { iflag_mask |= IXANY|IXON|IXOFF; sw_changed = tty->termios.c_cc[VSTART] != old_termios->c_cc[VSTART] || tty->termios.c_cc[VSTOP] != old_termios->c_cc[VSTOP]; } /* * These are the bits that are used to setup various * flags in the low level driver. We can ignore the Bfoo * bits in c_cflag; c_[io]speed will always be set * appropriately by set_termios() in tty_ioctl.c */ if ((cflag ^ old_termios->c_cflag) == 0 && tty->termios.c_ospeed == old_termios->c_ospeed && tty->termios.c_ispeed == old_termios->c_ispeed && ((tty->termios.c_iflag ^ old_termios->c_iflag) & iflag_mask) == 0 && !sw_changed) return; uart_change_line_settings(tty, state, old_termios); /* reload cflag from termios; port driver may have overridden flags */ cflag = tty->termios.c_cflag; /* Handle transition to B0 status */ if (((old_termios->c_cflag & CBAUD) != B0) && ((cflag & CBAUD) == B0)) uart_clear_mctrl(uport, TIOCM_RTS | TIOCM_DTR); /* Handle transition away from B0 status */ else if (((old_termios->c_cflag & CBAUD) == B0) && ((cflag & CBAUD) != B0)) { unsigned int mask = TIOCM_DTR; if (!(cflag & CRTSCTS) || !tty_throttled(tty)) mask |= TIOCM_RTS; uart_set_mctrl(uport, mask); } } /* * Calls to uart_close() are serialised via the tty_lock in * drivers/tty/tty_io.c:tty_release() * drivers/tty/tty_io.c:do_tty_hangup() */ static void uart_close(struct tty_struct *tty, struct file *filp) { struct uart_state *state = tty->driver_data; if (!state) { struct uart_driver *drv = tty->driver->driver_state; struct tty_port *port; state = drv->state + tty->index; port = &state->port; guard(spinlock_irq)(&port->lock); --port->count; return; } pr_debug("uart_close(%d) called\n", tty->index); tty_port_close(tty->port, tty, filp); } static void uart_tty_port_shutdown(struct tty_port *port) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport = uart_port_check(state); /* * At this point, we stop accepting input. To do this, we * disable the receive line status interrupts. */ if (WARN(!uport, "detached port still initialized!\n")) return; scoped_guard(uart_port_lock_irq, uport) uport->ops->stop_rx(uport); serial_base_port_shutdown(uport); uart_port_shutdown(port); /* * It's possible for shutdown to be called after suspend if we get * a DCD drop (hangup) at just the right time. Clear suspended bit so * we don't try to resume a port that has been shutdown. */ tty_port_set_suspended(port, false); uart_free_xmit_buf(port); uart_change_pm(state, UART_PM_STATE_OFF); } static void uart_wait_until_sent(struct tty_struct *tty, int timeout) { struct uart_state *state = tty->driver_data; struct uart_port *port; unsigned long char_time, expire, fifo_timeout; port = uart_port_ref(state); if (!port) return; if (port->type == PORT_UNKNOWN || port->fifosize == 0) { uart_port_deref(port); return; } /* * Set the check interval to be 1/5 of the estimated time to * send a single character, and make it at least 1. The check * interval should also be less than the timeout. * * Note: we have to use pretty tight timings here to satisfy * the NIST-PCTS. */ char_time = max(nsecs_to_jiffies(port->frame_time / 5), 1UL); if (timeout && timeout < char_time) char_time = timeout; if (!uart_cts_enabled(port)) { /* * If the transmitter hasn't cleared in twice the approximate * amount of time to send the entire FIFO, it probably won't * ever clear. This assumes the UART isn't doing flow * control, which is currently the case. Hence, if it ever * takes longer than FIFO timeout, this is probably due to a * UART bug of some kind. So, we clamp the timeout parameter at * 2 * FIFO timeout. */ fifo_timeout = uart_fifo_timeout(port); if (timeout == 0 || timeout > 2 * fifo_timeout) timeout = 2 * fifo_timeout; } expire = jiffies + timeout; pr_debug("uart_wait_until_sent(%u), jiffies=%lu, expire=%lu...\n", port->line, jiffies, expire); /* * Check whether the transmitter is empty every 'char_time'. * 'timeout' / 'expire' give us the maximum amount of time * we wait. */ while (!port->ops->tx_empty(port)) { msleep_interruptible(jiffies_to_msecs(char_time)); if (signal_pending(current)) break; if (timeout && time_after(jiffies, expire)) break; } uart_port_deref(port); } /* * Calls to uart_hangup() are serialised by the tty_lock in * drivers/tty/tty_io.c:do_tty_hangup() * This runs from a workqueue and can sleep for a _short_ time only. */ static void uart_hangup(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; struct tty_port *port = &state->port; struct uart_port *uport; pr_debug("uart_hangup(%d)\n", tty->index); guard(mutex)(&port->mutex); uport = uart_port_check(state); WARN(!uport, "hangup of detached port!\n"); if (tty_port_active(port)) { uart_flush_buffer(tty); uart_shutdown(tty, state); scoped_guard(spinlock_irqsave, &port->lock) port->count = 0; tty_port_set_active(port, false); tty_port_tty_set(port, NULL); if (uport && !uart_console(uport)) uart_change_pm(state, UART_PM_STATE_OFF); wake_up_interruptible(&port->open_wait); wake_up_interruptible(&port->delta_msr_wait); } } /* uport == NULL if uart_port has already been removed */ static void uart_port_shutdown(struct tty_port *port) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport = uart_port_check(state); /* * clear delta_msr_wait queue to avoid mem leaks: we may free * the irq here so the queue might never be woken up. Note * that we won't end up waiting on delta_msr_wait again since * any outstanding file descriptors should be pointing at * hung_up_tty_fops now. */ wake_up_interruptible(&port->delta_msr_wait); if (uport) { /* Free the IRQ and disable the port. */ uport->ops->shutdown(uport); /* Ensure that the IRQ handler isn't running on another CPU. */ synchronize_irq(uport->irq); } } static bool uart_carrier_raised(struct tty_port *port) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport; unsigned long flags; int mctrl; uport = uart_port_ref_lock(state, &flags); /* * Should never observe uport == NULL since checks for hangup should * abort the tty_port_block_til_ready() loop before checking for carrier * raised -- but report carrier raised if it does anyway so open will * continue and not sleep */ if (WARN_ON(!uport)) return true; uart_enable_ms(uport); mctrl = uport->ops->get_mctrl(uport); uart_port_unlock_deref(uport, flags); return mctrl & TIOCM_CAR; } static void uart_dtr_rts(struct tty_port *port, bool active) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport; uport = uart_port_ref(state); if (!uport) return; uart_port_dtr_rts(uport, active); uart_port_deref(uport); } static int uart_install(struct tty_driver *driver, struct tty_struct *tty) { struct uart_driver *drv = driver->driver_state; struct uart_state *state = drv->state + tty->index; tty->driver_data = state; return tty_standard_install(driver, tty); } /* * Calls to uart_open are serialised by the tty_lock in * drivers/tty/tty_io.c:tty_open() * Note that if this fails, then uart_close() _will_ be called. * * In time, we want to scrap the "opening nonpresent ports" * behaviour and implement an alternative way for setserial * to set base addresses/ports/types. This will allow us to * get rid of a certain amount of extra tests. */ static int uart_open(struct tty_struct *tty, struct file *filp) { struct uart_state *state = tty->driver_data; int retval; retval = tty_port_open(&state->port, tty, filp); if (retval > 0) retval = 0; return retval; } static int uart_port_activate(struct tty_port *port, struct tty_struct *tty) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport; int ret; uport = uart_port_check(state); if (!uport || uport->flags & UPF_DEAD) return -ENXIO; /* * Start up the serial port. */ ret = uart_startup(tty, state, false); if (ret > 0) tty_port_set_active(port, true); return ret; } static const char *uart_type(struct uart_port *port) { const char *str = NULL; if (port->ops->type) str = port->ops->type(port); if (!str) str = "unknown"; return str; } #ifdef CONFIG_PROC_FS static void uart_line_info(struct seq_file *m, struct uart_state *state) { struct tty_port *port = &state->port; enum uart_pm_state pm_state; struct uart_port *uport; char stat_buf[32]; unsigned int status; int mmio; guard(mutex)(&port->mutex); uport = uart_port_check(state); if (!uport) return; mmio = uport->iotype >= UPIO_MEM; seq_printf(m, "%u: uart:%s %s%08llX irq:%u", uport->line, uart_type(uport), mmio ? "mmio:0x" : "port:", mmio ? (unsigned long long)uport->mapbase : (unsigned long long)uport->iobase, uport->irq); if (uport->type == PORT_UNKNOWN) { seq_putc(m, '\n'); return; } if (capable(CAP_SYS_ADMIN)) { pm_state = state->pm_state; if (pm_state != UART_PM_STATE_ON) uart_change_pm(state, UART_PM_STATE_ON); scoped_guard(uart_port_lock_irq, uport) status = uport->ops->get_mctrl(uport); if (pm_state != UART_PM_STATE_ON) uart_change_pm(state, pm_state); seq_printf(m, " tx:%u rx:%u", uport->icount.tx, uport->icount.rx); if (uport->icount.frame) seq_printf(m, " fe:%u", uport->icount.frame); if (uport->icount.parity) seq_printf(m, " pe:%u", uport->icount.parity); if (uport->icount.brk) seq_printf(m, " brk:%u", uport->icount.brk); if (uport->icount.overrun) seq_printf(m, " oe:%u", uport->icount.overrun); if (uport->icount.buf_overrun) seq_printf(m, " bo:%u", uport->icount.buf_overrun); #define INFOBIT(bit, str) \ if (uport->mctrl & (bit)) \ strncat(stat_buf, (str), sizeof(stat_buf) - \ strlen(stat_buf) - 2) #define STATBIT(bit, str) \ if (status & (bit)) \ strncat(stat_buf, (str), sizeof(stat_buf) - \ strlen(stat_buf) - 2) stat_buf[0] = '\0'; stat_buf[1] = '\0'; INFOBIT(TIOCM_RTS, "|RTS"); STATBIT(TIOCM_CTS, "|CTS"); INFOBIT(TIOCM_DTR, "|DTR"); STATBIT(TIOCM_DSR, "|DSR"); STATBIT(TIOCM_CAR, "|CD"); STATBIT(TIOCM_RNG, "|RI"); if (stat_buf[0]) stat_buf[0] = ' '; seq_puts(m, stat_buf); } seq_putc(m, '\n'); #undef STATBIT #undef INFOBIT } static int uart_proc_show(struct seq_file *m, void *v) { struct tty_driver *ttydrv = m->private; struct uart_driver *drv = ttydrv->driver_state; int i; seq_printf(m, "serinfo:1.0 driver%s%s revision:%s\n", "", "", ""); for (i = 0; i < drv->nr; i++) uart_line_info(m, drv->state + i); return 0; } #endif static void uart_port_spin_lock_init(struct uart_port *port) { spin_lock_init(&port->lock); lockdep_set_class(&port->lock, &port_lock_key); } #if defined(CONFIG_SERIAL_CORE_CONSOLE) || defined(CONFIG_CONSOLE_POLL) /** * uart_console_write - write a console message to a serial port * @port: the port to write the message * @s: array of characters * @count: number of characters in string to write * @putchar: function to write character to port */ void uart_console_write(struct uart_port *port, const char *s, unsigned int count, void (*putchar)(struct uart_port *, unsigned char)) { unsigned int i; for (i = 0; i < count; i++, s++) { if (*s == '\n') putchar(port, '\r'); putchar(port, *s); } } EXPORT_SYMBOL_GPL(uart_console_write); /** * uart_parse_earlycon - Parse earlycon options * @p: ptr to 2nd field (ie., just beyond '<name>,') * @iotype: ptr for decoded iotype (out) * @addr: ptr for decoded mapbase/iobase (out) * @options: ptr for <options> field; %NULL if not present (out) * * Decodes earlycon kernel command line parameters of the form: * * earlycon=<name>,io|mmio|mmio16|mmio32|mmio32be|mmio32native,<addr>,<options> * * console=<name>,io|mmio|mmio16|mmio32|mmio32be|mmio32native,<addr>,<options> * * The optional form: * * earlycon=<name>,0x<addr>,<options> * * console=<name>,0x<addr>,<options> * * is also accepted; the returned @iotype will be %UPIO_MEM. * * Returns: 0 on success or -%EINVAL on failure */ int uart_parse_earlycon(char *p, enum uart_iotype *iotype, resource_size_t *addr, char **options) { if (strncmp(p, "mmio,", 5) == 0) { *iotype = UPIO_MEM; p += 5; } else if (strncmp(p, "mmio16,", 7) == 0) { *iotype = UPIO_MEM16; p += 7; } else if (strncmp(p, "mmio32,", 7) == 0) { *iotype = UPIO_MEM32; p += 7; } else if (strncmp(p, "mmio32be,", 9) == 0) { *iotype = UPIO_MEM32BE; p += 9; } else if (strncmp(p, "mmio32native,", 13) == 0) { *iotype = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) ? UPIO_MEM32BE : UPIO_MEM32; p += 13; } else if (strncmp(p, "io,", 3) == 0) { *iotype = UPIO_PORT; p += 3; } else if (strncmp(p, "0x", 2) == 0) { *iotype = UPIO_MEM; } else { return -EINVAL; } /* * Before you replace it with kstrtoull(), think about options separator * (',') it will not tolerate */ *addr = simple_strtoull(p, NULL, 0); p = strchr(p, ','); if (p) p++; *options = p; return 0; } EXPORT_SYMBOL_GPL(uart_parse_earlycon); /** * uart_parse_options - Parse serial port baud/parity/bits/flow control. * @options: pointer to option string * @baud: pointer to an 'int' variable for the baud rate. * @parity: pointer to an 'int' variable for the parity. * @bits: pointer to an 'int' variable for the number of data bits. * @flow: pointer to an 'int' variable for the flow control character. * * uart_parse_options() decodes a string containing the serial console * options. The format of the string is <baud><parity><bits><flow>, * eg: 115200n8r */ void uart_parse_options(const char *options, int *baud, int *parity, int *bits, int *flow) { const char *s = options; *baud = simple_strtoul(s, NULL, 10); while (*s >= '0' && *s <= '9') s++; if (*s) *parity = *s++; if (*s) *bits = *s++ - '0'; if (*s) *flow = *s; } EXPORT_SYMBOL_GPL(uart_parse_options); /** * uart_set_options - setup the serial console parameters * @port: pointer to the serial ports uart_port structure * @co: console pointer * @baud: baud rate * @parity: parity character - 'n' (none), 'o' (odd), 'e' (even) * @bits: number of data bits * @flow: flow control character - 'r' (rts) * * Locking: Caller must hold console_list_lock in order to serialize * early initialization of the serial-console lock. */ int uart_set_options(struct uart_port *port, struct console *co, int baud, int parity, int bits, int flow) { struct ktermios termios; static struct ktermios dummy; /* * Ensure that the serial-console lock is initialised early. * * Note that the console-registered check is needed because * kgdboc can call uart_set_options() for an already registered * console via tty_find_polling_driver() and uart_poll_init(). */ if (!uart_console_registered_locked(port) && !port->console_reinit) uart_port_spin_lock_init(port); memset(&termios, 0, sizeof(struct ktermios)); termios.c_cflag |= CREAD | HUPCL | CLOCAL; tty_termios_encode_baud_rate(&termios, baud, baud); if (bits == 7) termios.c_cflag |= CS7; else termios.c_cflag |= CS8; switch (parity) { case 'o': case 'O': termios.c_cflag |= PARODD; fallthrough; case 'e': case 'E': termios.c_cflag |= PARENB; break; } if (flow == 'r') termios.c_cflag |= CRTSCTS; /* * some uarts on other side don't support no flow control. * So we set * DTR in host uart to make them happy */ port->mctrl |= TIOCM_DTR; port->ops->set_termios(port, &termios, &dummy); /* * Allow the setting of the UART parameters with a NULL console * too: */ if (co) { co->cflag = termios.c_cflag; co->ispeed = termios.c_ispeed; co->ospeed = termios.c_ospeed; } return 0; } EXPORT_SYMBOL_GPL(uart_set_options); #endif /* CONFIG_SERIAL_CORE_CONSOLE */ /** * uart_change_pm - set power state of the port * * @state: port descriptor * @pm_state: new state * * Locking: port->mutex has to be held */ static void uart_change_pm(struct uart_state *state, enum uart_pm_state pm_state) { struct uart_port *port = uart_port_check(state); if (state->pm_state != pm_state) { if (port && port->ops->pm) port->ops->pm(port, pm_state, state->pm_state); state->pm_state = pm_state; } } struct uart_match { struct uart_port *port; struct uart_driver *driver; }; static int serial_match_port(struct device *dev, const void *data) { const struct uart_match *match = data; struct tty_driver *tty_drv = match->driver->tty_driver; dev_t devt = MKDEV(tty_drv->major, tty_drv->minor_start) + match->port->line; return dev->devt == devt; /* Actually, only one tty per port */ } int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport) { struct uart_state *state = drv->state + uport->line; struct tty_port *port = &state->port; struct device *tty_dev; struct uart_match match = {uport, drv}; guard(mutex)(&port->mutex); tty_dev = device_find_child(&uport->port_dev->dev, &match, serial_match_port); if (tty_dev && device_may_wakeup(tty_dev)) { enable_irq_wake(uport->irq); put_device(tty_dev); return 0; } put_device(tty_dev); /* * Nothing to do if the console is not suspending * except stop_rx to prevent any asynchronous data * over RX line. However ensure that we will be * able to Re-start_rx later. */ if (!console_suspend_enabled && uart_console(uport)) { if (uport->ops->start_rx) { guard(uart_port_lock_irq)(uport); uport->ops->stop_rx(uport); } device_set_awake_path(uport->dev); return 0; } uport->suspended = 1; if (tty_port_initialized(port)) { const struct uart_ops *ops = uport->ops; int tries; unsigned int mctrl; tty_port_set_suspended(port, true); tty_port_set_initialized(port, false); scoped_guard(uart_port_lock_irq, uport) { ops->stop_tx(uport); if (!(uport->rs485.flags & SER_RS485_ENABLED)) ops->set_mctrl(uport, 0); /* save mctrl so it can be restored on resume */ mctrl = uport->mctrl; uport->mctrl = 0; ops->stop_rx(uport); } /* * Wait for the transmitter to empty. */ for (tries = 3; !ops->tx_empty(uport) && tries; tries--) msleep(10); if (!tries) dev_err(uport->dev, "%s: Unable to drain transmitter\n", uport->name); ops->shutdown(uport); uport->mctrl = mctrl; } /* * Suspend the console device before suspending the port. */ if (uart_console(uport)) console_suspend(uport->cons); uart_change_pm(state, UART_PM_STATE_OFF); return 0; } EXPORT_SYMBOL(uart_suspend_port); int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) { struct uart_state *state = drv->state + uport->line; struct tty_port *port = &state->port; struct device *tty_dev; struct uart_match match = {uport, drv}; struct ktermios termios; guard(mutex)(&port->mutex); tty_dev = device_find_child(&uport->port_dev->dev, &match, serial_match_port); if (!uport->suspended && device_may_wakeup(tty_dev)) { if (irqd_is_wakeup_set(irq_get_irq_data((uport->irq)))) disable_irq_wake(uport->irq); put_device(tty_dev); return 0; } put_device(tty_dev); uport->suspended = 0; /* * Re-enable the console device after suspending. */ if (uart_console(uport)) { /* * First try to use the console cflag setting. */ memset(&termios, 0, sizeof(struct ktermios)); termios.c_cflag = uport->cons->cflag; termios.c_ispeed = uport->cons->ispeed; termios.c_ospeed = uport->cons->ospeed; /* * If that's unset, use the tty termios setting. */ if (port->tty && termios.c_cflag == 0) termios = port->tty->termios; if (console_suspend_enabled) uart_change_pm(state, UART_PM_STATE_ON); uport->ops->set_termios(uport, &termios, NULL); if (!console_suspend_enabled && uport->ops->start_rx) { guard(uart_port_lock_irq)(uport); uport->ops->start_rx(uport); } if (console_suspend_enabled) console_resume(uport->cons); } if (tty_port_suspended(port)) { const struct uart_ops *ops = uport->ops; int ret; uart_change_pm(state, UART_PM_STATE_ON); scoped_guard(uart_port_lock_irq, uport) if (!(uport->rs485.flags & SER_RS485_ENABLED)) ops->set_mctrl(uport, 0); if (console_suspend_enabled || !uart_console(uport)) { /* Protected by port mutex for now */ struct tty_struct *tty = port->tty; ret = ops->startup(uport); if (ret == 0) { if (tty) uart_change_line_settings(tty, state, NULL); uart_rs485_config(uport); scoped_guard(uart_port_lock_irq, uport) { if (!(uport->rs485.flags & SER_RS485_ENABLED)) ops->set_mctrl(uport, uport->mctrl); ops->start_tx(uport); } tty_port_set_initialized(port, true); } else { /* * Failed to resume - maybe hardware went away? * Clear the "initialized" flag so we won't try * to call the low level drivers shutdown method. */ uart_shutdown(tty, state); } } tty_port_set_suspended(port, false); } return 0; } EXPORT_SYMBOL(uart_resume_port); static inline void uart_report_port(struct uart_driver *drv, struct uart_port *port) { char address[64]; switch (port->iotype) { case UPIO_PORT: snprintf(address, sizeof(address), "I/O 0x%lx", port->iobase); break; case UPIO_HUB6: snprintf(address, sizeof(address), "I/O 0x%lx offset 0x%x", port->iobase, port->hub6); break; case UPIO_MEM: case UPIO_MEM16: case UPIO_MEM32: case UPIO_MEM32BE: case UPIO_AU: case UPIO_TSI: snprintf(address, sizeof(address), "MMIO 0x%llx", (unsigned long long)port->mapbase); break; default: strscpy(address, "*unknown*", sizeof(address)); break; } pr_info("%s%s%s at %s (irq = %u, base_baud = %u) is a %s\n", port->dev ? dev_name(port->dev) : "", port->dev ? ": " : "", port->name, address, port->irq, port->uartclk / 16, uart_type(port)); /* The magic multiplier feature is a bit obscure, so report it too. */ if (port->flags & UPF_MAGIC_MULTIPLIER) pr_info("%s%s%s extra baud rates supported: %u, %u", port->dev ? dev_name(port->dev) : "", port->dev ? ": " : "", port->name, port->uartclk / 8, port->uartclk / 4); } static void uart_configure_port(struct uart_driver *drv, struct uart_state *state, struct uart_port *port) { unsigned int flags; /* * If there isn't a port here, don't do anything further. */ if (!port->iobase && !port->mapbase && !port->membase) return; /* * Now do the auto configuration stuff. Note that config_port * is expected to claim the resources and map the port for us. */ flags = 0; if (port->flags & UPF_AUTO_IRQ) flags |= UART_CONFIG_IRQ; if (port->flags & UPF_BOOT_AUTOCONF) { if (!(port->flags & UPF_FIXED_TYPE)) { port->type = PORT_UNKNOWN; flags |= UART_CONFIG_TYPE; } /* Synchronize with possible boot console. */ if (uart_console(port)) console_lock(); port->ops->config_port(port, flags); if (uart_console(port)) console_unlock(); } if (port->type != PORT_UNKNOWN) { uart_report_port(drv, port); /* Synchronize with possible boot console. */ if (uart_console(port)) console_lock(); /* Power up port for set_mctrl() */ uart_change_pm(state, UART_PM_STATE_ON); /* * Ensure that the modem control lines are de-activated. * keep the DTR setting that is set in uart_set_options() * We probably don't need a spinlock around this, but */ scoped_guard(uart_port_lock_irqsave, port) { port->mctrl &= TIOCM_DTR; if (!(port->rs485.flags & SER_RS485_ENABLED)) port->ops->set_mctrl(port, port->mctrl); } uart_rs485_config(port); if (uart_console(port)) console_unlock(); /* * If this driver supports console, and it hasn't been * successfully registered yet, try to re-register it. * It may be that the port was not available. */ if (port->cons && !console_is_registered(port->cons)) register_console(port->cons); /* * Power down all ports by default, except the * console if we have one. */ if (!uart_console(port)) uart_change_pm(state, UART_PM_STATE_OFF); } } #ifdef CONFIG_CONSOLE_POLL static int uart_poll_init(struct tty_driver *driver, int line, char *options) { struct uart_driver *drv = driver->driver_state; struct uart_state *state = drv->state + line; enum uart_pm_state pm_state; struct tty_port *tport; struct uart_port *port; int baud = 9600; int bits = 8; int parity = 'n'; int flow = 'n'; int ret = 0; tport = &state->port; guard(mutex)(&tport->mutex); port = uart_port_check(state); if (!port || port->type == PORT_UNKNOWN || !(port->ops->poll_get_char && port->ops->poll_put_char)) return -1; pm_state = state->pm_state; uart_change_pm(state, UART_PM_STATE_ON); if (port->ops->poll_init) { /* * We don't set initialized as we only initialized the hw, * e.g. state->xmit is still uninitialized. */ if (!tty_port_initialized(tport)) ret = port->ops->poll_init(port); } if (!ret && options) { uart_parse_options(options, &baud, &parity, &bits, &flow); console_list_lock(); ret = uart_set_options(port, NULL, baud, parity, bits, flow); console_list_unlock(); } if (ret) uart_change_pm(state, pm_state); return ret; } static int uart_poll_get_char(struct tty_driver *driver, int line) { struct uart_driver *drv = driver->driver_state; struct uart_state *state = drv->state + line; struct uart_port *port; int ret = -1; port = uart_port_ref(state); if (port) { ret = port->ops->poll_get_char(port); uart_port_deref(port); } return ret; } static void uart_poll_put_char(struct tty_driver *driver, int line, char ch) { struct uart_driver *drv = driver->driver_state; struct uart_state *state = drv->state + line; struct uart_port *port; port = uart_port_ref(state); if (!port) return; if (ch == '\n') port->ops->poll_put_char(port, '\r'); port->ops->poll_put_char(port, ch); uart_port_deref(port); } #endif static const struct tty_operations uart_ops = { .install = uart_install, .open = uart_open, .close = uart_close, .write = uart_write, .put_char = uart_put_char, .flush_chars = uart_flush_chars, .write_room = uart_write_room, .chars_in_buffer= uart_chars_in_buffer, .flush_buffer = uart_flush_buffer, .ioctl = uart_ioctl, .throttle = uart_throttle, .unthrottle = uart_unthrottle, .send_xchar = uart_send_xchar, .set_termios = uart_set_termios, .set_ldisc = uart_set_ldisc, .stop = uart_stop, .start = uart_start, .hangup = uart_hangup, .break_ctl = uart_break_ctl, .wait_until_sent= uart_wait_until_sent, #ifdef CONFIG_PROC_FS .proc_show = uart_proc_show, #endif .tiocmget = uart_tiocmget, .tiocmset = uart_tiocmset, .set_serial = uart_set_info_user, .get_serial = uart_get_info_user, .get_icount = uart_get_icount, #ifdef CONFIG_CONSOLE_POLL .poll_init = uart_poll_init, .poll_get_char = uart_poll_get_char, .poll_put_char = uart_poll_put_char, #endif }; static const struct tty_port_operations uart_port_ops = { .carrier_raised = uart_carrier_raised, .dtr_rts = uart_dtr_rts, .activate = uart_port_activate, .shutdown = uart_tty_port_shutdown, }; /** * uart_register_driver - register a driver with the uart core layer * @drv: low level driver structure * * Register a uart driver with the core driver. We in turn register with the * tty layer, and initialise the core driver per-port state. * * We have a proc file in /proc/tty/driver which is named after the normal * driver. * * @drv->port should be %NULL, and the per-port structures should be registered * using uart_add_one_port() after this call has succeeded. * * Locking: none, Interrupts: enabled */ int uart_register_driver(struct uart_driver *drv) { struct tty_driver *normal; int i, retval = -ENOMEM; BUG_ON(drv->state); /* * Maybe we should be using a slab cache for this, especially if * we have a large number of ports to handle. */ drv->state = kcalloc(drv->nr, sizeof(struct uart_state), GFP_KERNEL); if (!drv->state) goto out; normal = tty_alloc_driver(drv->nr, TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV); if (IS_ERR(normal)) { retval = PTR_ERR(normal); goto out_kfree; } drv->tty_driver = normal; normal->driver_name = drv->driver_name; normal->name = drv->dev_name; normal->major = drv->major; normal->minor_start = drv->minor; normal->type = TTY_DRIVER_TYPE_SERIAL; normal->subtype = SERIAL_TYPE_NORMAL; normal->init_termios = tty_std_termios; normal->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; normal->init_termios.c_ispeed = normal->init_termios.c_ospeed = 9600; normal->driver_state = drv; tty_set_operations(normal, &uart_ops); /* * Initialise the UART state(s). */ for (i = 0; i < drv->nr; i++) { struct uart_state *state = drv->state + i; struct tty_port *port = &state->port; tty_port_init(port); port->ops = &uart_port_ops; } retval = tty_register_driver(normal); if (retval >= 0) return retval; for (i = 0; i < drv->nr; i++) tty_port_destroy(&drv->state[i].port); tty_driver_kref_put(normal); out_kfree: kfree(drv->state); out: return retval; } EXPORT_SYMBOL(uart_register_driver); /** * uart_unregister_driver - remove a driver from the uart core layer * @drv: low level driver structure * * Remove all references to a driver from the core driver. The low level * driver must have removed all its ports via the uart_remove_one_port() if it * registered them with uart_add_one_port(). (I.e. @drv->port is %NULL.) * * Locking: none, Interrupts: enabled */ void uart_unregister_driver(struct uart_driver *drv) { struct tty_driver *p = drv->tty_driver; unsigned int i; tty_unregister_driver(p); tty_driver_kref_put(p); for (i = 0; i < drv->nr; i++) tty_port_destroy(&drv->state[i].port); kfree(drv->state); drv->state = NULL; drv->tty_driver = NULL; } EXPORT_SYMBOL(uart_unregister_driver); struct tty_driver *uart_console_device(struct console *co, int *index) { struct uart_driver *p = co->data; *index = co->index; return p->tty_driver; } EXPORT_SYMBOL_GPL(uart_console_device); static ssize_t uartclk_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serial_struct tmp; struct tty_port *port = dev_get_drvdata(dev); uart_get_info(port, &tmp); return sprintf(buf, "%d\n", tmp.baud_base * 16); } static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serial_struct tmp; struct tty_port *port = dev_get_drvdata(dev); uart_get_info(port, &tmp); return sprintf(buf, "%d\n", tmp.type); } static ssize_t line_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serial_struct tmp; struct tty_port *port = dev_get_drvdata(dev); uart_get_info(port, &tmp); return sprintf(buf, "%d\n", tmp.line); } static ssize_t port_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serial_struct tmp; struct tty_port *port = dev_get_drvdata(dev); unsigned long ioaddr; uart_get_info(port, &tmp); ioaddr = tmp.port; if (HIGH_BITS_OFFSET) ioaddr |= (unsigned long)tmp.port_high << HIGH_BITS_OFFSET; return sprintf(buf, "0x%lX\n", ioaddr); } static ssize_t irq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serial_struct tmp; struct tty_port *port = dev_get_drvdata(dev); uart_get_info(port, &tmp); return sprintf(buf, "%d\n", tmp.irq); } static ssize_t flags_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serial_struct tmp; struct tty_port *port = dev_get_drvdata(dev); uart_get_info(port, &tmp); return sprintf(buf, "0x%X\n", tmp.flags); } static ssize_t xmit_fifo_size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serial_struct tmp; struct tty_port *port = dev_get_drvdata(dev); uart_get_info(port, &tmp); return sprintf(buf, "%d\n", tmp.xmit_fifo_size); } static ssize_t close_delay_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serial_struct tmp; struct tty_port *port = dev_get_drvdata(dev); uart_get_info(port, &tmp); return sprintf(buf, "%u\n", tmp.close_delay); } static ssize_t closing_wait_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serial_struct tmp; struct tty_port *port = dev_get_drvdata(dev); uart_get_info(port, &tmp); return sprintf(buf, "%u\n", tmp.closing_wait); } static ssize_t custom_divisor_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serial_struct tmp; struct tty_port *port = dev_get_drvdata(dev); uart_get_info(port, &tmp); return sprintf(buf, "%d\n", tmp.custom_divisor); } static ssize_t io_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serial_struct tmp; struct tty_port *port = dev_get_drvdata(dev); uart_get_info(port, &tmp); return sprintf(buf, "%u\n", tmp.io_type); } static ssize_t iomem_base_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serial_struct tmp; struct tty_port *port = dev_get_drvdata(dev); uart_get_info(port, &tmp); return sprintf(buf, "0x%lX\n", (unsigned long)tmp.iomem_base); } static ssize_t iomem_reg_shift_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serial_struct tmp; struct tty_port *port = dev_get_drvdata(dev); uart_get_info(port, &tmp); return sprintf(buf, "%u\n", tmp.iomem_reg_shift); } static ssize_t console_show(struct device *dev, struct device_attribute *attr, char *buf) { struct tty_port *port = dev_get_drvdata(dev); struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport; bool console = false; scoped_guard(mutex, &port->mutex) { uport = uart_port_check(state); if (uport) console = uart_console_registered(uport); } return sprintf(buf, "%c\n", console ? 'Y' : 'N'); } static ssize_t console_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct tty_port *port = dev_get_drvdata(dev); struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport; bool oldconsole, newconsole; int ret; ret = kstrtobool(buf, &newconsole); if (ret) return ret; guard(mutex)(&port->mutex); uport = uart_port_check(state); if (!uport) return -ENXIO; oldconsole = uart_console_registered(uport); if (oldconsole && !newconsole) { ret = unregister_console(uport->cons); if (ret < 0) return ret; } else if (!oldconsole && newconsole) { if (!uart_console(uport)) return -ENOENT; uport->console_reinit = 1; register_console(uport->cons); } return count; } static DEVICE_ATTR_RO(uartclk); static DEVICE_ATTR_RO(type); static DEVICE_ATTR_RO(line); static DEVICE_ATTR_RO(port); static DEVICE_ATTR_RO(irq); static DEVICE_ATTR_RO(flags); static DEVICE_ATTR_RO(xmit_fifo_size); static DEVICE_ATTR_RO(close_delay); static DEVICE_ATTR_RO(closing_wait); static DEVICE_ATTR_RO(custom_divisor); static DEVICE_ATTR_RO(io_type); static DEVICE_ATTR_RO(iomem_base); static DEVICE_ATTR_RO(iomem_reg_shift); static DEVICE_ATTR_RW(console); static struct attribute *tty_dev_attrs[] = { &dev_attr_uartclk.attr, &dev_attr_type.attr, &dev_attr_line.attr, &dev_attr_port.attr, &dev_attr_irq.attr, &dev_attr_flags.attr, &dev_attr_xmit_fifo_size.attr, &dev_attr_close_delay.attr, &dev_attr_closing_wait.attr, &dev_attr_custom_divisor.attr, &dev_attr_io_type.attr, &dev_attr_iomem_base.attr, &dev_attr_iomem_reg_shift.attr, &dev_attr_console.attr, NULL }; static const struct attribute_group tty_dev_attr_group = { .attrs = tty_dev_attrs, }; /** * serial_core_add_one_port - attach a driver-defined port structure * @drv: pointer to the uart low level driver structure for this port * @uport: uart port structure to use for this port. * * Context: task context, might sleep * * This allows the driver @drv to register its own uart_port structure with the * core driver. The main purpose is to allow the low level uart drivers to * expand uart_port, rather than having yet more levels of structures. * Caller must hold port_mutex. */ static int serial_core_add_one_port(struct uart_driver *drv, struct uart_port *uport) { struct uart_state *state; struct tty_port *port; struct device *tty_dev; int num_groups; if (uport->line >= drv->nr) return -EINVAL; state = drv->state + uport->line; port = &state->port; guard(mutex)(&port->mutex); if (state->uart_port) return -EINVAL; /* Link the port to the driver state table and vice versa */ atomic_set(&state->refcount, 1); init_waitqueue_head(&state->remove_wait); state->uart_port = uport; uport->state = state; /* * If this port is in use as a console then the spinlock is already * initialised. */ if (!uart_console_registered(uport)) uart_port_spin_lock_init(uport); state->pm_state = UART_PM_STATE_UNDEFINED; uart_port_set_cons(uport, drv->cons); uport->minor = drv->tty_driver->minor_start + uport->line; uport->name = kasprintf(GFP_KERNEL, "%s%u", drv->dev_name, drv->tty_driver->name_base + uport->line); if (!uport->name) return -ENOMEM; if (uport->cons && uport->dev) of_console_check(uport->dev->of_node, uport->cons->name, uport->line); /* * TTY port has to be linked with the driver before register_console() * in uart_configure_port(), because user-space could open the console * immediately after. */ tty_port_link_device(port, drv->tty_driver, uport->line); uart_configure_port(drv, state, uport); port->console = uart_console(uport); num_groups = 2; if (uport->attr_group) num_groups++; uport->tty_groups = kcalloc(num_groups, sizeof(*uport->tty_groups), GFP_KERNEL); if (!uport->tty_groups) return -ENOMEM; uport->tty_groups[0] = &tty_dev_attr_group; if (uport->attr_group) uport->tty_groups[1] = uport->attr_group; /* Ensure serdev drivers can call serdev_device_open() right away */ uport->flags &= ~UPF_DEAD; /* * Register the port whether it's detected or not. This allows * setserial to be used to alter this port's parameters. */ tty_dev = tty_port_register_device_attr_serdev(port, drv->tty_driver, uport->line, uport->dev, &uport->port_dev->dev, port, uport->tty_groups); if (!IS_ERR(tty_dev)) { device_set_wakeup_capable(tty_dev, 1); } else { uport->flags |= UPF_DEAD; dev_err(uport->dev, "Cannot register tty device on line %u\n", uport->line); } return 0; } /** * serial_core_remove_one_port - detach a driver defined port structure * @drv: pointer to the uart low level driver structure for this port * @uport: uart port structure for this port * * Context: task context, might sleep * * This unhooks (and hangs up) the specified port structure from the core * driver. No further calls will be made to the low-level code for this port. * Caller must hold port_mutex. */ static void serial_core_remove_one_port(struct uart_driver *drv, struct uart_port *uport) { struct uart_state *state = drv->state + uport->line; struct tty_port *port = &state->port; struct uart_port *uart_port; scoped_guard(mutex, &port->mutex) { uart_port = uart_port_check(state); if (uart_port != uport) dev_alert(uport->dev, "Removing wrong port: %p != %p\n", uart_port, uport); if (!uart_port) return; } /* * Remove the devices from the tty layer */ tty_port_unregister_device(port, drv->tty_driver, uport->line); tty_port_tty_vhangup(port); /* * If the port is used as a console, unregister it */ if (uart_console(uport)) unregister_console(uport->cons); /* * Free the port IO and memory resources, if any. */ if (uport->type != PORT_UNKNOWN && uport->ops->release_port) uport->ops->release_port(uport); kfree(uport->tty_groups); kfree(uport->name); /* * Indicate that there isn't a port here anymore. */ uport->type = PORT_UNKNOWN; uport->port_dev = NULL; guard(mutex)(&port->mutex); WARN_ON(atomic_dec_return(&state->refcount) < 0); wait_event(state->remove_wait, !atomic_read(&state->refcount)); state->uart_port = NULL; } /** * uart_match_port - are the two ports equivalent? * @port1: first port * @port2: second port * * This utility function can be used to determine whether two uart_port * structures describe the same port. */ bool uart_match_port(const struct uart_port *port1, const struct uart_port *port2) { if (port1->iotype != port2->iotype) return false; switch (port1->iotype) { case UPIO_PORT: return port1->iobase == port2->iobase; case UPIO_HUB6: return port1->iobase == port2->iobase && port1->hub6 == port2->hub6; case UPIO_MEM: case UPIO_MEM16: case UPIO_MEM32: case UPIO_MEM32BE: case UPIO_AU: case UPIO_TSI: return port1->mapbase == port2->mapbase; default: return false; } } EXPORT_SYMBOL(uart_match_port); static struct serial_ctrl_device * serial_core_get_ctrl_dev(struct serial_port_device *port_dev) { struct device *dev = &port_dev->dev; return to_serial_base_ctrl_device(dev->parent); } /* * Find a registered serial core controller device if one exists. Returns * the first device matching the ctrl_id. Caller must hold port_mutex. */ static struct serial_ctrl_device *serial_core_ctrl_find(struct uart_driver *drv, struct device *phys_dev, int ctrl_id) { struct uart_state *state; int i; lockdep_assert_held(&port_mutex); for (i = 0; i < drv->nr; i++) { state = drv->state + i; if (!state->uart_port || !state->uart_port->port_dev) continue; if (state->uart_port->dev == phys_dev && state->uart_port->ctrl_id == ctrl_id) return serial_core_get_ctrl_dev(state->uart_port->port_dev); } return NULL; } static struct serial_ctrl_device *serial_core_ctrl_device_add(struct uart_port *port) { return serial_base_ctrl_add(port, port->dev); } static int serial_core_port_device_add(struct serial_ctrl_device *ctrl_dev, struct uart_port *port) { struct serial_port_device *port_dev; port_dev = serial_base_port_add(port, ctrl_dev); if (IS_ERR(port_dev)) return PTR_ERR(port_dev); port->port_dev = port_dev; return 0; } /* * Initialize a serial core port device, and a controller device if needed. */ int serial_core_register_port(struct uart_driver *drv, struct uart_port *port) { struct serial_ctrl_device *ctrl_dev, *new_ctrl_dev = NULL; int ret; guard(mutex)(&port_mutex); /* * Prevent serial_port_runtime_resume() from trying to use the port * until serial_core_add_one_port() has completed */ port->flags |= UPF_DEAD; /* Inititalize a serial core controller device if needed */ ctrl_dev = serial_core_ctrl_find(drv, port->dev, port->ctrl_id); if (!ctrl_dev) { new_ctrl_dev = serial_core_ctrl_device_add(port); if (IS_ERR(new_ctrl_dev)) return PTR_ERR(new_ctrl_dev); ctrl_dev = new_ctrl_dev; } /* * Initialize a serial core port device. Tag the port dead to prevent * serial_port_runtime_resume() trying to do anything until port has * been registered. It gets cleared by serial_core_add_one_port(). */ ret = serial_core_port_device_add(ctrl_dev, port); if (ret) goto err_unregister_ctrl_dev; ret = serial_base_match_and_update_preferred_console(drv, port); if (ret) goto err_unregister_port_dev; ret = serial_core_add_one_port(drv, port); if (ret) goto err_unregister_port_dev; return 0; err_unregister_port_dev: serial_base_port_device_remove(port->port_dev); err_unregister_ctrl_dev: serial_base_ctrl_device_remove(new_ctrl_dev); return ret; } /* * Removes a serial core port device, and the related serial core controller * device if the last instance. */ void serial_core_unregister_port(struct uart_driver *drv, struct uart_port *port) { struct device *phys_dev = port->dev; struct serial_port_device *port_dev = port->port_dev; struct serial_ctrl_device *ctrl_dev = serial_core_get_ctrl_dev(port_dev); int ctrl_id = port->ctrl_id; guard(mutex)(&port_mutex); port->flags |= UPF_DEAD; serial_core_remove_one_port(drv, port); /* Note that struct uart_port *port is no longer valid at this point */ serial_base_port_device_remove(port_dev); /* Drop the serial core controller device if no ports are using it */ if (!serial_core_ctrl_find(drv, phys_dev, ctrl_id)) serial_base_ctrl_device_remove(ctrl_dev); } /** * uart_handle_dcd_change - handle a change of carrier detect state * @uport: uart_port structure for the open port * @active: new carrier detect status * * Caller must hold uport->lock. */ void uart_handle_dcd_change(struct uart_port *uport, bool active) { struct tty_port *port = &uport->state->port; struct tty_struct *tty = port->tty; struct tty_ldisc *ld; lockdep_assert_held_once(&uport->lock); if (tty) { ld = tty_ldisc_ref(tty); if (ld) { if (ld->ops->dcd_change) ld->ops->dcd_change(tty, active); tty_ldisc_deref(ld); } } uport->icount.dcd++; if (uart_dcd_enabled(uport)) { if (active) wake_up_interruptible(&port->open_wait); else if (tty) tty_hangup(tty); } } EXPORT_SYMBOL_GPL(uart_handle_dcd_change); /** * uart_handle_cts_change - handle a change of clear-to-send state * @uport: uart_port structure for the open port * @active: new clear-to-send status * * Caller must hold uport->lock. */ void uart_handle_cts_change(struct uart_port *uport, bool active) { lockdep_assert_held_once(&uport->lock); uport->icount.cts++; if (uart_softcts_mode(uport)) { if (uport->hw_stopped) { if (active) { uport->hw_stopped = false; uport->ops->start_tx(uport); uart_write_wakeup(uport); } } else { if (!active) { uport->hw_stopped = true; uport->ops->stop_tx(uport); } } } } EXPORT_SYMBOL_GPL(uart_handle_cts_change); /** * uart_insert_char - push a char to the uart layer * * User is responsible to call tty_flip_buffer_push when they are done with * insertion. * * @port: corresponding port * @status: state of the serial port RX buffer (LSR for 8250) * @overrun: mask of overrun bits in @status * @ch: character to push * @flag: flag for the character (see TTY_NORMAL and friends) */ void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, u8 ch, u8 flag) { struct tty_port *tport = &port->state->port; if ((status & port->ignore_status_mask & ~overrun) == 0) if (tty_insert_flip_char(tport, ch, flag) == 0) ++port->icount.buf_overrun; /* * Overrun is special. Since it's reported immediately, * it doesn't affect the current character. */ if (status & ~port->ignore_status_mask & overrun) if (tty_insert_flip_char(tport, 0, TTY_OVERRUN) == 0) ++port->icount.buf_overrun; } EXPORT_SYMBOL_GPL(uart_insert_char); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL static const u8 sysrq_toggle_seq[] = CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE; static void uart_sysrq_on(struct work_struct *w) { int sysrq_toggle_seq_len = strlen(sysrq_toggle_seq); sysrq_toggle_support(1); pr_info("SysRq is enabled by magic sequence '%*pE' on serial\n", sysrq_toggle_seq_len, sysrq_toggle_seq); } static DECLARE_WORK(sysrq_enable_work, uart_sysrq_on); /** * uart_try_toggle_sysrq - Enables SysRq from serial line * @port: uart_port structure where char(s) after BREAK met * @ch: new character in the sequence after received BREAK * * Enables magic SysRq when the required sequence is met on port * (see CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE). * * Returns: %false if @ch is out of enabling sequence and should be * handled some other way, %true if @ch was consumed. */ bool uart_try_toggle_sysrq(struct uart_port *port, u8 ch) { int sysrq_toggle_seq_len = strlen(sysrq_toggle_seq); if (!sysrq_toggle_seq_len) return false; BUILD_BUG_ON(ARRAY_SIZE(sysrq_toggle_seq) >= U8_MAX); if (sysrq_toggle_seq[port->sysrq_seq] != ch) { port->sysrq_seq = 0; return false; } if (++port->sysrq_seq < sysrq_toggle_seq_len) { port->sysrq = jiffies + SYSRQ_TIMEOUT; return true; } schedule_work(&sysrq_enable_work); port->sysrq = 0; return true; } EXPORT_SYMBOL_GPL(uart_try_toggle_sysrq); #endif /** * uart_get_rs485_mode() - retrieve rs485 properties for given uart * @port: uart device's target port * * This function implements the device tree binding described in * Documentation/devicetree/bindings/serial/rs485.txt. */ int uart_get_rs485_mode(struct uart_port *port) { struct serial_rs485 *rs485conf = &port->rs485; struct device *dev = port->dev; enum gpiod_flags dflags; struct gpio_desc *desc; u32 rs485_delay[2]; int ret; if (!(port->rs485_supported.flags & SER_RS485_ENABLED)) return 0; /* * Retrieve properties only if a firmware node exists. If no firmware * node exists, then don't touch rs485 config and keep initial rs485 * properties set by driver. */ if (!dev_fwnode(dev)) return 0; ret = device_property_read_u32_array(dev, "rs485-rts-delay", rs485_delay, 2); if (!ret) { rs485conf->delay_rts_before_send = rs485_delay[0]; rs485conf->delay_rts_after_send = rs485_delay[1]; } else { rs485conf->delay_rts_before_send = 0; rs485conf->delay_rts_after_send = 0; } uart_sanitize_serial_rs485_delays(port, rs485conf); /* * Clear full-duplex and enabled flags, set RTS polarity to active high * to get to a defined state with the following properties: */ rs485conf->flags &= ~(SER_RS485_RX_DURING_TX | SER_RS485_ENABLED | SER_RS485_TERMINATE_BUS | SER_RS485_RTS_AFTER_SEND); rs485conf->flags |= SER_RS485_RTS_ON_SEND; if (device_property_read_bool(dev, "rs485-rx-during-tx")) rs485conf->flags |= SER_RS485_RX_DURING_TX; if (device_property_read_bool(dev, "linux,rs485-enabled-at-boot-time")) rs485conf->flags |= SER_RS485_ENABLED; if (device_property_read_bool(dev, "rs485-rts-active-low")) { rs485conf->flags &= ~SER_RS485_RTS_ON_SEND; rs485conf->flags |= SER_RS485_RTS_AFTER_SEND; } /* * Disabling termination by default is the safe choice: Else if many * bus participants enable it, no communication is possible at all. * Works fine for short cables and users may enable for longer cables. */ desc = devm_gpiod_get_optional(dev, "rs485-term", GPIOD_OUT_LOW); if (IS_ERR(desc)) return dev_err_probe(dev, PTR_ERR(desc), "Cannot get rs485-term-gpios\n"); port->rs485_term_gpio = desc; if (port->rs485_term_gpio) port->rs485_supported.flags |= SER_RS485_TERMINATE_BUS; dflags = (rs485conf->flags & SER_RS485_RX_DURING_TX) ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW; desc = devm_gpiod_get_optional(dev, "rs485-rx-during-tx", dflags); if (IS_ERR(desc)) return dev_err_probe(dev, PTR_ERR(desc), "Cannot get rs485-rx-during-tx-gpios\n"); port->rs485_rx_during_tx_gpio = desc; if (port->rs485_rx_during_tx_gpio) port->rs485_supported.flags |= SER_RS485_RX_DURING_TX; return 0; } EXPORT_SYMBOL_GPL(uart_get_rs485_mode); /* Compile-time assertions for serial_rs485 layout */ static_assert(offsetof(struct serial_rs485, padding) == (offsetof(struct serial_rs485, delay_rts_after_send) + sizeof(__u32))); static_assert(offsetof(struct serial_rs485, padding1) == offsetof(struct serial_rs485, padding[1])); static_assert((offsetof(struct serial_rs485, padding[4]) + sizeof(__u32)) == sizeof(struct serial_rs485)); MODULE_DESCRIPTION("Serial driver core"); MODULE_LICENSE("GPL"); |
| 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 | /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ #ifndef _NET_BATMAN_ADV_ORIGINATOR_H_ #define _NET_BATMAN_ADV_ORIGINATOR_H_ #include "main.h" #include <linux/compiler.h> #include <linux/if_ether.h> #include <linux/jhash.h> #include <linux/kref.h> #include <linux/netlink.h> #include <linux/skbuff.h> #include <linux/types.h> bool batadv_compare_orig(const struct hlist_node *node, const void *data2); int batadv_originator_init(struct batadv_priv *bat_priv); void batadv_originator_free(struct batadv_priv *bat_priv); void batadv_purge_orig_ref(struct batadv_priv *bat_priv); void batadv_orig_node_release(struct kref *ref); struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const u8 *addr); struct batadv_hardif_neigh_node * batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, const u8 *neigh_addr); void batadv_hardif_neigh_release(struct kref *ref); struct batadv_neigh_node * batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, struct batadv_hard_iface *hard_iface, const u8 *neigh_addr); void batadv_neigh_node_release(struct kref *ref); struct batadv_neigh_node * batadv_orig_router_get(struct batadv_orig_node *orig_node, const struct batadv_hard_iface *if_outgoing); struct batadv_neigh_node * batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr, struct batadv_hard_iface *if_outgoing); struct batadv_neigh_ifinfo * batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); struct batadv_neigh_ifinfo * batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); void batadv_neigh_ifinfo_release(struct kref *ref); int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb); struct batadv_orig_ifinfo * batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing); struct batadv_orig_ifinfo * batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing); void batadv_orig_ifinfo_release(struct kref *ref); int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb); struct batadv_orig_node_vlan * batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, unsigned short vid); struct batadv_orig_node_vlan * batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, unsigned short vid); void batadv_orig_node_vlan_release(struct kref *ref); /** * batadv_choose_orig() - Return the index of the orig entry in the hash table * @data: mac address of the originator node * @size: the size of the hash table * * Return: the hash index where the object represented by @data should be * stored at. */ static inline u32 batadv_choose_orig(const void *data, u32 size) { u32 hash = 0; hash = jhash(data, ETH_ALEN, hash); return hash % size; } struct batadv_orig_node * batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data); /** * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release * the originator-vlan object * @orig_vlan: the originator-vlan object to release */ static inline void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan) { if (!orig_vlan) return; kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release); } /** * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release * the neigh_ifinfo * @neigh_ifinfo: the neigh_ifinfo object to release */ static inline void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo) { if (!neigh_ifinfo) return; kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release); } /** * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter * and possibly release it * @hardif_neigh: hardif neigh neighbor to free */ static inline void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh) { if (!hardif_neigh) return; kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release); } /** * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly * release it * @neigh_node: neigh neighbor to free */ static inline void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node) { if (!neigh_node) return; kref_put(&neigh_node->refcount, batadv_neigh_node_release); } /** * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release * the orig_ifinfo * @orig_ifinfo: the orig_ifinfo object to release */ static inline void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo) { if (!orig_ifinfo) return; kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release); } /** * batadv_orig_node_put() - decrement the orig node refcounter and possibly * release it * @orig_node: the orig node to free */ static inline void batadv_orig_node_put(struct batadv_orig_node *orig_node) { if (!orig_node) return; kref_put(&orig_node->refcount, batadv_orig_node_release); } #endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */ |
| 48 48 46 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 | // SPDX-License-Identifier: GPL-2.0-or-later /* mpihelp-mul_2.c - MPI helper functions * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. * * This file is part of GnuPG. * * Note: This code is heavily based on the GNU MP Library. * Actually it's the same code with only minor changes in the * way the data is stored; this is to support the abstraction * of an optional secure memory allocation which may be used * to avoid revealing of sensitive data due to paging etc. * The GNU MP Library itself is published under the LGPL; * however I decided to publish this code under the plain GPL. */ #include "mpi-internal.h" #include "longlong.h" mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb) { mpi_limb_t cy_limb; mpi_size_t j; mpi_limb_t prod_high, prod_low; mpi_limb_t x; /* The loop counter and index J goes from -SIZE to -1. This way * the loop becomes faster. */ j = -s1_size; res_ptr -= j; s1_ptr -= j; cy_limb = 0; do { umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb); prod_low += cy_limb; cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; x = res_ptr[j]; prod_low = x + prod_low; cy_limb += prod_low < x ? 1 : 0; res_ptr[j] = prod_low; } while (++j); return cy_limb; } |
| 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_IOBITMAP_H #define _ASM_X86_IOBITMAP_H #include <linux/refcount.h> #include <asm/processor.h> struct io_bitmap { u64 sequence; refcount_t refcnt; /* The maximum number of bytes to copy so all zero bits are covered */ unsigned int max; unsigned long bitmap[IO_BITMAP_LONGS]; }; struct task_struct; #ifdef CONFIG_X86_IOPL_IOPERM void io_bitmap_share(struct task_struct *tsk); void io_bitmap_exit(struct task_struct *tsk); static inline void native_tss_invalidate_io_bitmap(void) { /* * Invalidate the I/O bitmap by moving io_bitmap_base outside the * TSS limit so any subsequent I/O access from user space will * trigger a #GP. * * This is correct even when VMEXIT rewrites the TSS limit * to 0x67 as the only requirement is that the base points * outside the limit. */ this_cpu_write(cpu_tss_rw.x86_tss.io_bitmap_base, IO_BITMAP_OFFSET_INVALID); } void native_tss_update_io_bitmap(void); #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define tss_update_io_bitmap native_tss_update_io_bitmap #define tss_invalidate_io_bitmap native_tss_invalidate_io_bitmap #endif #else static inline void io_bitmap_share(struct task_struct *tsk) { } static inline void io_bitmap_exit(struct task_struct *tsk) { } static inline void tss_update_io_bitmap(void) { } #endif #endif |
| 1 1 1 1 1 2 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 | // SPDX-License-Identifier: GPL-2.0+ /* * c6xdigio.c * Hardware driver for Mechatronic Systems Inc. C6x_DIGIO DSP daughter card. * http://web.archive.org/web/%2A/http://robot0.ge.uiuc.edu/~spong/mecha/ * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 1999 Dan Block */ /* * Driver: c6xdigio * Description: Mechatronic Systems Inc. C6x_DIGIO DSP daughter card * Author: Dan Block * Status: unknown * Devices: [Mechatronic Systems Inc.] C6x_DIGIO DSP daughter card (c6xdigio) * Updated: Sun Nov 20 20:18:34 EST 2005 * * Configuration Options: * [0] - base address */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/timex.h> #include <linux/timer.h> #include <linux/io.h> #include <linux/pnp.h> #include <linux/comedi/comedidev.h> /* * Register I/O map */ #define C6XDIGIO_DATA_REG 0x00 #define C6XDIGIO_DATA_CHAN(x) (((x) + 1) << 4) #define C6XDIGIO_DATA_PWM BIT(5) #define C6XDIGIO_DATA_ENCODER BIT(6) #define C6XDIGIO_STATUS_REG 0x01 #define C6XDIGIO_CTRL_REG 0x02 #define C6XDIGIO_TIME_OUT 20 static int c6xdigio_chk_status(struct comedi_device *dev, unsigned long context) { unsigned int status; int timeout = 0; do { status = inb(dev->iobase + C6XDIGIO_STATUS_REG); if ((status & 0x80) != context) return 0; timeout++; } while (timeout < C6XDIGIO_TIME_OUT); return -EBUSY; } static int c6xdigio_write_data(struct comedi_device *dev, unsigned int val, unsigned int status) { outb_p(val, dev->iobase + C6XDIGIO_DATA_REG); return c6xdigio_chk_status(dev, status); } static int c6xdigio_get_encoder_bits(struct comedi_device *dev, unsigned int *bits, unsigned int cmd, unsigned int status) { unsigned int val; val = inb(dev->iobase + C6XDIGIO_STATUS_REG); val >>= 3; val &= 0x07; *bits = val; return c6xdigio_write_data(dev, cmd, status); } static void c6xdigio_pwm_write(struct comedi_device *dev, unsigned int chan, unsigned int val) { unsigned int cmd = C6XDIGIO_DATA_PWM | C6XDIGIO_DATA_CHAN(chan); unsigned int bits; if (val > 498) val = 498; if (val < 2) val = 2; bits = (val >> 0) & 0x03; c6xdigio_write_data(dev, cmd | bits | (0 << 2), 0x00); bits = (val >> 2) & 0x03; c6xdigio_write_data(dev, cmd | bits | (1 << 2), 0x80); bits = (val >> 4) & 0x03; c6xdigio_write_data(dev, cmd | bits | (0 << 2), 0x00); bits = (val >> 6) & 0x03; c6xdigio_write_data(dev, cmd | bits | (1 << 2), 0x80); bits = (val >> 8) & 0x03; c6xdigio_write_data(dev, cmd | bits | (0 << 2), 0x00); c6xdigio_write_data(dev, 0x00, 0x80); } static int c6xdigio_encoder_read(struct comedi_device *dev, unsigned int chan) { unsigned int cmd = C6XDIGIO_DATA_ENCODER | C6XDIGIO_DATA_CHAN(chan); unsigned int val = 0; unsigned int bits; c6xdigio_write_data(dev, cmd, 0x00); c6xdigio_get_encoder_bits(dev, &bits, cmd | (1 << 2), 0x80); val |= (bits << 0); c6xdigio_get_encoder_bits(dev, &bits, cmd | (0 << 2), 0x00); val |= (bits << 3); c6xdigio_get_encoder_bits(dev, &bits, cmd | (1 << 2), 0x80); val |= (bits << 6); c6xdigio_get_encoder_bits(dev, &bits, cmd | (0 << 2), 0x00); val |= (bits << 9); c6xdigio_get_encoder_bits(dev, &bits, cmd | (1 << 2), 0x80); val |= (bits << 12); c6xdigio_get_encoder_bits(dev, &bits, cmd | (0 << 2), 0x00); val |= (bits << 15); c6xdigio_get_encoder_bits(dev, &bits, cmd | (1 << 2), 0x80); val |= (bits << 18); c6xdigio_get_encoder_bits(dev, &bits, cmd | (0 << 2), 0x00); val |= (bits << 21); c6xdigio_write_data(dev, 0x00, 0x80); return val; } static int c6xdigio_pwm_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); unsigned int val = (s->state >> (16 * chan)) & 0xffff; int i; for (i = 0; i < insn->n; i++) { val = data[i]; c6xdigio_pwm_write(dev, chan, val); } /* * There are only 2 PWM channels and they have a maxdata of 500. * Instead of allocating private data to save the values in for * readback this driver just packs the values for the two channels * in the s->state. */ s->state &= (0xffff << (16 * chan)); s->state |= (val << (16 * chan)); return insn->n; } static int c6xdigio_pwm_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); unsigned int val; int i; val = (s->state >> (16 * chan)) & 0xffff; for (i = 0; i < insn->n; i++) data[i] = val; return insn->n; } static int c6xdigio_encoder_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); unsigned int val; int i; for (i = 0; i < insn->n; i++) { val = c6xdigio_encoder_read(dev, chan); /* munge two's complement value to offset binary */ data[i] = comedi_offset_munge(s, val); } return insn->n; } static void c6xdigio_init(struct comedi_device *dev) { /* Initialize the PWM */ c6xdigio_write_data(dev, 0x70, 0x00); c6xdigio_write_data(dev, 0x74, 0x80); c6xdigio_write_data(dev, 0x70, 0x00); c6xdigio_write_data(dev, 0x00, 0x80); /* Reset the encoders */ c6xdigio_write_data(dev, 0x68, 0x00); c6xdigio_write_data(dev, 0x6c, 0x80); c6xdigio_write_data(dev, 0x68, 0x00); c6xdigio_write_data(dev, 0x00, 0x80); } static const struct pnp_device_id c6xdigio_pnp_tbl[] = { /* Standard LPT Printer Port */ {.id = "PNP0400", .driver_data = 0}, /* ECP Printer Port */ {.id = "PNP0401", .driver_data = 0}, {} }; static struct pnp_driver c6xdigio_pnp_driver = { .name = "c6xdigio", .id_table = c6xdigio_pnp_tbl, }; static int c6xdigio_attach(struct comedi_device *dev, struct comedi_devconfig *it) { struct comedi_subdevice *s; int ret; ret = comedi_request_region(dev, it->options[0], 0x03); if (ret) return ret; ret = comedi_alloc_subdevices(dev, 2); if (ret) return ret; s = &dev->subdevices[0]; /* pwm output subdevice */ s->type = COMEDI_SUBD_PWM; s->subdev_flags = SDF_WRITABLE; s->n_chan = 2; s->maxdata = 500; s->range_table = &range_unknown; s->insn_write = c6xdigio_pwm_insn_write; s->insn_read = c6xdigio_pwm_insn_read; s = &dev->subdevices[1]; /* encoder (counter) subdevice */ s->type = COMEDI_SUBD_COUNTER; s->subdev_flags = SDF_READABLE | SDF_LSAMPL; s->n_chan = 2; s->maxdata = 0xffffff; s->range_table = &range_unknown; s->insn_read = c6xdigio_encoder_insn_read; /* I will call this init anyway but more than likely the DSP board */ /* will not be connected when device driver is loaded. */ c6xdigio_init(dev); return 0; } static struct comedi_driver c6xdigio_driver = { .driver_name = "c6xdigio", .module = THIS_MODULE, .attach = c6xdigio_attach, .detach = comedi_legacy_detach, }; static bool c6xdigio_pnp_registered = false; static int __init c6xdigio_module_init(void) { int ret; ret = comedi_driver_register(&c6xdigio_driver); if (ret) return ret; if (IS_ENABLED(CONFIG_PNP)) { /* Try to activate the PnP ports */ ret = pnp_register_driver(&c6xdigio_pnp_driver); if (ret) { pr_warn("failed to register pnp driver - err %d\n", ret); ret = 0; /* ignore the error. */ } else { c6xdigio_pnp_registered = true; } } return 0; } module_init(c6xdigio_module_init); static void __exit c6xdigio_module_exit(void) { if (c6xdigio_pnp_registered) pnp_unregister_driver(&c6xdigio_pnp_driver); comedi_driver_unregister(&c6xdigio_driver); } module_exit(c6xdigio_module_exit); MODULE_AUTHOR("Comedi https://www.comedi.org"); MODULE_DESCRIPTION("Comedi driver for the C6x_DIGIO DSP daughter card"); MODULE_LICENSE("GPL"); |
| 2 2 7 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 47 32 14 11 43 47 1 5 4 3 2 2 1 2 1 2 2 1 9 8 7 6 5 4 3 2 1 4 3 2 1 4 2 2 1 1 1 2 2 2 2 1 2 2 2 2 2 2 2 1 1 1 1 1 1 1 2 2 14 12 6 5 2 3 2 6 5 3 2 2 3 14 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 10 9 9 7 4 8 5 6 1 4 3 2 5 4 5 5 4 6 6 6 6 6 6 28 27 27 31 1 1 1 2 2 7 7 7 1 1 1 6 5 4 3 2 2 2 7 6 7 8 8 3 2 4 1 1 1 2 2 2 1 2 3 3 3 2 1 3 3 3 1 1 3 1 3 4 3 2 2 2 4 2 2 3 3 2 2 2 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 | // SPDX-License-Identifier: GPL-2.0-only /* * Netlink interface for IEEE 802.15.4 stack * * Copyright 2007, 2008 Siemens AG * * Written by: * Sergey Lapin <slapin@ossfans.org> * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> * Maxim Osipov <maxim.osipov@siemens.com> */ #include <linux/gfp.h> #include <linux/kernel.h> #include <linux/if_arp.h> #include <linux/netdevice.h> #include <linux/ieee802154.h> #include <net/netlink.h> #include <net/genetlink.h> #include <net/sock.h> #include <linux/nl802154.h> #include <linux/export.h> #include <net/af_ieee802154.h> #include <net/ieee802154_netdev.h> #include <net/cfg802154.h> #include "ieee802154.h" static int nla_put_hwaddr(struct sk_buff *msg, int type, __le64 hwaddr, int padattr) { return nla_put_u64_64bit(msg, type, swab64((__force u64)hwaddr), padattr); } static __le64 nla_get_hwaddr(const struct nlattr *nla) { return ieee802154_devaddr_from_raw(nla_data(nla)); } static int nla_put_shortaddr(struct sk_buff *msg, int type, __le16 addr) { return nla_put_u16(msg, type, le16_to_cpu(addr)); } static __le16 nla_get_shortaddr(const struct nlattr *nla) { return cpu_to_le16(nla_get_u16(nla)); } static int ieee802154_nl_start_confirm(struct net_device *dev, u8 status) { struct sk_buff *msg; pr_debug("%s\n", __func__); msg = ieee802154_nl_create(0, IEEE802154_START_CONF); if (!msg) return -ENOBUFS; if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, dev->dev_addr) || nla_put_u8(msg, IEEE802154_ATTR_STATUS, status)) goto nla_put_failure; return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP); nla_put_failure: nlmsg_free(msg); return -ENOBUFS; } static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev) { void *hdr; struct wpan_phy *phy; struct ieee802154_mlme_ops *ops; __le16 short_addr, pan_id; pr_debug("%s\n", __func__); hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, IEEE802154_LIST_IFACE); if (!hdr) goto out; ops = ieee802154_mlme_ops(dev); phy = dev->ieee802154_ptr->wpan_phy; BUG_ON(!phy); get_device(&phy->dev); rtnl_lock(); short_addr = dev->ieee802154_ptr->short_addr; pan_id = dev->ieee802154_ptr->pan_id; rtnl_unlock(); if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) || nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, dev->dev_addr) || nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr) || nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, pan_id)) goto nla_put_failure; if (ops->get_mac_params) { struct ieee802154_mac_params params; rtnl_lock(); ops->get_mac_params(dev, ¶ms); rtnl_unlock(); if (nla_put_s8(msg, IEEE802154_ATTR_TXPOWER, params.transmit_power / 100) || nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, params.lbt) || nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, params.cca.mode) || nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, params.cca_ed_level / 100) || nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES, params.csma_retries) || nla_put_u8(msg, IEEE802154_ATTR_CSMA_MIN_BE, params.min_be) || nla_put_u8(msg, IEEE802154_ATTR_CSMA_MAX_BE, params.max_be) || nla_put_s8(msg, IEEE802154_ATTR_FRAME_RETRIES, params.frame_retries)) goto nla_put_failure; } wpan_phy_put(phy); genlmsg_end(msg, hdr); return 0; nla_put_failure: wpan_phy_put(phy); genlmsg_cancel(msg, hdr); out: return -EMSGSIZE; } /* Requests from userspace */ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) { struct net_device *dev; if (info->attrs[IEEE802154_ATTR_DEV_NAME]) { char name[IFNAMSIZ + 1]; nla_strscpy(name, info->attrs[IEEE802154_ATTR_DEV_NAME], sizeof(name)); dev = dev_get_by_name(&init_net, name); } else if (info->attrs[IEEE802154_ATTR_DEV_INDEX]) { dev = dev_get_by_index(&init_net, nla_get_u32(info->attrs[IEEE802154_ATTR_DEV_INDEX])); } else { return NULL; } if (!dev) return NULL; if (dev->type != ARPHRD_IEEE802154) { dev_put(dev); return NULL; } return dev; } int ieee802154_associate_req(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; struct ieee802154_addr addr; u8 page; int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_CHANNEL] || !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || (!info->attrs[IEEE802154_ATTR_COORD_HW_ADDR] && !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]) || !info->attrs[IEEE802154_ATTR_CAPABILITY]) return -EINVAL; dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; if (!ieee802154_mlme_ops(dev)->assoc_req) goto out; if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) { addr.mode = IEEE802154_ADDR_LONG; addr.extended_addr = nla_get_hwaddr( info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]); } else { addr.mode = IEEE802154_ADDR_SHORT; addr.short_addr = nla_get_shortaddr( info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]); } addr.pan_id = nla_get_shortaddr( info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); page = nla_get_u8_default(info->attrs[IEEE802154_ATTR_PAGE], 0); ret = ieee802154_mlme_ops(dev)->assoc_req(dev, &addr, nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]), page, nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); out: dev_put(dev); return ret; } int ieee802154_associate_resp(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; struct ieee802154_addr addr; int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_STATUS] || !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] || !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) return -EINVAL; dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; if (!ieee802154_mlme_ops(dev)->assoc_resp) goto out; addr.mode = IEEE802154_ADDR_LONG; addr.extended_addr = nla_get_hwaddr( info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]); rtnl_lock(); addr.pan_id = dev->ieee802154_ptr->pan_id; rtnl_unlock(); ret = ieee802154_mlme_ops(dev)->assoc_resp(dev, &addr, nla_get_shortaddr(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS])); out: dev_put(dev); return ret; } int ieee802154_disassociate_req(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; struct ieee802154_addr addr; int ret = -EOPNOTSUPP; if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] && !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || !info->attrs[IEEE802154_ATTR_REASON]) return -EINVAL; dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; if (!ieee802154_mlme_ops(dev)->disassoc_req) goto out; if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) { addr.mode = IEEE802154_ADDR_LONG; addr.extended_addr = nla_get_hwaddr( info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]); } else { addr.mode = IEEE802154_ADDR_SHORT; addr.short_addr = nla_get_shortaddr( info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]); } rtnl_lock(); addr.pan_id = dev->ieee802154_ptr->pan_id; rtnl_unlock(); ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr, nla_get_u8(info->attrs[IEEE802154_ATTR_REASON])); out: dev_put(dev); return ret; } /* PANid, channel, beacon_order = 15, superframe_order = 15, * PAN_coordinator, battery_life_extension = 0, * coord_realignment = 0, security_enable = 0 */ int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; struct ieee802154_addr addr; u8 channel, bcn_ord, sf_ord; u8 page; int pan_coord, blx, coord_realign; int ret = -EBUSY; if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] || !info->attrs[IEEE802154_ATTR_CHANNEL] || !info->attrs[IEEE802154_ATTR_BCN_ORD] || !info->attrs[IEEE802154_ATTR_SF_ORD] || !info->attrs[IEEE802154_ATTR_PAN_COORD] || !info->attrs[IEEE802154_ATTR_BAT_EXT] || !info->attrs[IEEE802154_ATTR_COORD_REALIGN] ) return -EINVAL; dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; if (netif_running(dev)) goto out; if (!ieee802154_mlme_ops(dev)->start_req) { ret = -EOPNOTSUPP; goto out; } addr.mode = IEEE802154_ADDR_SHORT; addr.short_addr = nla_get_shortaddr( info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]); addr.pan_id = nla_get_shortaddr( info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); channel = nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]); bcn_ord = nla_get_u8(info->attrs[IEEE802154_ATTR_BCN_ORD]); sf_ord = nla_get_u8(info->attrs[IEEE802154_ATTR_SF_ORD]); pan_coord = nla_get_u8(info->attrs[IEEE802154_ATTR_PAN_COORD]); blx = nla_get_u8(info->attrs[IEEE802154_ATTR_BAT_EXT]); coord_realign = nla_get_u8(info->attrs[IEEE802154_ATTR_COORD_REALIGN]); page = nla_get_u8_default(info->attrs[IEEE802154_ATTR_PAGE], 0); if (addr.short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) { ieee802154_nl_start_confirm(dev, IEEE802154_NO_SHORT_ADDRESS); dev_put(dev); return -EINVAL; } rtnl_lock(); ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page, bcn_ord, sf_ord, pan_coord, blx, coord_realign); rtnl_unlock(); /* FIXME: add validation for unused parameters to be sane * for SoftMAC */ ieee802154_nl_start_confirm(dev, IEEE802154_SUCCESS); out: dev_put(dev); return ret; } int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; int ret = -EOPNOTSUPP; u8 type; u32 channels; u8 duration; u8 page; if (!info->attrs[IEEE802154_ATTR_SCAN_TYPE] || !info->attrs[IEEE802154_ATTR_CHANNELS] || !info->attrs[IEEE802154_ATTR_DURATION]) return -EINVAL; dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; if (!ieee802154_mlme_ops(dev)->scan_req) goto out; type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]); channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); duration = nla_get_u8(info->attrs[IEEE802154_ATTR_DURATION]); page = nla_get_u8_default(info->attrs[IEEE802154_ATTR_PAGE], 0); ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, duration); out: dev_put(dev); return ret; } int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info) { /* Request for interface name, index, type, IEEE address, * PAN Id, short address */ struct sk_buff *msg; struct net_device *dev = NULL; int rc = -ENOBUFS; pr_debug("%s\n", __func__); dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) goto out_dev; rc = ieee802154_nl_fill_iface(msg, info->snd_portid, info->snd_seq, 0, dev); if (rc < 0) goto out_free; dev_put(dev); return genlmsg_reply(msg, info); out_free: nlmsg_free(msg); out_dev: dev_put(dev); return rc; } int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct net_device *dev; int idx; int s_idx = cb->args[0]; pr_debug("%s\n", __func__); idx = 0; for_each_netdev(net, dev) { if (idx < s_idx || dev->type != ARPHRD_IEEE802154) goto cont; if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) break; cont: idx++; } cb->args[0] = idx; return skb->len; } int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev = NULL; struct ieee802154_mlme_ops *ops; struct ieee802154_mac_params params; struct wpan_phy *phy; int rc = -EINVAL; pr_debug("%s\n", __func__); dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; ops = ieee802154_mlme_ops(dev); if (!ops->get_mac_params || !ops->set_mac_params) { rc = -EOPNOTSUPP; goto out; } if (netif_running(dev)) { rc = -EBUSY; goto out; } if (!info->attrs[IEEE802154_ATTR_LBT_ENABLED] && !info->attrs[IEEE802154_ATTR_CCA_MODE] && !info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL] && !info->attrs[IEEE802154_ATTR_CSMA_RETRIES] && !info->attrs[IEEE802154_ATTR_CSMA_MIN_BE] && !info->attrs[IEEE802154_ATTR_CSMA_MAX_BE] && !info->attrs[IEEE802154_ATTR_FRAME_RETRIES]) goto out; phy = dev->ieee802154_ptr->wpan_phy; get_device(&phy->dev); rtnl_lock(); ops->get_mac_params(dev, ¶ms); if (info->attrs[IEEE802154_ATTR_TXPOWER]) params.transmit_power = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]) * 100; if (info->attrs[IEEE802154_ATTR_LBT_ENABLED]) params.lbt = nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]); if (info->attrs[IEEE802154_ATTR_CCA_MODE]) params.cca.mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) * 100; if (info->attrs[IEEE802154_ATTR_CSMA_RETRIES]) params.csma_retries = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_RETRIES]); if (info->attrs[IEEE802154_ATTR_CSMA_MIN_BE]) params.min_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MIN_BE]); if (info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]) params.max_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]); if (info->attrs[IEEE802154_ATTR_FRAME_RETRIES]) params.frame_retries = nla_get_s8(info->attrs[IEEE802154_ATTR_FRAME_RETRIES]); rc = ops->set_mac_params(dev, ¶ms); rtnl_unlock(); wpan_phy_put(phy); dev_put(dev); return 0; out: dev_put(dev); return rc; } static int ieee802154_llsec_parse_key_id(struct genl_info *info, struct ieee802154_llsec_key_id *desc) { memset(desc, 0, sizeof(*desc)); if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) return -EINVAL; desc->mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]); if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) { if (!info->attrs[IEEE802154_ATTR_PAN_ID]) return -EINVAL; desc->device_addr.pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]); if (info->attrs[IEEE802154_ATTR_SHORT_ADDR]) { desc->device_addr.mode = IEEE802154_ADDR_SHORT; desc->device_addr.short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]); } else { if (!info->attrs[IEEE802154_ATTR_HW_ADDR]) return -EINVAL; desc->device_addr.mode = IEEE802154_ADDR_LONG; desc->device_addr.extended_addr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); } } if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT && !info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]) return -EINVAL; if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX && !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]) return -EINVAL; if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX && !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]) return -EINVAL; if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT) desc->id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]); switch (desc->mode) { case IEEE802154_SCF_KEY_SHORT_INDEX: { u32 source = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]); desc->short_source = cpu_to_le32(source); break; } case IEEE802154_SCF_KEY_HW_INDEX: desc->extended_source = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]); break; } return 0; } static int ieee802154_llsec_fill_key_id(struct sk_buff *msg, const struct ieee802154_llsec_key_id *desc) { if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_MODE, desc->mode)) return -EMSGSIZE; if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) { if (nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, desc->device_addr.pan_id)) return -EMSGSIZE; if (desc->device_addr.mode == IEEE802154_ADDR_SHORT && nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, desc->device_addr.short_addr)) return -EMSGSIZE; if (desc->device_addr.mode == IEEE802154_ADDR_LONG && nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->device_addr.extended_addr, IEEE802154_ATTR_PAD)) return -EMSGSIZE; } if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT && nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_ID, desc->id)) return -EMSGSIZE; if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX && nla_put_u32(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT, le32_to_cpu(desc->short_source))) return -EMSGSIZE; if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX && nla_put_hwaddr(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED, desc->extended_source, IEEE802154_ATTR_PAD)) return -EMSGSIZE; return 0; } int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; struct net_device *dev = NULL; int rc = -ENOBUFS; struct ieee802154_mlme_ops *ops; void *hdr; struct ieee802154_llsec_params params; pr_debug("%s\n", __func__); dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; ops = ieee802154_mlme_ops(dev); if (!ops->llsec) { rc = -EOPNOTSUPP; goto out_dev; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) goto out_dev; hdr = genlmsg_put(msg, 0, info->snd_seq, &nl802154_family, 0, IEEE802154_LLSEC_GETPARAMS); if (!hdr) goto out_free; rc = ops->llsec->get_params(dev, ¶ms); if (rc < 0) goto out_free; if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || nla_put_u8(msg, IEEE802154_ATTR_LLSEC_ENABLED, params.enabled) || nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) || nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, be32_to_cpu(params.frame_counter)) || ieee802154_llsec_fill_key_id(msg, ¶ms.out_key)) { rc = -ENOBUFS; goto out_free; } dev_put(dev); return ieee802154_nl_reply(msg, info); out_free: nlmsg_free(msg); out_dev: dev_put(dev); return rc; } int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev = NULL; int rc = -EINVAL; struct ieee802154_mlme_ops *ops; struct ieee802154_llsec_params params; int changed = 0; pr_debug("%s\n", __func__); dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; if (!info->attrs[IEEE802154_ATTR_LLSEC_ENABLED] && !info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE] && !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) goto out; ops = ieee802154_mlme_ops(dev); if (!ops->llsec) { rc = -EOPNOTSUPP; goto out; } if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL] && nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) > 7) goto out; if (info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]) { params.enabled = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]); changed |= IEEE802154_LLSEC_PARAM_ENABLED; } if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) { if (ieee802154_llsec_parse_key_id(info, ¶ms.out_key)) goto out; changed |= IEEE802154_LLSEC_PARAM_OUT_KEY; } if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) { params.out_level = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]); changed |= IEEE802154_LLSEC_PARAM_OUT_LEVEL; } if (info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]) { u32 fc = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); params.frame_counter = cpu_to_be32(fc); changed |= IEEE802154_LLSEC_PARAM_FRAME_COUNTER; } rc = ops->llsec->set_params(dev, ¶ms, changed); dev_put(dev); return rc; out: dev_put(dev); return rc; } struct llsec_dump_data { struct sk_buff *skb; int s_idx, s_idx2; int portid; int nlmsg_seq; struct net_device *dev; struct ieee802154_mlme_ops *ops; struct ieee802154_llsec_table *table; }; static int ieee802154_llsec_dump_table(struct sk_buff *skb, struct netlink_callback *cb, int (*step)(struct llsec_dump_data *)) { struct net *net = sock_net(skb->sk); struct net_device *dev; struct llsec_dump_data data; int idx = 0; int first_dev = cb->args[0]; int rc; for_each_netdev(net, dev) { if (idx < first_dev || dev->type != ARPHRD_IEEE802154) goto skip; data.ops = ieee802154_mlme_ops(dev); if (!data.ops->llsec) goto skip; data.skb = skb; data.s_idx = cb->args[1]; data.s_idx2 = cb->args[2]; data.dev = dev; data.portid = NETLINK_CB(cb->skb).portid; data.nlmsg_seq = cb->nlh->nlmsg_seq; data.ops->llsec->lock_table(dev); data.ops->llsec->get_table(data.dev, &data.table); rc = step(&data); data.ops->llsec->unlock_table(dev); if (rc < 0) break; skip: idx++; } cb->args[0] = idx; return skb->len; } static int ieee802154_nl_llsec_change(struct sk_buff *skb, struct genl_info *info, int (*fn)(struct net_device*, struct genl_info*)) { struct net_device *dev = NULL; int rc = -EINVAL; dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; if (!ieee802154_mlme_ops(dev)->llsec) rc = -EOPNOTSUPP; else rc = fn(dev, info); dev_put(dev); return rc; } static int ieee802154_llsec_parse_key(struct genl_info *info, struct ieee802154_llsec_key *key) { u8 frames; u32 commands[256 / 32]; memset(key, 0, sizeof(*key)); if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] || !info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES]) return -EINVAL; frames = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES]); if ((frames & BIT(IEEE802154_FC_TYPE_MAC_CMD)) && !info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS]) return -EINVAL; if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS]) { nla_memcpy(commands, info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS], 256 / 8); if (commands[0] || commands[1] || commands[2] || commands[3] || commands[4] || commands[5] || commands[6] || commands[7] >= BIT(IEEE802154_CMD_GTS_REQ + 1)) return -EINVAL; key->cmd_frame_ids = commands[7]; } key->frame_types = frames; nla_memcpy(key->key, info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES], IEEE802154_LLSEC_KEY_SIZE); return 0; } static int llsec_add_key(struct net_device *dev, struct genl_info *info) { struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); struct ieee802154_llsec_key key; struct ieee802154_llsec_key_id id; if (ieee802154_llsec_parse_key(info, &key) || ieee802154_llsec_parse_key_id(info, &id)) return -EINVAL; return ops->llsec->add_key(dev, &id, &key); } int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info) { if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != (NLM_F_CREATE | NLM_F_EXCL)) return -EINVAL; return ieee802154_nl_llsec_change(skb, info, llsec_add_key); } static int llsec_remove_key(struct net_device *dev, struct genl_info *info) { struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); struct ieee802154_llsec_key_id id; if (ieee802154_llsec_parse_key_id(info, &id)) return -EINVAL; return ops->llsec->del_key(dev, &id); } int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info) { return ieee802154_nl_llsec_change(skb, info, llsec_remove_key); } static int ieee802154_nl_fill_key(struct sk_buff *msg, u32 portid, u32 seq, const struct ieee802154_llsec_key_entry *key, const struct net_device *dev) { void *hdr; u32 commands[256 / 32]; hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, IEEE802154_LLSEC_LIST_KEY); if (!hdr) goto out; if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || ieee802154_llsec_fill_key_id(msg, &key->id) || nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES, key->key->frame_types)) goto nla_put_failure; if (key->key->frame_types & BIT(IEEE802154_FC_TYPE_MAC_CMD)) { memset(commands, 0, sizeof(commands)); commands[7] = key->key->cmd_frame_ids; if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS, sizeof(commands), commands)) goto nla_put_failure; } if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_BYTES, IEEE802154_LLSEC_KEY_SIZE, key->key->key)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); out: return -EMSGSIZE; } static int llsec_iter_keys(struct llsec_dump_data *data) { struct ieee802154_llsec_key_entry *pos; int rc = 0, idx = 0; list_for_each_entry(pos, &data->table->keys, list) { if (idx++ < data->s_idx) continue; if (ieee802154_nl_fill_key(data->skb, data->portid, data->nlmsg_seq, pos, data->dev)) { rc = -EMSGSIZE; break; } data->s_idx++; } return rc; } int ieee802154_llsec_dump_keys(struct sk_buff *skb, struct netlink_callback *cb) { return ieee802154_llsec_dump_table(skb, cb, llsec_iter_keys); } static int llsec_parse_dev(struct genl_info *info, struct ieee802154_llsec_device *dev) { memset(dev, 0, sizeof(*dev)); if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] || !info->attrs[IEEE802154_ATTR_HW_ADDR] || !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] || !info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] || (!!info->attrs[IEEE802154_ATTR_PAN_ID] != !!info->attrs[IEEE802154_ATTR_SHORT_ADDR])) return -EINVAL; if (info->attrs[IEEE802154_ATTR_PAN_ID]) { dev->pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]); dev->short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]); } else { dev->short_addr = cpu_to_le16(IEEE802154_ADDR_UNDEF); } dev->hwaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); dev->frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); dev->seclevel_exempt = !!nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]); dev->key_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE]); if (dev->key_mode >= __IEEE802154_LLSEC_DEVKEY_MAX) return -EINVAL; return 0; } static int llsec_add_dev(struct net_device *dev, struct genl_info *info) { struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); struct ieee802154_llsec_device desc; if (llsec_parse_dev(info, &desc)) return -EINVAL; return ops->llsec->add_dev(dev, &desc); } int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info) { if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != (NLM_F_CREATE | NLM_F_EXCL)) return -EINVAL; return ieee802154_nl_llsec_change(skb, info, llsec_add_dev); } static int llsec_del_dev(struct net_device *dev, struct genl_info *info) { struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); __le64 devaddr; if (!info->attrs[IEEE802154_ATTR_HW_ADDR]) return -EINVAL; devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); return ops->llsec->del_dev(dev, devaddr); } int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info) { return ieee802154_nl_llsec_change(skb, info, llsec_del_dev); } static int ieee802154_nl_fill_dev(struct sk_buff *msg, u32 portid, u32 seq, const struct ieee802154_llsec_device *desc, const struct net_device *dev) { void *hdr; hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, IEEE802154_LLSEC_LIST_DEV); if (!hdr) goto out; if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, desc->pan_id) || nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, desc->short_addr) || nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr, IEEE802154_ATTR_PAD) || nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, desc->frame_counter) || nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, desc->seclevel_exempt) || nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, desc->key_mode)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); out: return -EMSGSIZE; } static int llsec_iter_devs(struct llsec_dump_data *data) { struct ieee802154_llsec_device *pos; int rc = 0, idx = 0; list_for_each_entry(pos, &data->table->devices, list) { if (idx++ < data->s_idx) continue; if (ieee802154_nl_fill_dev(data->skb, data->portid, data->nlmsg_seq, pos, data->dev)) { rc = -EMSGSIZE; break; } data->s_idx++; } return rc; } int ieee802154_llsec_dump_devs(struct sk_buff *skb, struct netlink_callback *cb) { return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devs); } static int llsec_add_devkey(struct net_device *dev, struct genl_info *info) { struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); struct ieee802154_llsec_device_key key; __le64 devaddr; if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] || !info->attrs[IEEE802154_ATTR_HW_ADDR] || ieee802154_llsec_parse_key_id(info, &key.key_id)) return -EINVAL; devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); key.frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); return ops->llsec->add_devkey(dev, devaddr, &key); } int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info) { if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != (NLM_F_CREATE | NLM_F_EXCL)) return -EINVAL; return ieee802154_nl_llsec_change(skb, info, llsec_add_devkey); } static int llsec_del_devkey(struct net_device *dev, struct genl_info *info) { struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); struct ieee802154_llsec_device_key key; __le64 devaddr; if (!info->attrs[IEEE802154_ATTR_HW_ADDR] || ieee802154_llsec_parse_key_id(info, &key.key_id)) return -EINVAL; devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); return ops->llsec->del_devkey(dev, devaddr, &key); } int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info) { return ieee802154_nl_llsec_change(skb, info, llsec_del_devkey); } static int ieee802154_nl_fill_devkey(struct sk_buff *msg, u32 portid, u32 seq, __le64 devaddr, const struct ieee802154_llsec_device_key *devkey, const struct net_device *dev) { void *hdr; hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, IEEE802154_LLSEC_LIST_DEVKEY); if (!hdr) goto out; if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr, IEEE802154_ATTR_PAD) || nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, devkey->frame_counter) || ieee802154_llsec_fill_key_id(msg, &devkey->key_id)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); out: return -EMSGSIZE; } static int llsec_iter_devkeys(struct llsec_dump_data *data) { struct ieee802154_llsec_device *dpos; struct ieee802154_llsec_device_key *kpos; int idx = 0, idx2; list_for_each_entry(dpos, &data->table->devices, list) { if (idx++ < data->s_idx) continue; idx2 = 0; list_for_each_entry(kpos, &dpos->keys, list) { if (idx2++ < data->s_idx2) continue; if (ieee802154_nl_fill_devkey(data->skb, data->portid, data->nlmsg_seq, dpos->hwaddr, kpos, data->dev)) { return -EMSGSIZE; } data->s_idx2++; } data->s_idx++; } return 0; } int ieee802154_llsec_dump_devkeys(struct sk_buff *skb, struct netlink_callback *cb) { return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devkeys); } static int llsec_parse_seclevel(struct genl_info *info, struct ieee802154_llsec_seclevel *sl) { memset(sl, 0, sizeof(*sl)); if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE] || !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS] || !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]) return -EINVAL; sl->frame_type = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE]); if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD) { if (!info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID]) return -EINVAL; sl->cmd_frame_id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID]); } sl->sec_levels = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS]); sl->device_override = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]); return 0; } static int llsec_add_seclevel(struct net_device *dev, struct genl_info *info) { struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); struct ieee802154_llsec_seclevel sl; if (llsec_parse_seclevel(info, &sl)) return -EINVAL; return ops->llsec->add_seclevel(dev, &sl); } int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info) { if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != (NLM_F_CREATE | NLM_F_EXCL)) return -EINVAL; return ieee802154_nl_llsec_change(skb, info, llsec_add_seclevel); } static int llsec_del_seclevel(struct net_device *dev, struct genl_info *info) { struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); struct ieee802154_llsec_seclevel sl; if (llsec_parse_seclevel(info, &sl)) return -EINVAL; return ops->llsec->del_seclevel(dev, &sl); } int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info) { return ieee802154_nl_llsec_change(skb, info, llsec_del_seclevel); } static int ieee802154_nl_fill_seclevel(struct sk_buff *msg, u32 portid, u32 seq, const struct ieee802154_llsec_seclevel *sl, const struct net_device *dev) { void *hdr; hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, IEEE802154_LLSEC_LIST_SECLEVEL); if (!hdr) goto out; if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || nla_put_u8(msg, IEEE802154_ATTR_LLSEC_FRAME_TYPE, sl->frame_type) || nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVELS, sl->sec_levels) || nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, sl->device_override)) goto nla_put_failure; if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD && nla_put_u8(msg, IEEE802154_ATTR_LLSEC_CMD_FRAME_ID, sl->cmd_frame_id)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); out: return -EMSGSIZE; } static int llsec_iter_seclevels(struct llsec_dump_data *data) { struct ieee802154_llsec_seclevel *pos; int rc = 0, idx = 0; list_for_each_entry(pos, &data->table->security_levels, list) { if (idx++ < data->s_idx) continue; if (ieee802154_nl_fill_seclevel(data->skb, data->portid, data->nlmsg_seq, pos, data->dev)) { rc = -EMSGSIZE; break; } data->s_idx++; } return rc; } int ieee802154_llsec_dump_seclevels(struct sk_buff *skb, struct netlink_callback *cb) { return ieee802154_llsec_dump_table(skb, cb, llsec_iter_seclevels); } |
| 85 86 86 49 81 81 72 83 370 933 932 5 5 5 896 892 896 30 30 26 30 397 848 424 426 1089 618 494 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * include/net/l3mdev.h - L3 master device API * Copyright (c) 2015 Cumulus Networks * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com> */ #ifndef _NET_L3MDEV_H_ #define _NET_L3MDEV_H_ #include <net/dst.h> #include <net/fib_rules.h> enum l3mdev_type { L3MDEV_TYPE_UNSPEC, L3MDEV_TYPE_VRF, __L3MDEV_TYPE_MAX }; #define L3MDEV_TYPE_MAX (__L3MDEV_TYPE_MAX - 1) typedef int (*lookup_by_table_id_t)(struct net *net, u32 table_d); /** * struct l3mdev_ops - l3mdev operations * * @l3mdev_fib_table: Get FIB table id to use for lookups * * @l3mdev_l3_rcv: Hook in L3 receive path * * @l3mdev_l3_out: Hook in L3 output path * * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ struct l3mdev_ops { u32 (*l3mdev_fib_table)(const struct net_device *dev); struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev, struct sk_buff *skb, u16 proto); struct sk_buff * (*l3mdev_l3_out)(struct net_device *dev, struct sock *sk, struct sk_buff *skb, u16 proto); /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV int l3mdev_table_lookup_register(enum l3mdev_type l3type, lookup_by_table_id_t fn); void l3mdev_table_lookup_unregister(enum l3mdev_type l3type, lookup_by_table_id_t fn); int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, u32 table_id); int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); static inline bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) { return !(fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF) && fl->flowi_l3mdev == iifindex; } static inline bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) { return fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF && fl->flowi_l3mdev == oifindex; } void l3mdev_update_flow(struct net *net, struct flowi *fl); int l3mdev_master_ifindex_rcu(const struct net_device *dev); static inline int l3mdev_master_ifindex(struct net_device *dev) { int ifindex; rcu_read_lock(); ifindex = l3mdev_master_ifindex_rcu(dev); rcu_read_unlock(); return ifindex; } static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) { struct net_device *dev; int rc = 0; if (ifindex) { rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) rc = l3mdev_master_ifindex_rcu(dev); rcu_read_unlock(); } return rc; } static inline struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) { /* netdev_master_upper_dev_get_rcu calls * list_first_or_null_rcu to walk the upper dev list. * list_first_or_null_rcu does not handle a const arg. We aren't * making changes, just want the master device from that list so * typecast to remove the const */ struct net_device *dev = (struct net_device *)_dev; struct net_device *master; if (!dev) return NULL; if (netif_is_l3_master(dev)) master = dev; else if (netif_is_l3_slave(dev)) master = netdev_master_upper_dev_get_rcu(dev); else master = NULL; return master; } int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex); static inline int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex) { rcu_read_lock(); ifindex = l3mdev_master_upper_ifindex_by_index_rcu(net, ifindex); rcu_read_unlock(); return ifindex; } u32 l3mdev_fib_table_rcu(const struct net_device *dev); u32 l3mdev_fib_table_by_index(struct net *net, int ifindex); static inline u32 l3mdev_fib_table(const struct net_device *dev) { u32 tb_id; rcu_read_lock(); tb_id = l3mdev_fib_table_rcu(dev); rcu_read_unlock(); return tb_id; } static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { struct net_device *dev; bool rc = false; if (ifindex == 0) return false; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) rc = netif_is_l3_master(dev); rcu_read_unlock(); return rc; } struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); static inline struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) { struct net_device *master = NULL; if (netif_is_l3_slave(skb->dev)) master = netdev_master_upper_dev_get_rcu(skb->dev); else if (netif_is_l3_master(skb->dev) || netif_has_l3_rx_handler(skb->dev)) master = skb->dev; if (master && master->l3mdev_ops->l3mdev_l3_rcv) skb = master->l3mdev_ops->l3mdev_l3_rcv(master, skb, proto); return skb; } static inline struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { return l3mdev_l3_rcv(skb, AF_INET); } static inline struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) { return l3mdev_l3_rcv(skb, AF_INET6); } static inline struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) { struct net_device *dev = skb_dst(skb)->dev; if (netif_is_l3_slave(dev)) { struct net_device *master; rcu_read_lock(); master = netdev_master_upper_dev_get_rcu(dev); if (master && master->l3mdev_ops->l3mdev_l3_out) skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, skb, proto); rcu_read_unlock(); } return skb; } static inline struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) { return l3mdev_l3_out(sk, skb, AF_INET); } static inline struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) { return l3mdev_l3_out(sk, skb, AF_INET6); } #else static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev) { return 0; } static inline int l3mdev_master_ifindex(struct net_device *dev) { return 0; } static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) { return 0; } static inline int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex) { return 0; } static inline int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex) { return 0; } static inline struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) { return NULL; } static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev) { return 0; } static inline u32 l3mdev_fib_table(const struct net_device *dev) { return 0; } static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) { return 0; } static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { return false; } static inline struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { return NULL; } static inline struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { return skb; } static inline struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) { return skb; } static inline struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) { return skb; } static inline struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) { return skb; } static inline int l3mdev_table_lookup_register(enum l3mdev_type l3type, lookup_by_table_id_t fn) { return -EOPNOTSUPP; } static inline void l3mdev_table_lookup_unregister(enum l3mdev_type l3type, lookup_by_table_id_t fn) { } static inline int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, u32 table_id) { return -ENODEV; } static inline int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg) { return 1; } static inline bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) { return false; } static inline bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) { return false; } static inline void l3mdev_update_flow(struct net *net, struct flowi *fl) { } #endif #endif /* _NET_L3MDEV_H_ */ |
| 9 9 9 9 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_REJECT_H #define _NF_REJECT_H #include <linux/types.h> #include <uapi/linux/in.h> static inline bool nf_reject_verify_csum(struct sk_buff *skb, int dataoff, __u8 proto) { /* Skip protocols that don't use 16-bit one's complement checksum * of the entire payload. */ switch (proto) { /* Protocols with optional checksums. */ case IPPROTO_UDP: { const struct udphdr *udp_hdr; struct udphdr _udp_hdr; udp_hdr = skb_header_pointer(skb, dataoff, sizeof(_udp_hdr), &_udp_hdr); if (!udp_hdr || udp_hdr->check) return true; return false; } case IPPROTO_GRE: /* Protocols with other integrity checks. */ case IPPROTO_AH: case IPPROTO_ESP: case IPPROTO_SCTP: /* Protocols with partial checksums. */ case IPPROTO_UDPLITE: return false; } return true; } #endif /* _NF_REJECT_H */ |
| 7 7 3 3 2 1 1 2 1 2 3 1 2 1 3 1 2 1 2 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 | // SPDX-License-Identifier: GPL-2.0 /* Multipath TCP * * Copyright (c) 2022, Intel Corporation. */ #include "protocol.h" #include "mib.h" #include "mptcp_pm_gen.h" #define mptcp_for_each_userspace_pm_addr(__msk, __entry) \ list_for_each_entry(__entry, \ &((__msk)->pm.userspace_pm_local_addr_list), list) void mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk) { struct mptcp_pm_addr_entry *entry, *tmp; struct sock *sk = (struct sock *)msk; LIST_HEAD(free_list); spin_lock_bh(&msk->pm.lock); list_splice_init(&msk->pm.userspace_pm_local_addr_list, &free_list); spin_unlock_bh(&msk->pm.lock); list_for_each_entry_safe(entry, tmp, &free_list, list) { sock_kfree_s(sk, entry, sizeof(*entry)); } } static struct mptcp_pm_addr_entry * mptcp_userspace_pm_lookup_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr) { struct mptcp_pm_addr_entry *entry; mptcp_for_each_userspace_pm_addr(msk, entry) { if (mptcp_addresses_equal(&entry->addr, addr, false)) return entry; } return NULL; } static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry, bool needs_id) { DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *e; bool addr_match = false; bool id_match = false; int ret = -EINVAL; bitmap_zero(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); spin_lock_bh(&msk->pm.lock); mptcp_for_each_userspace_pm_addr(msk, e) { addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true); if (addr_match && entry->addr.id == 0 && needs_id) entry->addr.id = e->addr.id; id_match = (e->addr.id == entry->addr.id); if (addr_match || id_match) break; __set_bit(e->addr.id, id_bitmap); } if (!addr_match && !id_match) { /* Memory for the entry is allocated from the * sock option buffer. */ e = sock_kmemdup(sk, entry, sizeof(*entry), GFP_ATOMIC); if (!e) { ret = -ENOMEM; goto append_err; } if (!e->addr.id && needs_id) e->addr.id = find_next_zero_bit(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list); msk->pm.local_addr_used++; ret = e->addr.id; } else if (addr_match && id_match) { ret = entry->addr.id; } append_err: spin_unlock_bh(&msk->pm.lock); return ret; } /* If the subflow is closed from the other peer (not via a * subflow destroy command then), we want to keep the entry * not to assign the same ID to another address and to be * able to send RM_ADDR after the removal of the subflow. */ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *addr) { struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *entry; entry = mptcp_userspace_pm_lookup_addr(msk, &addr->addr); if (!entry) return -EINVAL; /* TODO: a refcount is needed because the entry can * be used multiple times (e.g. fullmesh mode). */ list_del_rcu(&entry->list); sock_kfree_s(sk, entry, sizeof(*entry)); msk->pm.local_addr_used--; return 0; } static struct mptcp_pm_addr_entry * mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id) { struct mptcp_pm_addr_entry *entry; mptcp_for_each_userspace_pm_addr(msk, entry) { if (entry->addr.id == id) return entry; } return NULL; } int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *skc) { __be16 msk_sport = ((struct inet_sock *) inet_sk((struct sock *)msk))->inet_sport; struct mptcp_pm_addr_entry *entry; spin_lock_bh(&msk->pm.lock); entry = mptcp_userspace_pm_lookup_addr(msk, &skc->addr); spin_unlock_bh(&msk->pm.lock); if (entry) return entry->addr.id; if (skc->addr.port == msk_sport) skc->addr.port = 0; return mptcp_userspace_pm_append_new_local_addr(msk, skc, true); } bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { struct mptcp_pm_addr_entry *entry; bool backup; spin_lock_bh(&msk->pm.lock); entry = mptcp_userspace_pm_lookup_addr(msk, skc); backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); spin_unlock_bh(&msk->pm.lock); return backup; } static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *info) { struct mptcp_sock *msk; struct nlattr *token; if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_TOKEN)) return NULL; token = info->attrs[MPTCP_PM_ATTR_TOKEN]; msk = mptcp_token_get_sock(genl_info_net(info), nla_get_u32(token)); if (!msk) { NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); return NULL; } if (!mptcp_pm_is_userspace(msk)) { NL_SET_ERR_MSG_ATTR(info->extack, token, "userspace PM not selected"); sock_put((struct sock *)msk); return NULL; } return msk; } int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) { struct mptcp_pm_addr_entry addr_val; struct mptcp_sock *msk; struct nlattr *addr; int err = -EINVAL; struct sock *sk; if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR)) return err; msk = mptcp_userspace_pm_get_sock(info); if (!msk) return err; sk = (struct sock *)msk; addr = info->attrs[MPTCP_PM_ATTR_ADDR]; err = mptcp_pm_parse_entry(addr, info, true, &addr_val); if (err < 0) goto announce_err; if (addr_val.addr.id == 0) { NL_SET_ERR_MSG_ATTR(info->extack, addr, "invalid addr id"); err = -EINVAL; goto announce_err; } if (!(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { NL_SET_ERR_MSG_ATTR(info->extack, addr, "invalid addr flags"); err = -EINVAL; goto announce_err; } err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, addr, "did not match address and id"); goto announce_err; } lock_sock(sk); spin_lock_bh(&msk->pm.lock); if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) { msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &addr_val.addr, false); mptcp_pm_addr_send_ack(msk); } spin_unlock_bh(&msk->pm.lock); release_sock(sk); err = 0; announce_err: sock_put(sk); return err; } static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk) { struct mptcp_rm_list list = { .nr = 0 }; struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; bool has_id_0 = false; int err = -EINVAL; lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { if (READ_ONCE(subflow->local_id) == 0) { has_id_0 = true; break; } } if (!has_id_0) goto remove_err; list.ids[list.nr++] = 0; spin_lock_bh(&msk->pm.lock); mptcp_pm_remove_addr(msk, &list); spin_unlock_bh(&msk->pm.lock); err = 0; remove_err: release_sock(sk); return err; } void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry) { struct mptcp_rm_list alist = { .nr = 0 }; int anno_nr = 0; /* only delete if either announced or matching a subflow */ if (mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) anno_nr++; else if (!mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) return; alist.ids[alist.nr++] = entry->addr.id; spin_lock_bh(&msk->pm.lock); msk->pm.add_addr_signaled -= anno_nr; mptcp_pm_remove_addr(msk, &alist); spin_unlock_bh(&msk->pm.lock); } int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) { struct mptcp_pm_addr_entry *match; struct mptcp_sock *msk; struct nlattr *id; int err = -EINVAL; struct sock *sk; u8 id_val; if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_LOC_ID)) return err; id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; id_val = nla_get_u8(id); msk = mptcp_userspace_pm_get_sock(info); if (!msk) return err; sk = (struct sock *)msk; if (id_val == 0) { err = mptcp_userspace_pm_remove_id_zero_address(msk); goto out; } lock_sock(sk); spin_lock_bh(&msk->pm.lock); match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val); if (!match) { spin_unlock_bh(&msk->pm.lock); release_sock(sk); goto out; } list_del_rcu(&match->list); spin_unlock_bh(&msk->pm.lock); mptcp_pm_remove_addr_entry(msk, match); release_sock(sk); kfree_rcu_mightsleep(match); /* Adjust sk_omem_alloc like sock_kfree_s() does, to match * with allocation of this memory by sock_kmemdup() */ atomic_sub(sizeof(*match), &sk->sk_omem_alloc); err = 0; out: if (err) NL_SET_ERR_MSG_ATTR_FMT(info->extack, id, "address with id %u not found", id_val); sock_put(sk); return err; } int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) { struct mptcp_pm_addr_entry entry = { 0 }; struct mptcp_addr_info addr_r; struct nlattr *raddr, *laddr; struct mptcp_pm_local local; struct mptcp_sock *msk; int err = -EINVAL; struct sock *sk; if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR) || GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE)) return err; msk = mptcp_userspace_pm_get_sock(info); if (!msk) return err; sk = (struct sock *)msk; laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; err = mptcp_pm_parse_entry(laddr, info, true, &entry); if (err < 0) goto create_err; if (entry.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { NL_SET_ERR_MSG_ATTR(info->extack, laddr, "invalid addr flags"); err = -EINVAL; goto create_err; } entry.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; err = mptcp_pm_parse_addr(raddr, info, &addr_r); if (err < 0) goto create_err; if (!mptcp_pm_addr_families_match(sk, &entry.addr, &addr_r)) { GENL_SET_ERR_MSG(info, "families mismatch"); err = -EINVAL; goto create_err; } err = mptcp_userspace_pm_append_new_local_addr(msk, &entry, false); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, laddr, "did not match address and id"); goto create_err; } local.addr = entry.addr; local.flags = entry.flags; local.ifindex = entry.ifindex; lock_sock(sk); err = __mptcp_subflow_connect(sk, &local, &addr_r); release_sock(sk); if (err) GENL_SET_ERR_MSG_FMT(info, "connect error: %d", err); spin_lock_bh(&msk->pm.lock); if (err) mptcp_userspace_pm_delete_local_addr(msk, &entry); else msk->pm.extra_subflows++; spin_unlock_bh(&msk->pm.lock); create_err: sock_put(sk); return err; } static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk, const struct mptcp_addr_info *local, const struct mptcp_addr_info *remote) { struct mptcp_subflow_context *subflow; if (local->family != remote->family) return NULL; mptcp_for_each_subflow(msk, subflow) { const struct inet_sock *issk; struct sock *ssk; ssk = mptcp_subflow_tcp_sock(subflow); if (local->family != ssk->sk_family) continue; issk = inet_sk(ssk); switch (ssk->sk_family) { case AF_INET: if (issk->inet_saddr != local->addr.s_addr || issk->inet_daddr != remote->addr.s_addr) continue; break; #if IS_ENABLED(CONFIG_MPTCP_IPV6) case AF_INET6: { if (!ipv6_addr_equal(&local->addr6, &issk->pinet6->saddr) || !ipv6_addr_equal(&remote->addr6, &ssk->sk_v6_daddr)) continue; break; } #endif default: continue; } if (issk->inet_sport == local->port && issk->inet_dport == remote->port) return ssk; } return NULL; } int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info) { struct mptcp_pm_addr_entry addr_l; struct mptcp_addr_info addr_r; struct nlattr *raddr, *laddr; struct mptcp_sock *msk; struct sock *sk, *ssk; int err = -EINVAL; if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR) || GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE)) return err; msk = mptcp_userspace_pm_get_sock(info); if (!msk) return err; sk = (struct sock *)msk; laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; err = mptcp_pm_parse_entry(laddr, info, true, &addr_l); if (err < 0) goto destroy_err; raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; err = mptcp_pm_parse_addr(raddr, info, &addr_r); if (err < 0) goto destroy_err; #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (addr_l.addr.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) { ipv6_addr_set_v4mapped(addr_l.addr.addr.s_addr, &addr_l.addr.addr6); addr_l.addr.family = AF_INET6; } if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr.addr6)) { ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6); addr_r.family = AF_INET6; } #endif if (addr_l.addr.family != addr_r.family) { GENL_SET_ERR_MSG(info, "address families do not match"); err = -EINVAL; goto destroy_err; } if (!addr_l.addr.port) { NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local port"); err = -EINVAL; goto destroy_err; } if (!addr_r.port) { NL_SET_ERR_MSG_ATTR(info->extack, raddr, "missing remote port"); err = -EINVAL; goto destroy_err; } lock_sock(sk); ssk = mptcp_nl_find_ssk(msk, &addr_l.addr, &addr_r); if (!ssk) { GENL_SET_ERR_MSG(info, "subflow not found"); err = -ESRCH; goto release_sock; } spin_lock_bh(&msk->pm.lock); mptcp_userspace_pm_delete_local_addr(msk, &addr_l); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN); mptcp_close_ssk(sk, ssk, mptcp_subflow_ctx(ssk)); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW); release_sock: release_sock(sk); destroy_err: sock_put(sk); return err; } int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local, struct genl_info *info) { struct mptcp_addr_info rem = { .family = AF_UNSPEC, }; struct mptcp_pm_addr_entry *entry; struct nlattr *attr, *attr_rem; struct mptcp_sock *msk; int ret = -EINVAL; struct sock *sk; u8 bkup = 0; if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE)) return ret; msk = mptcp_userspace_pm_get_sock(info); if (!msk) return ret; sk = (struct sock *)msk; attr = info->attrs[MPTCP_PM_ATTR_ADDR]; if (local->addr.family == AF_UNSPEC) { NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid local address family"); ret = -EINVAL; goto set_flags_err; } attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; ret = mptcp_pm_parse_addr(attr_rem, info, &rem); if (ret < 0) goto set_flags_err; if (rem.family == AF_UNSPEC) { NL_SET_ERR_MSG_ATTR(info->extack, attr_rem, "invalid remote address family"); ret = -EINVAL; goto set_flags_err; } if (local->flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; spin_lock_bh(&msk->pm.lock); entry = mptcp_userspace_pm_lookup_addr(msk, &local->addr); if (entry) { if (bkup) entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; else entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; } spin_unlock_bh(&msk->pm.lock); lock_sock(sk); ret = mptcp_pm_mp_prio_send_ack(msk, &local->addr, &rem, bkup); release_sock(sk); /* mptcp_pm_mp_prio_send_ack() only fails in one case */ if (ret < 0) GENL_SET_ERR_MSG(info, "subflow not found"); set_flags_err: sock_put(sk); return ret; } int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) { struct id_bitmap { DECLARE_BITMAP(map, MPTCP_PM_MAX_ADDR_ID + 1); } *bitmap; const struct genl_info *info = genl_info_dump(cb); struct mptcp_pm_addr_entry *entry; struct mptcp_sock *msk; int ret = -EINVAL; struct sock *sk; BUILD_BUG_ON(sizeof(struct id_bitmap) > sizeof(cb->ctx)); bitmap = (struct id_bitmap *)cb->ctx; msk = mptcp_userspace_pm_get_sock(info); if (!msk) return ret; sk = (struct sock *)msk; lock_sock(sk); spin_lock_bh(&msk->pm.lock); mptcp_for_each_userspace_pm_addr(msk, entry) { if (test_bit(entry->addr.id, bitmap->map)) continue; if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0) break; __set_bit(entry->addr.id, bitmap->map); } spin_unlock_bh(&msk->pm.lock); release_sock(sk); ret = msg->len; sock_put(sk); return ret; } int mptcp_userspace_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, struct genl_info *info) { struct mptcp_pm_addr_entry *entry; struct mptcp_sock *msk; int ret = -EINVAL; struct sock *sk; msk = mptcp_userspace_pm_get_sock(info); if (!msk) return ret; sk = (struct sock *)msk; lock_sock(sk); spin_lock_bh(&msk->pm.lock); entry = mptcp_userspace_pm_lookup_addr_by_id(msk, id); if (entry) { *addr = *entry; ret = 0; } spin_unlock_bh(&msk->pm.lock); release_sock(sk); sock_put(sk); return ret; } static struct mptcp_pm_ops mptcp_pm_userspace = { .name = "userspace", .owner = THIS_MODULE, }; void __init mptcp_pm_userspace_register(void) { mptcp_pm_register(&mptcp_pm_userspace); } |
| 1 8 1 8 10 1 98 98 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 | // SPDX-License-Identifier: GPL-2.0-or-later /* * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828). * * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk> * * Changes: * Fixes: */ #define pr_fmt(fmt) "UDPLite: " fmt #include <linux/export.h> #include <linux/proc_fs.h> #include "udp_impl.h" struct udp_table udplite_table __read_mostly; EXPORT_SYMBOL(udplite_table); /* Designate sk as UDP-Lite socket */ static int udplite_sk_init(struct sock *sk) { udp_init_sock(sk); pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); return 0; } static int udplite_rcv(struct sk_buff *skb) { return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); } static int udplite_err(struct sk_buff *skb, u32 info) { return __udp4_lib_err(skb, info, &udplite_table); } static const struct net_protocol udplite_protocol = { .handler = udplite_rcv, .err_handler = udplite_err, .no_policy = 1, }; struct proto udplite_prot = { .name = "UDP-Lite", .owner = THIS_MODULE, .close = udp_lib_close, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, .init = udplite_sk_init, .destroy = udp_destroy_sock, .setsockopt = udp_setsockopt, .getsockopt = udp_getsockopt, .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .memory_allocated = &net_aligned_data.udp_memory_allocated, .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, .sysctl_mem = sysctl_udp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp_sock), .h.udp_table = &udplite_table, }; EXPORT_SYMBOL(udplite_prot); static struct inet_protosw udplite4_protosw = { .type = SOCK_DGRAM, .protocol = IPPROTO_UDPLITE, .prot = &udplite_prot, .ops = &inet_dgram_ops, .flags = INET_PROTOSW_PERMANENT, }; #ifdef CONFIG_PROC_FS static struct udp_seq_afinfo udplite4_seq_afinfo = { .family = AF_INET, .udp_table = &udplite_table, }; static int __net_init udplite4_proc_init_net(struct net *net) { if (!proc_create_net_data("udplite", 0444, net->proc_net, &udp_seq_ops, sizeof(struct udp_iter_state), &udplite4_seq_afinfo)) return -ENOMEM; return 0; } static void __net_exit udplite4_proc_exit_net(struct net *net) { remove_proc_entry("udplite", net->proc_net); } static struct pernet_operations udplite4_net_ops = { .init = udplite4_proc_init_net, .exit = udplite4_proc_exit_net, }; static __init int udplite4_proc_init(void) { return register_pernet_subsys(&udplite4_net_ops); } #else static inline int udplite4_proc_init(void) { return 0; } #endif void __init udplite4_register(void) { udp_table_init(&udplite_table, "UDP-Lite"); if (proto_register(&udplite_prot, 1)) goto out_register_err; if (inet_add_protocol(&udplite_protocol, IPPROTO_UDPLITE) < 0) goto out_unregister_proto; inet_register_protosw(&udplite4_protosw); if (udplite4_proc_init()) pr_err("%s: Cannot register /proc!\n", __func__); return; out_unregister_proto: proto_unregister(&udplite_prot); out_register_err: pr_crit("%s: Cannot add UDP-Lite protocol\n", __func__); } |
| 144 132 144 144 144 144 144 144 144 144 143 144 144 144 144 130 130 130 130 130 130 144 143 144 144 16 16 132 132 132 71 132 132 144 16 16 16 132 132 130 4 132 132 132 132 132 131 132 132 132 144 131 144 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/readpage.c * * Copyright (C) 2002, Linus Torvalds. * Copyright (C) 2015, Google, Inc. * * This was originally taken from fs/mpage.c * * The ext4_mpage_readpages() function here is intended to * replace mpage_readahead() in the general case, not just for * encrypted files. It has some limitations (see below), where it * will fall back to read_block_full_page(), but these limitations * should only be hit when page_size != block_size. * * This will allow us to attach a callback function to support ext4 * encryption. * * If anything unusual happens, such as: * * - encountering a page which has buffers * - encountering a page which has a non-hole after a hole * - encountering a page with non-contiguous blocks * * then this code just gives up and calls the buffer_head-based read function. * It does handle a page which has holes at the end - that is a common case: * the end-of-file on blocksize < PAGE_SIZE setups. * */ #include <linux/kernel.h> #include <linux/export.h> #include <linux/mm.h> #include <linux/kdev_t.h> #include <linux/gfp.h> #include <linux/bio.h> #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/blkdev.h> #include <linux/highmem.h> #include <linux/prefetch.h> #include <linux/mpage.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/pagevec.h> #include "ext4.h" #define NUM_PREALLOC_POST_READ_CTXS 128 static struct kmem_cache *bio_post_read_ctx_cache; static mempool_t *bio_post_read_ctx_pool; /* postprocessing steps for read bios */ enum bio_post_read_step { STEP_INITIAL = 0, STEP_DECRYPT, STEP_VERITY, STEP_MAX, }; struct bio_post_read_ctx { struct bio *bio; struct work_struct work; unsigned int cur_step; unsigned int enabled_steps; }; static void __read_end_io(struct bio *bio) { struct folio_iter fi; bio_for_each_folio_all(fi, bio) folio_end_read(fi.folio, bio->bi_status == 0); if (bio->bi_private) mempool_free(bio->bi_private, bio_post_read_ctx_pool); bio_put(bio); } static void bio_post_read_processing(struct bio_post_read_ctx *ctx); static void decrypt_work(struct work_struct *work) { struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); struct bio *bio = ctx->bio; if (fscrypt_decrypt_bio(bio)) bio_post_read_processing(ctx); else __read_end_io(bio); } static void verity_work(struct work_struct *work) { struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); struct bio *bio = ctx->bio; /* * fsverity_verify_bio() may call readahead() again, and although verity * will be disabled for that, decryption may still be needed, causing * another bio_post_read_ctx to be allocated. So to guarantee that * mempool_alloc() never deadlocks we must free the current ctx first. * This is safe because verity is the last post-read step. */ BUILD_BUG_ON(STEP_VERITY + 1 != STEP_MAX); mempool_free(ctx, bio_post_read_ctx_pool); bio->bi_private = NULL; fsverity_verify_bio(bio); __read_end_io(bio); } static void bio_post_read_processing(struct bio_post_read_ctx *ctx) { /* * We use different work queues for decryption and for verity because * verity may require reading metadata pages that need decryption, and * we shouldn't recurse to the same workqueue. */ switch (++ctx->cur_step) { case STEP_DECRYPT: if (ctx->enabled_steps & (1 << STEP_DECRYPT)) { INIT_WORK(&ctx->work, decrypt_work); fscrypt_enqueue_decrypt_work(&ctx->work); return; } ctx->cur_step++; fallthrough; case STEP_VERITY: if (ctx->enabled_steps & (1 << STEP_VERITY)) { INIT_WORK(&ctx->work, verity_work); fsverity_enqueue_verify_work(&ctx->work); return; } ctx->cur_step++; fallthrough; default: __read_end_io(ctx->bio); } } static bool bio_post_read_required(struct bio *bio) { return bio->bi_private && !bio->bi_status; } /* * I/O completion handler for multipage BIOs. * * The mpage code never puts partial pages into a BIO (except for end-of-file). * If a page does not map to a contiguous run of blocks then it simply falls * back to block_read_full_folio(). * * Why is this? If a page's completion depends on a number of different BIOs * which can complete in any order (or at the same time) then determining the * status of that page is hard. See end_buffer_async_read() for the details. * There is no point in duplicating all that complexity. */ static void mpage_end_io(struct bio *bio) { if (bio_post_read_required(bio)) { struct bio_post_read_ctx *ctx = bio->bi_private; ctx->cur_step = STEP_INITIAL; bio_post_read_processing(ctx); return; } __read_end_io(bio); } static inline bool ext4_need_verity(const struct inode *inode, pgoff_t idx) { return fsverity_active(inode) && idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); } static void ext4_set_bio_post_read_ctx(struct bio *bio, const struct inode *inode, pgoff_t first_idx) { unsigned int post_read_steps = 0; if (fscrypt_inode_uses_fs_layer_crypto(inode)) post_read_steps |= 1 << STEP_DECRYPT; if (ext4_need_verity(inode, first_idx)) post_read_steps |= 1 << STEP_VERITY; if (post_read_steps) { /* Due to the mempool, this never fails. */ struct bio_post_read_ctx *ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); ctx->bio = bio; ctx->enabled_steps = post_read_steps; bio->bi_private = ctx; } } static inline loff_t ext4_readpage_limit(struct inode *inode) { if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) return inode->i_sb->s_maxbytes; return i_size_read(inode); } int ext4_mpage_readpages(struct inode *inode, struct readahead_control *rac, struct folio *folio) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; const unsigned blkbits = inode->i_blkbits; const unsigned blocksize = 1 << blkbits; sector_t next_block; sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; sector_t first_block; unsigned page_block; struct block_device *bdev = inode->i_sb->s_bdev; int length; unsigned relative_block = 0; struct ext4_map_blocks map; unsigned int nr_pages, folio_pages; map.m_pblk = 0; map.m_lblk = 0; map.m_len = 0; map.m_flags = 0; nr_pages = rac ? readahead_count(rac) : folio_nr_pages(folio); for (; nr_pages; nr_pages -= folio_pages) { int fully_mapped = 1; unsigned int first_hole; unsigned int blocks_per_folio; if (rac) folio = readahead_folio(rac); folio_pages = folio_nr_pages(folio); prefetchw(&folio->flags); if (folio_buffers(folio)) goto confused; blocks_per_folio = folio_size(folio) >> blkbits; first_hole = blocks_per_folio; block_in_file = next_block = EXT4_PG_TO_LBLK(inode, folio->index); last_block = EXT4_PG_TO_LBLK(inode, folio->index + nr_pages); last_block_in_file = (ext4_readpage_limit(inode) + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) last_block = last_block_in_file; page_block = 0; /* * Map blocks using the previous result first. */ if ((map.m_flags & EXT4_MAP_MAPPED) && block_in_file > map.m_lblk && block_in_file < (map.m_lblk + map.m_len)) { unsigned map_offset = block_in_file - map.m_lblk; unsigned last = map.m_len - map_offset; first_block = map.m_pblk + map_offset; for (relative_block = 0; ; relative_block++) { if (relative_block == last) { /* needed? */ map.m_flags &= ~EXT4_MAP_MAPPED; break; } if (page_block == blocks_per_folio) break; page_block++; block_in_file++; } } /* * Then do more ext4_map_blocks() calls until we are * done with this folio. */ while (page_block < blocks_per_folio) { if (block_in_file < last_block) { map.m_lblk = block_in_file; map.m_len = last_block - block_in_file; if (ext4_map_blocks(NULL, inode, &map, 0) < 0) { set_error_page: folio_zero_segment(folio, 0, folio_size(folio)); folio_unlock(folio); goto next_page; } } if ((map.m_flags & EXT4_MAP_MAPPED) == 0) { fully_mapped = 0; if (first_hole == blocks_per_folio) first_hole = page_block; page_block++; block_in_file++; continue; } if (first_hole != blocks_per_folio) goto confused; /* hole -> non-hole */ /* Contiguous blocks? */ if (!page_block) first_block = map.m_pblk; else if (first_block + page_block != map.m_pblk) goto confused; for (relative_block = 0; ; relative_block++) { if (relative_block == map.m_len) { /* needed? */ map.m_flags &= ~EXT4_MAP_MAPPED; break; } else if (page_block == blocks_per_folio) break; page_block++; block_in_file++; } } if (first_hole != blocks_per_folio) { folio_zero_segment(folio, first_hole << blkbits, folio_size(folio)); if (first_hole == 0) { if (ext4_need_verity(inode, folio->index) && !fsverity_verify_folio(folio)) goto set_error_page; folio_end_read(folio, true); continue; } } else if (fully_mapped) { folio_set_mappedtodisk(folio); } /* * This folio will go to BIO. Do we need to send this * BIO off first? */ if (bio && (last_block_in_bio != first_block - 1 || !fscrypt_mergeable_bio(bio, inode, next_block))) { submit_and_realloc: submit_bio(bio); bio = NULL; } if (bio == NULL) { /* * bio_alloc will _always_ be able to allocate a bio if * __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset(). */ bio = bio_alloc(bdev, bio_max_segs(nr_pages), REQ_OP_READ, GFP_KERNEL); fscrypt_set_bio_crypt_ctx(bio, inode, next_block, GFP_KERNEL); ext4_set_bio_post_read_ctx(bio, inode, folio->index); bio->bi_iter.bi_sector = first_block << (blkbits - 9); bio->bi_end_io = mpage_end_io; if (rac) bio->bi_opf |= REQ_RAHEAD; } length = first_hole << blkbits; if (!bio_add_folio(bio, folio, length, 0)) goto submit_and_realloc; if (((map.m_flags & EXT4_MAP_BOUNDARY) && (relative_block == map.m_len)) || (first_hole != blocks_per_folio)) { submit_bio(bio); bio = NULL; } else last_block_in_bio = first_block + blocks_per_folio - 1; continue; confused: if (bio) { submit_bio(bio); bio = NULL; } if (!folio_test_uptodate(folio)) block_read_full_folio(folio, ext4_get_block); else folio_unlock(folio); next_page: ; /* A label shall be followed by a statement until C23 */ } if (bio) submit_bio(bio); return 0; } int __init ext4_init_post_read_processing(void) { bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, SLAB_RECLAIM_ACCOUNT); if (!bio_post_read_ctx_cache) goto fail; bio_post_read_ctx_pool = mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS, bio_post_read_ctx_cache); if (!bio_post_read_ctx_pool) goto fail_free_cache; return 0; fail_free_cache: kmem_cache_destroy(bio_post_read_ctx_cache); fail: return -ENOMEM; } void ext4_exit_post_read_processing(void) { mempool_destroy(bio_post_read_ctx_pool); kmem_cache_destroy(bio_post_read_ctx_cache); } |
| 4 5 41 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_IRQDESC_H #define _LINUX_IRQDESC_H #include <linux/rcupdate.h> #include <linux/kobject.h> #include <linux/mutex.h> /* * Core internal functions to deal with irq descriptors */ struct irq_affinity_notify; struct proc_dir_entry; struct module; struct irq_desc; struct irq_domain; struct pt_regs; /** * struct irqstat - interrupt statistics * @cnt: real-time interrupt count * @ref: snapshot of interrupt count */ struct irqstat { unsigned int cnt; #ifdef CONFIG_GENERIC_IRQ_STAT_SNAPSHOT unsigned int ref; #endif }; /** * struct irq_desc - interrupt descriptor * @irq_common_data: per irq and chip data passed down to chip functions * @kstat_irqs: irq stats per cpu * @handle_irq: highlevel irq-events handler * @action: the irq action chain * @status_use_accessors: status information * @core_internal_state__do_not_mess_with_it: core internal status information * @depth: disable-depth, for nested irq_disable() calls * @wake_depth: enable depth, for multiple irq_set_irq_wake() callers * @tot_count: stats field for non-percpu irqs * @irq_count: stats field to detect stalled irqs * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled interrupts * @threads_handled: stats field for deferred spurious detection of threaded handlers * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers * @lock: locking for SMP * @affinity_hint: hint to user space for preferred irq affinity * @affinity_notify: context for notification of affinity changes * @pending_mask: pending rebalanced interrupts * @threads_oneshot: bitfield to handle shared oneshot threads * @threads_active: number of irqaction threads currently running * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers * @nr_actions: number of installed actions on this descriptor * @no_suspend_depth: number of irqactions on a irq descriptor with * IRQF_NO_SUSPEND set * @force_resume_depth: number of irqactions on a irq descriptor with * IRQF_FORCE_RESUME set * @rcu: rcu head for delayed free * @kobj: kobject used to represent this struct in sysfs * @request_mutex: mutex to protect request/free before locking desc->lock * @dir: /proc/irq/ procfs entry * @debugfs_file: dentry for the debugfs file * @name: flow handler name for /proc/interrupts output */ struct irq_desc { struct irq_common_data irq_common_data; struct irq_data irq_data; struct irqstat __percpu *kstat_irqs; irq_flow_handler_t handle_irq; struct irqaction *action; /* IRQ action list */ unsigned int status_use_accessors; unsigned int core_internal_state__do_not_mess_with_it; unsigned int depth; /* nested irq disables */ unsigned int wake_depth; /* nested wake enables */ unsigned int tot_count; unsigned int irq_count; /* For detecting broken IRQs */ unsigned long last_unhandled; /* Aging timer for unhandled count */ unsigned int irqs_unhandled; atomic_t threads_handled; int threads_handled_last; raw_spinlock_t lock; struct cpumask *percpu_enabled; #ifdef CONFIG_SMP const struct cpumask *affinity_hint; struct irq_affinity_notify *affinity_notify; #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_var_t pending_mask; #endif #endif unsigned long threads_oneshot; atomic_t threads_active; wait_queue_head_t wait_for_threads; #ifdef CONFIG_PM_SLEEP unsigned int nr_actions; unsigned int no_suspend_depth; unsigned int cond_suspend_depth; unsigned int force_resume_depth; #endif #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; #endif #ifdef CONFIG_GENERIC_IRQ_DEBUGFS struct dentry *debugfs_file; const char *dev_name; #endif #ifdef CONFIG_SPARSE_IRQ struct rcu_head rcu; struct kobject kobj; #endif struct mutex request_mutex; int parent_irq; struct module *owner; const char *name; #ifdef CONFIG_HARDIRQS_SW_RESEND struct hlist_node resend_node; #endif } ____cacheline_internodealigned_in_smp; #ifdef CONFIG_SPARSE_IRQ extern void irq_lock_sparse(void); extern void irq_unlock_sparse(void); #else static inline void irq_lock_sparse(void) { } static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc, unsigned int cpu) { return desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, cpu) : 0; } static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) { return container_of(data->common, struct irq_desc, irq_common_data); } static inline unsigned int irq_desc_get_irq(struct irq_desc *desc) { return desc->irq_data.irq; } static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc) { return &desc->irq_data; } static inline struct irq_chip *irq_desc_get_chip(struct irq_desc *desc) { return desc->irq_data.chip; } static inline void *irq_desc_get_chip_data(struct irq_desc *desc) { return desc->irq_data.chip_data; } static inline void *irq_desc_get_handler_data(struct irq_desc *desc) { return desc->irq_common_data.handler_data; } /* * Architectures call this to let the generic IRQ layer * handle an interrupt. */ static inline void generic_handle_irq_desc(struct irq_desc *desc) { desc->handle_irq(desc); } int handle_irq_desc(struct irq_desc *desc); int generic_handle_irq(unsigned int irq); int generic_handle_irq_safe(unsigned int irq); #ifdef CONFIG_IRQ_DOMAIN /* * Convert a HW interrupt number to a logical one using a IRQ domain, * and handle the result interrupt number. Return -EINVAL if * conversion failed. */ int generic_handle_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq); int generic_handle_domain_irq_safe(struct irq_domain *domain, irq_hw_number_t hwirq); int generic_handle_domain_nmi(struct irq_domain *domain, irq_hw_number_t hwirq); #endif /* Test to see if a driver has successfully requested an irq */ static inline int irq_desc_has_action(struct irq_desc *desc) { return desc && desc->action != NULL; } /** * irq_set_handler_locked - Set irq handler from a locked region * @data: Pointer to the irq_data structure which identifies the irq * @handler: Flow control handler function for this interrupt * * Sets the handler in the irq descriptor associated to @data. * * Must be called with irq_desc locked and valid parameters. Typical * call site is the irq_set_type() callback. */ static inline void irq_set_handler_locked(struct irq_data *data, irq_flow_handler_t handler) { struct irq_desc *desc = irq_data_to_desc(data); desc->handle_irq = handler; } /** * irq_set_chip_handler_name_locked - Set chip, handler and name from a locked region * @data: Pointer to the irq_data structure for which the chip is set * @chip: Pointer to the new irq chip * @handler: Flow control handler function for this interrupt * @name: Name of the interrupt * * Replace the irq chip at the proper hierarchy level in @data and * sets the handler and name in the associated irq descriptor. * * Must be called with irq_desc locked and valid parameters. */ static inline void irq_set_chip_handler_name_locked(struct irq_data *data, const struct irq_chip *chip, irq_flow_handler_t handler, const char *name) { struct irq_desc *desc = irq_data_to_desc(data); desc->handle_irq = handler; desc->name = name; data->chip = (struct irq_chip *)chip; } bool irq_check_status_bit(unsigned int irq, unsigned int bitmask); static inline bool irq_balancing_disabled(unsigned int irq) { return irq_check_status_bit(irq, IRQ_NO_BALANCING_MASK); } static inline bool irq_is_percpu(unsigned int irq) { return irq_check_status_bit(irq, IRQ_PER_CPU); } static inline bool irq_is_percpu_devid(unsigned int irq) { return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID); } void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, struct lock_class_key *request_class); static inline void irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, struct lock_class_key *request_class) { if (IS_ENABLED(CONFIG_LOCKDEP)) __irq_set_lockdep_class(irq, lock_class, request_class); } #endif |
| 5 1013 1017 1017 1012 1015 1020 133 132 115 115 114 1014 471 1019 1014 1011 1011 1010 1013 1020 1014 1012 1014 1012 1017 1015 1012 1014 1011 1012 1013 111 111 103 111 111 110 111 110 102 1 1 2 96 3 102 100 74 50 74 85 84 31 109 110 85 73 91 91 50 81 56 41 81 110 116 114 113 118 118 117 118 117 117 117 117 118 116 112 117 65 38 66 55 66 118 117 118 118 118 118 118 117 118 118 5 112 112 111 110 110 70 110 81 81 42 10 118 112 3 113 8 8 8 8 8 8 8 8 8 8 8 7 8 8 8 10 9 8 8 8 7 7 7 7 7 7 1 8 10 24 5 19 19 19 19 19 24 1 3 1 2 1 1 1 1 2 3 4 102 1 102 11 3 11 129 128 128 126 128 124 125 102 123 1 123 1 3 2 2 113 113 96 113 113 113 113 112 113 131 130 129 124 115 113 111 113 113 113 113 112 113 113 24 90 18 131 16 11 135 136 134 134 132 132 132 132 130 24 24 24 132 137 5 4 3 2 1 1 1 1 2 5 142 141 137 137 4 142 20 20 20 5 5 5 1 5 4 1 3 2 1 5 5 5 5 4 4 4 4 5 98 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 | // SPDX-License-Identifier: GPL-2.0-only /* * Packet matching code. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/cache.h> #include <linux/capability.h> #include <linux/skbuff.h> #include <linux/kmod.h> #include <linux/vmalloc.h> #include <linux/netdevice.h> #include <linux/module.h> #include <net/ip.h> #include <net/compat.h> #include <linux/uaccess.h> #include <linux/mutex.h> #include <linux/proc_fs.h> #include <linux/err.h> #include <linux/cpumask.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <net/netfilter/nf_log.h> #include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv4 packet filter"); void *ipt_alloc_initial_table(const struct xt_table *info) { return xt_alloc_initial_table(ipt, IPT); } EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); /* Returns whether matches rule or not. */ /* Performance critical - called for every packet */ static inline bool ip_packet_match(const struct iphdr *ip, const char *indev, const char *outdev, const struct ipt_ip *ipinfo, int isfrag) { unsigned long ret; if (NF_INVF(ipinfo, IPT_INV_SRCIP, (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) || NF_INVF(ipinfo, IPT_INV_DSTIP, (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr)) return false; ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0)) return false; /* Check specific protocol */ if (ipinfo->proto && NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto)) return false; /* If we have a fragment rule but the packet is not a fragment * then we return zero */ if (NF_INVF(ipinfo, IPT_INV_FRAG, (ipinfo->flags & IPT_F_FRAG) && !isfrag)) return false; return true; } static bool ip_checkentry(const struct ipt_ip *ip) { if (ip->flags & ~IPT_F_MASK) return false; if (ip->invflags & ~IPT_INV_MASK) return false; return true; } static unsigned int ipt_error(struct sk_buff *skb, const struct xt_action_param *par) { net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); return NF_DROP; } /* Performance critical */ static inline struct ipt_entry * get_entry(const void *base, unsigned int offset) { return (struct ipt_entry *)(base + offset); } /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ static inline bool unconditional(const struct ipt_entry *e) { static const struct ipt_ip uncond; return e->target_offset == sizeof(struct ipt_entry) && memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; } /* for const-correctness */ static inline const struct xt_entry_target * ipt_get_target_c(const struct ipt_entry *e) { return ipt_get_target((struct ipt_entry *)e); } #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) static const char *const hooknames[] = { [NF_INET_PRE_ROUTING] = "PREROUTING", [NF_INET_LOCAL_IN] = "INPUT", [NF_INET_FORWARD] = "FORWARD", [NF_INET_LOCAL_OUT] = "OUTPUT", [NF_INET_POST_ROUTING] = "POSTROUTING", }; enum nf_ip_trace_comments { NF_IP_TRACE_COMMENT_RULE, NF_IP_TRACE_COMMENT_RETURN, NF_IP_TRACE_COMMENT_POLICY, }; static const char *const comments[] = { [NF_IP_TRACE_COMMENT_RULE] = "rule", [NF_IP_TRACE_COMMENT_RETURN] = "return", [NF_IP_TRACE_COMMENT_POLICY] = "policy", }; static const struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { .level = 4, .logflags = NF_LOG_DEFAULT_MASK, }, }, }; /* Mildly perf critical (only if packet tracing is on) */ static inline int get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, const char *hookname, const char **chainname, const char **comment, unsigned int *rulenum) { const struct xt_standard_target *t = (void *)ipt_get_target_c(s); if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) { /* Head of user chain: ERROR target with chainname */ *chainname = t->target.data; (*rulenum) = 0; } else if (s == e) { (*rulenum)++; if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP_TRACE_COMMENT_POLICY] : comments[NF_IP_TRACE_COMMENT_RETURN]; } return 1; } else (*rulenum)++; return 0; } static void trace_packet(struct net *net, const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, const char *tablename, const struct xt_table_info *private, const struct ipt_entry *e) { const struct ipt_entry *root; const char *hookname, *chainname, *comment; const struct ipt_entry *iter; unsigned int rulenum = 0; root = get_entry(private->entries, private->hook_entry[hook]); hookname = chainname = hooknames[hook]; comment = comments[NF_IP_TRACE_COMMENT_RULE]; xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) if (get_chainname_rulenum(iter, e, hookname, &chainname, &comment, &rulenum) != 0) break; nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } #endif static inline struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) { return (void *)entry + entry->next_offset; } /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ipt_do_table(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; const void *table_base; struct ipt_entry *e, **jumpstack; unsigned int stackidx, cpu; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; /* Initialization */ stackidx = 0; ip = ip_hdr(skb); indev = state->in ? state->in->name : nulldevname; outdev = state->out ? state->out->name : nulldevname; /* We handle fragments by dealing with the first fragment as * if it was a normal packet. All other fragments are treated * normally, except that they will NEVER match rules that ask * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; acpar.thoff = ip_hdrlen(skb); acpar.hotdrop = false; acpar.state = state; WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; /* Switch to alternate jumpstack if we're being invoked via TEE. * TEE issues XT_CONTINUE verdict on original skb so we must not * clobber the jumpstack. * * For recursion via REJECT or SYNPROXY the stack will be clobbered * but it is no problem since absolute verdict is issued by these. */ if (static_key_false(&xt_tee_enabled)) jumpstack += private->stacksize * current->in_nf_duplicate; e = get_entry(table_base, private->hook_entry[hook]); do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; struct xt_counters *counter; WARN_ON(!e); if (!ip_packet_match(ip, indev, outdev, &e->ip, acpar.fragoff)) { no_match: e = ipt_next_entry(e); continue; } xt_ematch_foreach(ematch, e) { acpar.match = ematch->u.kernel.match; acpar.matchinfo = ematch->data; if (!acpar.match->match(skb, &acpar)) goto no_match; } counter = xt_get_this_cpu_counter(&e->counters); ADD_COUNTER(*counter, skb->len, 1); t = ipt_get_target_c(e); WARN_ON(!t->u.kernel.target); #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) trace_packet(state->net, skb, hook, state->in, state->out, table->name, private, e); #endif /* Standard target? */ if (!t->u.kernel.target->target) { int v; v = ((struct xt_standard_target *)t)->verdict; if (v < 0) { /* Pop from stack? */ if (v != XT_RETURN) { verdict = (unsigned int)(-v) - 1; break; } if (stackidx == 0) { e = get_entry(table_base, private->underflow[hook]); } else { e = jumpstack[--stackidx]; e = ipt_next_entry(e); } continue; } if (table_base + v != ipt_next_entry(e) && !(e->ip.flags & IPT_F_GOTO)) { if (unlikely(stackidx >= private->stacksize)) { verdict = NF_DROP; break; } jumpstack[stackidx++] = e; } e = get_entry(table_base, v); continue; } acpar.target = t->u.kernel.target; acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); if (verdict == XT_CONTINUE) { /* Target might have changed stuff. */ ip = ip_hdr(skb); e = ipt_next_entry(e); } else { /* Verdict */ break; } } while (!acpar.hotdrop); xt_write_recseq_end(addend); local_bh_enable(); if (acpar.hotdrop) return NF_DROP; else return verdict; } /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0, unsigned int *offsets) { unsigned int hook; /* No recursion; use packet counter to save back ptrs (reset to 0 as we leave), and comefrom to save source hook bitmask */ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ipt_entry *e = entry0 + pos; if (!(valid_hooks & (1 << hook))) continue; /* Set initial back pointer. */ e->counters.pcnt = pos; for (;;) { const struct xt_standard_target *t = (void *)ipt_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_INET_NUMHOOKS)) return 0; e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && t->verdict < 0) || visited) { unsigned int oldpos, size; /* Return: backtrack through the last big jump. */ do { e->comefrom ^= (1<<NF_INET_NUMHOOKS); oldpos = pos; pos = e->counters.pcnt; e->counters.pcnt = 0; /* We're at the start. */ if (pos == oldpos) goto next; e = entry0 + pos; } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; e = entry0 + pos + size; if (pos + size >= newinfo->size) return 0; e->counters.pcnt = pos; pos += size; } else { int newpos = t->verdict; if (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; if (newpos >= newinfo->size) return 0; } e = entry0 + newpos; e->counters.pcnt = pos; pos = newpos; } } next: ; } return 1; } static void cleanup_match(struct xt_entry_match *m, struct net *net) { struct xt_mtdtor_param par; par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV4; if (par.match->destroy != NULL) par.match->destroy(&par); module_put(par.match->me); } static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; par->match = m->u.kernel.match; par->matchinfo = m->data; return xt_check_match(par, m->u.match_size - sizeof(*m), ip->proto, ip->invflags & IPT_INV_PROTO); } static int find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { struct xt_match *match; int ret; match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, m->u.user.revision); if (IS_ERR(match)) return PTR_ERR(match); m->u.kernel.match = match; ret = check_match(m, par); if (ret) goto err; return 0; err: module_put(m->u.kernel.match->me); return ret; } static int check_target(struct ipt_entry *e, struct net *net, const char *name) { struct xt_entry_target *t = ipt_get_target(e); struct xt_tgchk_param par = { .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, .targinfo = t->data, .hook_mask = e->comefrom, .family = NFPROTO_IPV4, }; return xt_check_target(&par, t->u.target_size - sizeof(*t), e->ip.proto, e->ip.invflags & IPT_INV_PROTO); } static int find_check_entry(struct ipt_entry *e, struct net *net, const char *name, unsigned int size, struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; struct xt_target *target; int ret; unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) return -ENOMEM; j = 0; memset(&mtpar, 0, sizeof(mtpar)); mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV4; xt_ematch_foreach(ematch, e) { ret = find_check_match(ematch, &mtpar); if (ret != 0) goto cleanup_matches; ++j; } t = ipt_get_target(e); target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { ret = PTR_ERR(target); goto cleanup_matches; } t->u.kernel.target = target; ret = check_target(e, net, name); if (ret) goto err; return 0; err: module_put(t->u.kernel.target->me); cleanup_matches: xt_ematch_foreach(ematch, e) { if (j-- == 0) break; cleanup_match(ematch, net); } xt_percpu_counter_free(&e->counters); return ret; } static bool check_underflow(const struct ipt_entry *e) { const struct xt_entry_target *t; unsigned int verdict; if (!unconditional(e)) return false; t = ipt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct xt_standard_target *)t)->verdict; verdict = -verdict - 1; return verdict == NF_DROP || verdict == NF_ACCEPT; } static int check_entry_size_and_hooks(struct ipt_entry *e, struct xt_table_info *newinfo, const unsigned char *base, const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, unsigned int valid_hooks) { unsigned int h; int err; if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit || (unsigned char *)e + e->next_offset > limit) return -EINVAL; if (e->next_offset < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) return -EINVAL; if (!ip_checkentry(&e->ip)) return -EINVAL; err = xt_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (err) return err; /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) return -EINVAL; newinfo->underflow[h] = underflows[h]; } } /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; return 0; } static void cleanup_entry(struct ipt_entry *e, struct net *net) { struct xt_tgdtor_param par; struct xt_entry_target *t; struct xt_entry_match *ematch; /* Cleanup all matches */ xt_ematch_foreach(ematch, e) cleanup_match(ematch, net); t = ipt_get_target(e); par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV4; if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); xt_percpu_counter_free(&e->counters); } /* Checks and translates the user-supplied table segment (held in newinfo) */ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ipt_replace *repl) { struct xt_percpu_counter_alloc_state alloc_state = { 0 }; struct ipt_entry *iter; unsigned int *offsets; unsigned int i; int ret = 0; newinfo->size = repl->size; newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = 0xFFFFFFFF; newinfo->underflow[i] = 0xFFFFFFFF; } offsets = xt_alloc_entry_offsets(newinfo->number); if (!offsets) return -ENOMEM; i = 0; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, entry0 + repl->size, repl->hook_entry, repl->underflow, repl->valid_hooks); if (ret != 0) goto out_free; if (i < repl->num_entries) offsets[i] = (void *)iter - entry0; ++i; if (strcmp(ipt_get_target(iter)->u.user.name, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } ret = -EINVAL; if (i != repl->num_entries) goto out_free; ret = xt_check_table_hooks(newinfo, repl->valid_hooks); if (ret) goto out_free; if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { ret = -ELOOP; goto out_free; } kvfree(offsets); /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { ret = find_check_entry(iter, net, repl->name, repl->size, &alloc_state); if (ret != 0) break; ++i; } if (ret != 0) { xt_entry_foreach(iter, entry0, newinfo->size) { if (i-- == 0) break; cleanup_entry(iter, net); } return ret; } return ret; out_free: kvfree(offsets); return ret; } static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { struct ipt_entry *iter; unsigned int cpu; unsigned int i; for_each_possible_cpu(cpu) { seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); bcnt = tmp->bcnt; pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; /* macro does multi eval of i */ cond_resched(); } } } static void get_old_counters(const struct xt_table_info *t, struct xt_counters counters[]) { struct ipt_entry *iter; unsigned int cpu, i; for_each_possible_cpu(cpu) { i = 0; xt_entry_foreach(iter, t->entries, t->size) { const struct xt_counters *tmp; tmp = xt_get_per_cpu_counter(&iter->counters, cpu); ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); ++i; /* macro does multi eval of i */ } cond_resched(); } } static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; counters = vzalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); get_counters(private, counters); return counters; } static int copy_entries_to_user(unsigned int total_size, const struct xt_table *table, void __user *userptr) { unsigned int off, num; const struct ipt_entry *e; struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); loc_cpu_entry = private->entries; /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ unsigned int i; const struct xt_entry_match *m; const struct xt_entry_target *t; e = loc_cpu_entry + off; if (copy_to_user(userptr + off, e, sizeof(*e))) { ret = -EFAULT; goto free_counters; } if (copy_to_user(userptr + off + offsetof(struct ipt_entry, counters), &counters[num], sizeof(counters[num])) != 0) { ret = -EFAULT; goto free_counters; } for (i = sizeof(struct ipt_entry); i < e->target_offset; i += m->u.match_size) { m = (void *)e + i; if (xt_match_to_user(m, userptr + off + i)) { ret = -EFAULT; goto free_counters; } } t = ipt_get_target_c(e); if (xt_target_to_user(t, userptr + off + e->target_offset)) { ret = -EFAULT; goto free_counters; } } free_counters: vfree(counters); return ret; } #ifdef CONFIG_NETFILTER_XTABLES_COMPAT static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; if (v > 0) v += xt_compat_calc_jump(AF_INET, v); memcpy(dst, &v, sizeof(v)); } static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; if (cv > 0) cv -= xt_compat_calc_jump(AF_INET, cv); return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } static int compat_calc_entry(const struct ipt_entry *e, const struct xt_table_info *info, const void *base, struct xt_table_info *newinfo) { const struct xt_entry_match *ematch; const struct xt_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); entry_offset = (void *)e - base; xt_ematch_foreach(ematch, e) off += xt_compat_match_offset(ematch->u.kernel.match); t = ipt_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(AF_INET, entry_offset, off); if (ret) return ret; for (i = 0; i < NF_INET_NUMHOOKS; i++) { if (info->hook_entry[i] && (e < (struct ipt_entry *)(base + info->hook_entry[i]))) newinfo->hook_entry[i] -= off; if (info->underflow[i] && (e < (struct ipt_entry *)(base + info->underflow[i]))) newinfo->underflow[i] -= off; } return 0; } static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct ipt_entry *iter; const void *loc_cpu_entry; int ret; if (!newinfo || !info) return -EINVAL; /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries; ret = xt_compat_init_offsets(AF_INET, info->number); if (ret) return ret; xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) return ret; } return 0; } #endif static int get_info(struct net *net, void __user *user, const int *len) { char name[XT_TABLE_MAXNAMELEN]; struct xt_table *t; int ret; if (*len != sizeof(struct ipt_getinfo)) return -EINVAL; if (copy_from_user(name, user, sizeof(name)) != 0) return -EFAULT; name[XT_TABLE_MAXNAMELEN-1] = '\0'; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) xt_compat_lock(AF_INET); #endif t = xt_request_find_table_lock(net, AF_INET, name); if (!IS_ERR(t)) { struct ipt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT struct xt_table_info tmp; if (in_compat_syscall()) { ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET); private = &tmp; } #endif memset(&info, 0, sizeof(info)); info.valid_hooks = t->valid_hooks; memcpy(info.hook_entry, private->hook_entry, sizeof(info.hook_entry)); |