92 93 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 | /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/backing-dev.h * * low-level device information and state which is propagated up through * to high-level code. */ #ifndef _LINUX_BACKING_DEV_H #define _LINUX_BACKING_DEV_H #include <linux/kernel.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/device.h> #include <linux/writeback.h> #include <linux/backing-dev-defs.h> #include <linux/slab.h> static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) { kref_get(&bdi->refcnt); return bdi; } struct backing_dev_info *bdi_get_by_id(u64 id); void bdi_put(struct backing_dev_info *bdi); __printf(2, 3) int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...); __printf(2, 0) int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args); void bdi_set_owner(struct backing_dev_info *bdi, struct device *owner); void bdi_unregister(struct backing_dev_info *bdi); struct backing_dev_info *bdi_alloc(int node_id); void wb_start_background_writeback(struct bdi_writeback *wb); void wb_workfn(struct work_struct *work); void wb_wait_for_completion(struct wb_completion *done); extern spinlock_t bdi_lock; extern struct list_head bdi_list; extern struct workqueue_struct *bdi_wq; static inline bool wb_has_dirty_io(struct bdi_writeback *wb) { return test_bit(WB_has_dirty_io, &wb->state); } static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi) { /* * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are * any dirty wbs. See wb_update_write_bandwidth(). */ return atomic_long_read(&bdi->tot_write_bandwidth); } static inline void wb_stat_mod(struct bdi_writeback *wb, enum wb_stat_item item, s64 amount) { percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); } static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { wb_stat_mod(wb, item, 1); } static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { wb_stat_mod(wb, item, -1); } static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { return percpu_counter_read_positive(&wb->stat[item]); } static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item) { return percpu_counter_sum_positive(&wb->stat[item]); } extern void wb_writeout_inc(struct bdi_writeback *wb); /* * maximal error of a stat counter. */ static inline unsigned long wb_stat_error(void) { #ifdef CONFIG_SMP return nr_cpu_ids * WB_STAT_BATCH; #else return 1; #endif } /* BDI ratio is expressed as part per 1000000 for finer granularity. */ #define BDI_RATIO_SCALE 10000 u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); /* * Flags in backing_dev_info::capability * * BDI_CAP_WRITEBACK: Supports dirty page writeback, and dirty pages * should contribute to accounting * BDI_CAP_WRITEBACK_ACCT: Automatically account writeback pages * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold */ #define BDI_CAP_WRITEBACK (1 << 0) #define BDI_CAP_WRITEBACK_ACCT (1 << 1) #define BDI_CAP_STRICTLIMIT (1 << 2) extern struct backing_dev_info noop_backing_dev_info; int bdi_init(struct backing_dev_info *bdi); /** * writeback_in_progress - determine whether there is writeback in progress * @wb: bdi_writeback of interest * * Determine whether there is writeback waiting to be handled against a * bdi_writeback. */ static inline bool writeback_in_progress(struct bdi_writeback *wb) { return test_bit(WB_writeback_running, &wb->state); } struct backing_dev_info *inode_to_bdi(struct inode *inode); static inline bool mapping_can_writeback(struct address_space *mapping) { return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK; } #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css); struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css, gfp_t gfp); void wb_memcg_offline(struct mem_cgroup *memcg); void wb_blkcg_offline(struct cgroup_subsys_state *css); /** * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode * @inode: inode of interest * * Cgroup writeback requires support from the filesystem. Also, both memcg and * iocg have to be on the default hierarchy. Test whether all conditions are * met. * * Note that the test result may change dynamically on the same inode * depending on how memcg and iocg are configured. */ static inline bool inode_cgwb_enabled(struct inode *inode) { struct backing_dev_info *bdi = inode_to_bdi(inode); return cgroup_subsys_on_dfl(memory_cgrp_subsys) && cgroup_subsys_on_dfl(io_cgrp_subsys) && (bdi->capabilities & BDI_CAP_WRITEBACK) && (inode->i_sb->s_iflags & SB_I_CGROUPWB); } /** * wb_find_current - find wb for %current on a bdi * @bdi: bdi of interest * * Find the wb of @bdi which matches both the memcg and blkcg of %current. * Must be called under rcu_read_lock() which protects the returend wb. * NULL if not found. */ static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) { struct cgroup_subsys_state *memcg_css; struct bdi_writeback *wb; memcg_css = task_css(current, memory_cgrp_id); if (!memcg_css->parent) return &bdi->wb; wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id); /* * %current's blkcg equals the effective blkcg of its memcg. No * need to use the relatively expensive cgroup_get_e_css(). */ if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id))) return wb; return NULL; } /** * wb_get_create_current - get or create wb for %current on a bdi * @bdi: bdi of interest * @gfp: allocation mask * * Equivalent to wb_get_create() on %current's memcg. This function is * called from a relatively hot path and optimizes the common cases using * wb_find_current(). */ static inline struct bdi_writeback * wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) { struct bdi_writeback *wb; rcu_read_lock(); wb = wb_find_current(bdi); if (wb && unlikely(!wb_tryget(wb))) wb = NULL; rcu_read_unlock(); if (unlikely(!wb)) { struct cgroup_subsys_state *memcg_css; memcg_css = task_get_css(current, memory_cgrp_id); wb = wb_get_create(bdi, memcg_css, gfp); css_put(memcg_css); } return wb; } /** * inode_to_wb - determine the wb of an inode * @inode: inode of interest * * Returns the wb @inode is currently associated with. The caller must be * holding either @inode->i_lock, the i_pages lock, or the * associated wb's list_lock. */ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) { #ifdef CONFIG_LOCKDEP WARN_ON_ONCE(debug_locks && (!lockdep_is_held(&inode->i_lock) && !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) && !lockdep_is_held(&inode->i_wb->list_lock))); #endif return inode->i_wb; } static inline struct bdi_writeback *inode_to_wb_wbc( struct inode *inode, struct writeback_control *wbc) { /* * If wbc does not have inode attached, it means cgroup writeback was * disabled when wbc started. Just use the default wb in that case. */ return wbc->wb ? wbc->wb : &inode_to_bdi(inode)->wb; } /** * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction * @inode: target inode * @cookie: output param, to be passed to the end function * * The caller wants to access the wb associated with @inode but isn't * holding inode->i_lock, the i_pages lock or wb->list_lock. This * function determines the wb associated with @inode and ensures that the * association doesn't change until the transaction is finished with * unlocked_inode_to_wb_end(). * * The caller must call unlocked_inode_to_wb_end() with *@cookie afterwards and * can't sleep during the transaction. IRQs may or may not be disabled on * return. */ static inline struct bdi_writeback * unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie) { rcu_read_lock(); /* * Paired with store_release in inode_switch_wbs_work_fn() and * ensures that we see the new wb if we see cleared I_WB_SWITCH. */ cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; if (unlikely(cookie->locked)) xa_lock_irqsave(&inode->i_mapping->i_pages, cookie->flags); /* * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages * lock. inode_to_wb() will bark. Deref directly. */ return inode->i_wb; } /** * unlocked_inode_to_wb_end - end inode wb access transaction * @inode: target inode * @cookie: @cookie from unlocked_inode_to_wb_begin() */ static inline void unlocked_inode_to_wb_end(struct inode *inode, struct wb_lock_cookie *cookie) { if (unlikely(cookie->locked)) xa_unlock_irqrestore(&inode->i_mapping->i_pages, cookie->flags); rcu_read_unlock(); } #else /* CONFIG_CGROUP_WRITEBACK */ static inline bool inode_cgwb_enabled(struct inode *inode) { return false; } static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) { return &bdi->wb; } static inline struct bdi_writeback * wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) { return &bdi->wb; } static inline struct bdi_writeback *inode_to_wb(struct inode *inode) { return &inode_to_bdi(inode)->wb; } static inline struct bdi_writeback *inode_to_wb_wbc( struct inode *inode, struct writeback_control *wbc) { return inode_to_wb(inode); } static inline struct bdi_writeback * unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie) { return inode_to_wb(inode); } static inline void unlocked_inode_to_wb_end(struct inode *inode, struct wb_lock_cookie *cookie) { } static inline void wb_memcg_offline(struct mem_cgroup *memcg) { } static inline void wb_blkcg_offline(struct cgroup_subsys_state *css) { } #endif /* CONFIG_CGROUP_WRITEBACK */ const char *bdi_dev_name(struct backing_dev_info *bdi); #endif /* _LINUX_BACKING_DEV_H */ |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 | // SPDX-License-Identifier: GPL-2.0-only #ifndef _NFT_SET_PIPAPO_H #include <linux/log2.h> #include <net/ipv6.h> /* For the maximum length of a field */ /* Count of concatenated fields depends on count of 32-bit nftables registers */ #define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT /* Restrict usage to multiple fields, make sure rbtree is used otherwise */ #define NFT_PIPAPO_MIN_FIELDS 2 /* Largest supported field size */ #define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr)) #define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE) /* Bits to be grouped together in table buckets depending on set size */ #define NFT_PIPAPO_GROUP_BITS_INIT NFT_PIPAPO_GROUP_BITS_SMALL_SET #define NFT_PIPAPO_GROUP_BITS_SMALL_SET 8 #define NFT_PIPAPO_GROUP_BITS_LARGE_SET 4 #define NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4 \ BUILD_BUG_ON((NFT_PIPAPO_GROUP_BITS_SMALL_SET != 8) || \ (NFT_PIPAPO_GROUP_BITS_LARGE_SET != 4)) #define NFT_PIPAPO_GROUPS_PER_BYTE(f) (BITS_PER_BYTE / (f)->bb) /* If a lookup table gets bigger than NFT_PIPAPO_LT_SIZE_HIGH, switch to the * small group width, and switch to the big group width if the table gets * smaller than NFT_PIPAPO_LT_SIZE_LOW. * * Picking 2MiB as threshold (for a single table) avoids as much as possible * crossing page boundaries on most architectures (x86-64 and MIPS huge pages, * ARMv7 supersections, POWER "large" pages, SPARC Level 1 regions, etc.), which * keeps performance nice in case kvmalloc() gives us non-contiguous areas. */ #define NFT_PIPAPO_LT_SIZE_THRESHOLD (1 << 21) #define NFT_PIPAPO_LT_SIZE_HYSTERESIS (1 << 16) #define NFT_PIPAPO_LT_SIZE_HIGH NFT_PIPAPO_LT_SIZE_THRESHOLD #define NFT_PIPAPO_LT_SIZE_LOW NFT_PIPAPO_LT_SIZE_THRESHOLD - \ NFT_PIPAPO_LT_SIZE_HYSTERESIS /* Fields are padded to 32 bits in input registers */ #define NFT_PIPAPO_GROUPS_PADDED_SIZE(f) \ (round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32))) #define NFT_PIPAPO_GROUPS_PADDING(f) \ (NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups / \ NFT_PIPAPO_GROUPS_PER_BYTE(f)) /* Number of buckets given by 2 ^ n, with n bucket bits */ #define NFT_PIPAPO_BUCKETS(bb) (1 << (bb)) /* Each n-bit range maps to up to n * 2 rules */ #define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2)) /* Use the rest of mapping table buckets for rule indices, but it makes no sense * to exceed 32 bits */ #if BITS_PER_LONG == 64 #define NFT_PIPAPO_MAP_TOBITS 32 #else #define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS) #endif /* ...which gives us the highest allowed index for a rule */ #define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \ - (1UL << NFT_PIPAPO_MAP_NBITS)) /* Definitions for vectorised implementations */ #ifdef NFT_PIPAPO_ALIGN #define NFT_PIPAPO_ALIGN_HEADROOM \ (NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN) #define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN)) #else #define NFT_PIPAPO_ALIGN_HEADROOM 0 #define NFT_PIPAPO_LT_ALIGN(lt) (lt) #endif /* NFT_PIPAPO_ALIGN */ #define nft_pipapo_for_each_field(field, index, match) \ for ((field) = (match)->f, (index) = 0; \ (index) < (match)->field_count; \ (index)++, (field)++) /** * union nft_pipapo_map_bucket - Bucket of mapping table * @to: First rule number (in next field) this rule maps to * @n: Number of rules (in next field) this rule maps to * @e: If there's no next field, pointer to element this rule maps to */ union nft_pipapo_map_bucket { struct { #if BITS_PER_LONG == 64 static_assert(NFT_PIPAPO_MAP_TOBITS <= 32); u32 to; static_assert(NFT_PIPAPO_MAP_NBITS <= 32); u32 n; #else unsigned long to:NFT_PIPAPO_MAP_TOBITS; unsigned long n:NFT_PIPAPO_MAP_NBITS; #endif }; struct nft_pipapo_elem *e; }; /** * struct nft_pipapo_field - Lookup, mapping tables and related data for a field * @rules: Number of inserted rules * @bsize: Size of each bucket in lookup table, in longs * @rules_alloc: Number of allocated rules, always >= rules * @groups: Amount of bit groups * @bb: Number of bits grouped together in lookup table buckets * @lt: Lookup table: 'groups' rows of buckets * @mt: Mapping table: one bucket per rule */ struct nft_pipapo_field { unsigned int rules; unsigned int bsize; unsigned int rules_alloc; u8 groups; u8 bb; unsigned long *lt; union nft_pipapo_map_bucket *mt; }; /** * struct nft_pipapo_scratch - percpu data used for lookup and matching * @map_index: Current working bitmap index, toggled between field matches * @align_off: Offset to get the originally allocated address * @map: store partial matching results during lookup */ struct nft_pipapo_scratch { u8 map_index; u32 align_off; unsigned long map[]; }; /** * struct nft_pipapo_match - Data used for lookup and matching * @field_count: Amount of fields in set * @bsize_max: Maximum lookup table bucket size of all fields, in longs * @scratch: Preallocated per-CPU maps for partial matching results * @rcu: Matching data is swapped on commits * @f: Fields, with lookup and mapping tables */ struct nft_pipapo_match { u8 field_count; unsigned int bsize_max; struct nft_pipapo_scratch * __percpu *scratch; struct rcu_head rcu; struct nft_pipapo_field f[] __counted_by(field_count); }; /** * struct nft_pipapo - Representation of a set * @match: Currently in-use matching data * @clone: Copy where pending insertions and deletions are kept * @width: Total bytes to be matched for one packet, including padding * @last_gc: Timestamp of last garbage collection run, jiffies */ struct nft_pipapo { struct nft_pipapo_match __rcu *match; struct nft_pipapo_match *clone; int width; unsigned long last_gc; }; struct nft_pipapo_elem; /** * struct nft_pipapo_elem - API-facing representation of single set element * @priv: element placeholder * @ext: nftables API extensions */ struct nft_pipapo_elem { struct nft_elem_priv priv; struct nft_set_ext ext; }; int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, unsigned long *dst, const union nft_pipapo_map_bucket *mt, bool match_only); /** * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets * @f: Field including lookup table * @dst: Area to store result * @data: Input data selecting table buckets */ static inline void pipapo_and_field_buckets_4bit(const struct nft_pipapo_field *f, unsigned long *dst, const u8 *data) { unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt); int group; for (group = 0; group < f->groups; group += BITS_PER_BYTE / 4, data++) { u8 v; v = *data >> 4; __bitmap_and(dst, dst, lt + v * f->bsize, f->bsize * BITS_PER_LONG); lt += f->bsize * NFT_PIPAPO_BUCKETS(4); v = *data & 0x0f; __bitmap_and(dst, dst, lt + v * f->bsize, f->bsize * BITS_PER_LONG); lt += f->bsize * NFT_PIPAPO_BUCKETS(4); } } /** * pipapo_and_field_buckets_8bit() - Intersect 8-bit buckets * @f: Field including lookup table * @dst: Area to store result * @data: Input data selecting table buckets */ static inline void pipapo_and_field_buckets_8bit(const struct nft_pipapo_field *f, unsigned long *dst, const u8 *data) { unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt); int group; for (group = 0; group < f->groups; group++, data++) { __bitmap_and(dst, dst, lt + *data * f->bsize, f->bsize * BITS_PER_LONG); lt += f->bsize * NFT_PIPAPO_BUCKETS(8); } } /** * pipapo_estimate_size() - Estimate worst-case for set size * @desc: Set description, element count and field description used here * * The size for this set type can vary dramatically, as it depends on the number * of rules (composing netmasks) the entries expand to. We compute the worst * case here. * * In general, for a non-ranged entry or a single composing netmask, we need * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that * is, each input bit needs four bits of matching data), plus a bucket in the * mapping table for each field. * * Return: worst-case set size in bytes, 0 on any overflow */ static u64 pipapo_estimate_size(const struct nft_set_desc *desc) { unsigned long entry_size; u64 size; int i; for (i = 0, entry_size = 0; i < desc->field_count; i++) { unsigned long rules; if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES) return 0; /* Worst-case ranges for each concatenated field: each n-bit * field can expand to up to n * 2 rules in each bucket, and * each rule also needs a mapping bucket. */ rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2; entry_size += rules * NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) / BITS_PER_BYTE; entry_size += rules * sizeof(union nft_pipapo_map_bucket); } /* Rules in lookup and mapping tables are needed for each entry */ size = desc->size * entry_size; if (size && div_u64(size, desc->size) != entry_size) return 0; size += sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match) * 2; size += sizeof(struct nft_pipapo_field) * desc->field_count; return size; } /** * pipapo_resmap_init() - Initialise result map before first use * @m: Matching data, including mapping table * @res_map: Result map * * Initialize all bits covered by the first field to one, so that after * the first step, only the matching bits of the first bit group remain. * * If other fields have a large bitmap, set remainder of res_map to 0. */ static inline void pipapo_resmap_init(const struct nft_pipapo_match *m, unsigned long *res_map) { const struct nft_pipapo_field *f = m->f; int i; for (i = 0; i < f->bsize; i++) res_map[i] = ULONG_MAX; for (i = f->bsize; i < m->bsize_max; i++) res_map[i] = 0ul; } #endif /* _NFT_SET_PIPAPO_H */ |
4 3 2 3 873 874 15 15 2 2 2 2 2 2 2 2 2 2 1 1 2 2 13 13 13 1 2 13 37 1 33 3 8 1 1 1 1 1 7 1 2 2 1 1 2 2 1 8 1 1 1 1 1 1 1 1 7 1 2 2 1 2 1 3 20 1 2 2 19 4 1 1 4 4 1 19 3 2 3 2 2 2 21 1 2 2 1 2 1 12 10 4 3 2 3 6 5 4 1 1 9 4 24 1 13 3 17 1 1 5 9 1 1 2 1 1 1 1 4 3 3 1 1 1 23 1 1 20 1 1 1 1 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 | // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* gw.c - CAN frame Gateway/Router/Bridge with netlink interface * * Copyright (c) 2019 Volkswagen Group Electronic Research * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Volkswagen nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * The provided data structures and external interfaces from this code * are not restricted to be used by modules with a GPL compatible license. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/rculist.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> #include <linux/can/skb.h> #include <linux/can/gw.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> #define CAN_GW_NAME "can-gw" MODULE_DESCRIPTION("PF_CAN netlink gateway"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>"); MODULE_ALIAS(CAN_GW_NAME); #define CGW_MIN_HOPS 1 #define CGW_MAX_HOPS 6 #define CGW_DEFAULT_HOPS 1 static unsigned int max_hops __read_mostly = CGW_DEFAULT_HOPS; module_param(max_hops, uint, 0444); MODULE_PARM_DESC(max_hops, "maximum " CAN_GW_NAME " routing hops for CAN frames " "(valid values: " __stringify(CGW_MIN_HOPS) "-" __stringify(CGW_MAX_HOPS) " hops, " "default: " __stringify(CGW_DEFAULT_HOPS) ")"); static struct notifier_block notifier; static struct kmem_cache *cgw_cache __read_mostly; /* structure that contains the (on-the-fly) CAN frame modifications */ struct cf_mod { struct { struct canfd_frame and; struct canfd_frame or; struct canfd_frame xor; struct canfd_frame set; } modframe; struct { u8 and; u8 or; u8 xor; u8 set; } modtype; void (*modfunc[MAX_MODFUNCTIONS])(struct canfd_frame *cf, struct cf_mod *mod); /* CAN frame checksum calculation after CAN frame modifications */ struct { struct cgw_csum_xor xor; struct cgw_csum_crc8 crc8; } csum; struct { void (*xor)(struct canfd_frame *cf, struct cgw_csum_xor *xor); void (*crc8)(struct canfd_frame *cf, struct cgw_csum_crc8 *crc8); } csumfunc; u32 uid; }; /* So far we just support CAN -> CAN routing and frame modifications. * * The internal can_can_gw structure contains data and attributes for * a CAN -> CAN gateway job. */ struct can_can_gw { struct can_filter filter; int src_idx; int dst_idx; }; /* list entry for CAN gateways jobs */ struct cgw_job { struct hlist_node list; struct rcu_head rcu; u32 handled_frames; u32 dropped_frames; u32 deleted_frames; struct cf_mod mod; union { /* CAN frame data source */ struct net_device *dev; } src; union { /* CAN frame data destination */ struct net_device *dev; } dst; union { struct can_can_gw ccgw; /* tbc */ }; u8 gwtype; u8 limit_hops; u16 flags; }; /* modification functions that are invoked in the hot path in can_can_gw_rcv */ #define MODFUNC(func, op) static void func(struct canfd_frame *cf, \ struct cf_mod *mod) { op ; } MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id) MODFUNC(mod_and_len, cf->len &= mod->modframe.and.len) MODFUNC(mod_and_flags, cf->flags &= mod->modframe.and.flags) MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data) MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id) MODFUNC(mod_or_len, cf->len |= mod->modframe.or.len) MODFUNC(mod_or_flags, cf->flags |= mod->modframe.or.flags) MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data) MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id) MODFUNC(mod_xor_len, cf->len ^= mod->modframe.xor.len) MODFUNC(mod_xor_flags, cf->flags ^= mod->modframe.xor.flags) MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data) MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id) MODFUNC(mod_set_len, cf->len = mod->modframe.set.len) MODFUNC(mod_set_flags, cf->flags = mod->modframe.set.flags) MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data) static void mod_and_fddata(struct canfd_frame *cf, struct cf_mod *mod) { int i; for (i = 0; i < CANFD_MAX_DLEN; i += 8) *(u64 *)(cf->data + i) &= *(u64 *)(mod->modframe.and.data + i); } static void mod_or_fddata(struct canfd_frame *cf, struct cf_mod *mod) { int i; for (i = 0; i < CANFD_MAX_DLEN; i += 8) *(u64 *)(cf->data + i) |= *(u64 *)(mod->modframe.or.data + i); } static void mod_xor_fddata(struct canfd_frame *cf, struct cf_mod *mod) { int i; for (i = 0; i < CANFD_MAX_DLEN; i += 8) *(u64 *)(cf->data + i) ^= *(u64 *)(mod->modframe.xor.data + i); } static void mod_set_fddata(struct canfd_frame *cf, struct cf_mod *mod) { memcpy(cf->data, mod->modframe.set.data, CANFD_MAX_DLEN); } /* retrieve valid CC DLC value and store it into 'len' */ static void mod_retrieve_ccdlc(struct canfd_frame *cf) { struct can_frame *ccf = (struct can_frame *)cf; /* len8_dlc is only valid if len == CAN_MAX_DLEN */ if (ccf->len != CAN_MAX_DLEN) return; /* do we have a valid len8_dlc value from 9 .. 15 ? */ if (ccf->len8_dlc > CAN_MAX_DLEN && ccf->len8_dlc <= CAN_MAX_RAW_DLC) ccf->len = ccf->len8_dlc; } /* convert valid CC DLC value in 'len' into struct can_frame elements */ static void mod_store_ccdlc(struct canfd_frame *cf) { struct can_frame *ccf = (struct can_frame *)cf; /* clear potential leftovers */ ccf->len8_dlc = 0; /* plain data length 0 .. 8 - that was easy */ if (ccf->len <= CAN_MAX_DLEN) return; /* potentially broken values are caught in can_can_gw_rcv() */ if (ccf->len > CAN_MAX_RAW_DLC) return; /* we have a valid dlc value from 9 .. 15 in ccf->len */ ccf->len8_dlc = ccf->len; ccf->len = CAN_MAX_DLEN; } static void mod_and_ccdlc(struct canfd_frame *cf, struct cf_mod *mod) { mod_retrieve_ccdlc(cf); mod_and_len(cf, mod); mod_store_ccdlc(cf); } static void mod_or_ccdlc(struct canfd_frame *cf, struct cf_mod *mod) { mod_retrieve_ccdlc(cf); mod_or_len(cf, mod); mod_store_ccdlc(cf); } static void mod_xor_ccdlc(struct canfd_frame *cf, struct cf_mod *mod) { mod_retrieve_ccdlc(cf); mod_xor_len(cf, mod); mod_store_ccdlc(cf); } static void mod_set_ccdlc(struct canfd_frame *cf, struct cf_mod *mod) { mod_set_len(cf, mod); mod_store_ccdlc(cf); } static void canframecpy(struct canfd_frame *dst, struct can_frame *src) { /* Copy the struct members separately to ensure that no uninitialized * data are copied in the 3 bytes hole of the struct. This is needed * to make easy compares of the data in the struct cf_mod. */ dst->can_id = src->can_id; dst->len = src->len; *(u64 *)dst->data = *(u64 *)src->data; } static void canfdframecpy(struct canfd_frame *dst, struct canfd_frame *src) { /* Copy the struct members separately to ensure that no uninitialized * data are copied in the 2 bytes hole of the struct. This is needed * to make easy compares of the data in the struct cf_mod. */ dst->can_id = src->can_id; dst->flags = src->flags; dst->len = src->len; memcpy(dst->data, src->data, CANFD_MAX_DLEN); } static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re, struct rtcanmsg *r) { s8 dlen = CAN_MAX_DLEN; if (r->flags & CGW_FLAGS_CAN_FD) dlen = CANFD_MAX_DLEN; /* absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0] * relative to received dlc -1 .. -8 : * e.g. for received dlc = 8 * -1 => index = 7 (data[7]) * -3 => index = 5 (data[5]) * -8 => index = 0 (data[0]) */ if (fr >= -dlen && fr < dlen && to >= -dlen && to < dlen && re >= -dlen && re < dlen) return 0; else return -EINVAL; } static inline int calc_idx(int idx, int rx_len) { if (idx < 0) return rx_len + idx; else return idx; } static void cgw_csum_xor_rel(struct canfd_frame *cf, struct cgw_csum_xor *xor) { int from = calc_idx(xor->from_idx, cf->len); int to = calc_idx(xor->to_idx, cf->len); int res = calc_idx(xor->result_idx, cf->len); u8 val = xor->init_xor_val; int i; if (from < 0 || to < 0 || res < 0) return; if (from <= to) { for (i = from; i <= to; i++) val ^= cf->data[i]; } else { for (i = from; i >= to; i--) val ^= cf->data[i]; } cf->data[res] = val; } static void cgw_csum_xor_pos(struct canfd_frame *cf, struct cgw_csum_xor *xor) { u8 val = xor->init_xor_val; int i; for (i = xor->from_idx; i <= xor->to_idx; i++) val ^= cf->data[i]; cf->data[xor->result_idx] = val; } static void cgw_csum_xor_neg(struct canfd_frame *cf, struct cgw_csum_xor *xor) { u8 val = xor->init_xor_val; int i; for (i = xor->from_idx; i >= xor->to_idx; i--) val ^= cf->data[i]; cf->data[xor->result_idx] = val; } static void cgw_csum_crc8_rel(struct canfd_frame *cf, struct cgw_csum_crc8 *crc8) { int from = calc_idx(crc8->from_idx, cf->len); int to = calc_idx(crc8->to_idx, cf->len); int res = calc_idx(crc8->result_idx, cf->len); u8 crc = crc8->init_crc_val; int i; if (from < 0 || to < 0 || res < 0) return; if (from <= to) { for (i = crc8->from_idx; i <= crc8->to_idx; i++) crc = crc8->crctab[crc ^ cf->data[i]]; } else { for (i = crc8->from_idx; i >= crc8->to_idx; i--) crc = crc8->crctab[crc ^ cf->data[i]]; } switch (crc8->profile) { case CGW_CRC8PRF_1U8: crc = crc8->crctab[crc ^ crc8->profile_data[0]]; break; case CGW_CRC8PRF_16U8: crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]]; break; case CGW_CRC8PRF_SFFID_XOR: crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^ (cf->can_id >> 8 & 0xFF)]; break; } cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; } static void cgw_csum_crc8_pos(struct canfd_frame *cf, struct cgw_csum_crc8 *crc8) { u8 crc = crc8->init_crc_val; int i; for (i = crc8->from_idx; i <= crc8->to_idx; i++) crc = crc8->crctab[crc ^ cf->data[i]]; switch (crc8->profile) { case CGW_CRC8PRF_1U8: crc = crc8->crctab[crc ^ crc8->profile_data[0]]; break; case CGW_CRC8PRF_16U8: crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]]; break; case CGW_CRC8PRF_SFFID_XOR: crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^ (cf->can_id >> 8 & 0xFF)]; break; } cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; } static void cgw_csum_crc8_neg(struct canfd_frame *cf, struct cgw_csum_crc8 *crc8) { u8 crc = crc8->init_crc_val; int i; for (i = crc8->from_idx; i >= crc8->to_idx; i--) crc = crc8->crctab[crc ^ cf->data[i]]; switch (crc8->profile) { case CGW_CRC8PRF_1U8: crc = crc8->crctab[crc ^ crc8->profile_data[0]]; break; case CGW_CRC8PRF_16U8: crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]]; break; case CGW_CRC8PRF_SFFID_XOR: crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^ (cf->can_id >> 8 & 0xFF)]; break; } cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; } /* the receive & process & send function */ static void can_can_gw_rcv(struct sk_buff *skb, void *data) { struct cgw_job *gwj = (struct cgw_job *)data; struct canfd_frame *cf; struct sk_buff *nskb; int modidx = 0; /* process strictly Classic CAN or CAN FD frames */ if (gwj->flags & CGW_FLAGS_CAN_FD) { if (!can_is_canfd_skb(skb)) return; } else { if (!can_is_can_skb(skb)) return; } /* Do not handle CAN frames routed more than 'max_hops' times. * In general we should never catch this delimiter which is intended * to cover a misconfiguration protection (e.g. circular CAN routes). * * The Controller Area Network controllers only accept CAN frames with * correct CRCs - which are not visible in the controller registers. * According to skbuff.h documentation the csum_start element for IP * checksums is undefined/unused when ip_summed == CHECKSUM_UNNECESSARY. * Only CAN skbs can be processed here which already have this property. */ #define cgw_hops(skb) ((skb)->csum_start) BUG_ON(skb->ip_summed != CHECKSUM_UNNECESSARY); if (cgw_hops(skb) >= max_hops) { /* indicate deleted frames due to misconfiguration */ gwj->deleted_frames++; return; } if (!(gwj->dst.dev->flags & IFF_UP)) { gwj->dropped_frames++; return; } /* is sending the skb back to the incoming interface not allowed? */ if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) && can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex) return; /* clone the given skb, which has not been done in can_rcv() * * When there is at least one modification function activated, * we need to copy the skb as we want to modify skb->data. */ if (gwj->mod.modfunc[0]) nskb = skb_copy(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) { gwj->dropped_frames++; return; } /* put the incremented hop counter in the cloned skb */ cgw_hops(nskb) = cgw_hops(skb) + 1; /* first processing of this CAN frame -> adjust to private hop limit */ if (gwj->limit_hops && cgw_hops(nskb) == 1) cgw_hops(nskb) = max_hops - gwj->limit_hops + 1; nskb->dev = gwj->dst.dev; /* pointer to modifiable CAN frame */ cf = (struct canfd_frame *)nskb->data; /* perform preprocessed modification functions if there are any */ while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); /* Has the CAN frame been modified? */ if (modidx) { /* get available space for the processed CAN frame type */ int max_len = nskb->len - offsetof(struct canfd_frame, data); /* dlc may have changed, make sure it fits to the CAN frame */ if (cf->len > max_len) { /* delete frame due to misconfiguration */ gwj->deleted_frames++; kfree_skb(nskb); return; } /* check for checksum updates */ if (gwj->mod.csumfunc.crc8) (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); if (gwj->mod.csumfunc.xor) (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); } /* clear the skb timestamp if not configured the other way */ if (!(gwj->flags & CGW_FLAGS_CAN_SRC_TSTAMP)) nskb->tstamp = 0; /* send to netdevice */ if (can_send(nskb, gwj->flags & CGW_FLAGS_CAN_ECHO)) gwj->dropped_frames++; else gwj->handled_frames++; } static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj) { return can_rx_register(net, gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj, "gw", NULL); } static inline void cgw_unregister_filter(struct net *net, struct cgw_job *gwj) { can_rx_unregister(net, gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj); } static void cgw_job_free_rcu(struct rcu_head *rcu_head) { struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu); kmem_cache_free(cgw_cache, gwj); } static int cgw_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; if (msg == NETDEV_UNREGISTER) { struct cgw_job *gwj = NULL; struct hlist_node *nx; ASSERT_RTNL(); hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); call_rcu(&gwj->rcu, cgw_job_free_rcu); } } } return NOTIFY_DONE; } static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, u32 pid, u32 seq, int flags) { struct rtcanmsg *rtcan; struct nlmsghdr *nlh; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtcan), flags); if (!nlh) return -EMSGSIZE; rtcan = nlmsg_data(nlh); rtcan->can_family = AF_CAN; rtcan->gwtype = gwj->gwtype; rtcan->flags = gwj->flags; /* add statistics if available */ if (gwj->handled_frames) { if (nla_put_u32(skb, CGW_HANDLED, gwj->handled_frames) < 0) goto cancel; } if (gwj->dropped_frames) { if (nla_put_u32(skb, CGW_DROPPED, gwj->dropped_frames) < 0) goto cancel; } if (gwj->deleted_frames) { if (nla_put_u32(skb, CGW_DELETED, gwj->deleted_frames) < 0) goto cancel; } /* check non default settings of attributes */ if (gwj->limit_hops) { if (nla_put_u8(skb, CGW_LIM_HOPS, gwj->limit_hops) < 0) goto cancel; } if (gwj->flags & CGW_FLAGS_CAN_FD) { struct cgw_fdframe_mod mb; if (gwj->mod.modtype.and) { memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.and; if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0) goto cancel; } if (gwj->mod.modtype.or) { memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.or; if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0) goto cancel; } if (gwj->mod.modtype.xor) { memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.xor; if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } if (gwj->mod.modtype.set) { memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.set; if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } else { struct cgw_frame_mod mb; if (gwj->mod.modtype.and) { memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.and; if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) goto cancel; } if (gwj->mod.modtype.or) { memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.or; if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) goto cancel; } if (gwj->mod.modtype.xor) { memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.xor; if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } if (gwj->mod.modtype.set) { memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.set; if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } if (gwj->mod.uid) { if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0) goto cancel; } if (gwj->mod.csumfunc.crc8) { if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, &gwj->mod.csum.crc8) < 0) goto cancel; } if (gwj->mod.csumfunc.xor) { if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN, &gwj->mod.csum.xor) < 0) goto cancel; } if (gwj->gwtype == CGW_TYPE_CAN_CAN) { if (gwj->ccgw.filter.can_id || gwj->ccgw.filter.can_mask) { if (nla_put(skb, CGW_FILTER, sizeof(struct can_filter), &gwj->ccgw.filter) < 0) goto cancel; } if (nla_put_u32(skb, CGW_SRC_IF, gwj->ccgw.src_idx) < 0) goto cancel; if (nla_put_u32(skb, CGW_DST_IF, gwj->ccgw.dst_idx) < 0) goto cancel; } nlmsg_end(skb, nlh); return 0; cancel: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } /* Dump information about all CAN gateway jobs, in response to RTM_GETROUTE */ static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct cgw_job *gwj = NULL; int idx = 0; int s_idx = cb->args[0]; rcu_read_lock(); hlist_for_each_entry_rcu(gwj, &net->can.cgw_list, list) { if (idx < s_idx) goto cont; if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) break; cont: idx++; } rcu_read_unlock(); cb->args[0] = idx; return skb->len; } static const struct nla_policy cgw_policy[CGW_MAX + 1] = { [CGW_MOD_AND] = { .len = sizeof(struct cgw_frame_mod) }, [CGW_MOD_OR] = { .len = sizeof(struct cgw_frame_mod) }, [CGW_MOD_XOR] = { .len = sizeof(struct cgw_frame_mod) }, [CGW_MOD_SET] = { .len = sizeof(struct cgw_frame_mod) }, [CGW_CS_XOR] = { .len = sizeof(struct cgw_csum_xor) }, [CGW_CS_CRC8] = { .len = sizeof(struct cgw_csum_crc8) }, [CGW_SRC_IF] = { .type = NLA_U32 }, [CGW_DST_IF] = { .type = NLA_U32 }, [CGW_FILTER] = { .len = sizeof(struct can_filter) }, [CGW_LIM_HOPS] = { .type = NLA_U8 }, [CGW_MOD_UID] = { .type = NLA_U32 }, [CGW_FDMOD_AND] = { .len = sizeof(struct cgw_fdframe_mod) }, [CGW_FDMOD_OR] = { .len = sizeof(struct cgw_fdframe_mod) }, [CGW_FDMOD_XOR] = { .len = sizeof(struct cgw_fdframe_mod) }, [CGW_FDMOD_SET] = { .len = sizeof(struct cgw_fdframe_mod) }, }; /* check for common and gwtype specific attributes */ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, u8 gwtype, void *gwtypeattr, u8 *limhops) { struct nlattr *tb[CGW_MAX + 1]; struct rtcanmsg *r = nlmsg_data(nlh); int modidx = 0; int err = 0; /* initialize modification & checksum data space */ memset(mod, 0, sizeof(*mod)); err = nlmsg_parse_deprecated(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX, cgw_policy, NULL); if (err < 0) return err; if (tb[CGW_LIM_HOPS]) { *limhops = nla_get_u8(tb[CGW_LIM_HOPS]); if (*limhops < 1 || *limhops > max_hops) return -EINVAL; } /* check for AND/OR/XOR/SET modifications */ if (r->flags & CGW_FLAGS_CAN_FD) { struct cgw_fdframe_mod mb; if (tb[CGW_FDMOD_AND]) { nla_memcpy(&mb, tb[CGW_FDMOD_AND], CGW_FDMODATTR_LEN); canfdframecpy(&mod->modframe.and, &mb.cf); mod->modtype.and = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_and_id; if (mb.modtype & CGW_MOD_LEN) mod->modfunc[modidx++] = mod_and_len; if (mb.modtype & CGW_MOD_FLAGS) mod->modfunc[modidx++] = mod_and_flags; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_and_fddata; } if (tb[CGW_FDMOD_OR]) { nla_memcpy(&mb, tb[CGW_FDMOD_OR], CGW_FDMODATTR_LEN); canfdframecpy(&mod->modframe.or, &mb.cf); mod->modtype.or = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_or_id; if (mb.modtype & CGW_MOD_LEN) mod->modfunc[modidx++] = mod_or_len; if (mb.modtype & CGW_MOD_FLAGS) mod->modfunc[modidx++] = mod_or_flags; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_or_fddata; } if (tb[CGW_FDMOD_XOR]) { nla_memcpy(&mb, tb[CGW_FDMOD_XOR], CGW_FDMODATTR_LEN); canfdframecpy(&mod->modframe.xor, &mb.cf); mod->modtype.xor = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_xor_id; if (mb.modtype & CGW_MOD_LEN) mod->modfunc[modidx++] = mod_xor_len; if (mb.modtype & CGW_MOD_FLAGS) mod->modfunc[modidx++] = mod_xor_flags; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_xor_fddata; } if (tb[CGW_FDMOD_SET]) { nla_memcpy(&mb, tb[CGW_FDMOD_SET], CGW_FDMODATTR_LEN); canfdframecpy(&mod->modframe.set, &mb.cf); mod->modtype.set = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_set_id; if (mb.modtype & CGW_MOD_LEN) mod->modfunc[modidx++] = mod_set_len; if (mb.modtype & CGW_MOD_FLAGS) mod->modfunc[modidx++] = mod_set_flags; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_set_fddata; } } else { struct cgw_frame_mod mb; if (tb[CGW_MOD_AND]) { nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN); canframecpy(&mod->modframe.and, &mb.cf); mod->modtype.and = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_and_id; if (mb.modtype & CGW_MOD_DLC) mod->modfunc[modidx++] = mod_and_ccdlc; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_and_data; } if (tb[CGW_MOD_OR]) { nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN); canframecpy(&mod->modframe.or, &mb.cf); mod->modtype.or = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_or_id; if (mb.modtype & CGW_MOD_DLC) mod->modfunc[modidx++] = mod_or_ccdlc; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_or_data; } if (tb[CGW_MOD_XOR]) { nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN); canframecpy(&mod->modframe.xor, &mb.cf); mod->modtype.xor = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_xor_id; if (mb.modtype & CGW_MOD_DLC) mod->modfunc[modidx++] = mod_xor_ccdlc; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_xor_data; } if (tb[CGW_MOD_SET]) { nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN); canframecpy(&mod->modframe.set, &mb.cf); mod->modtype.set = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_set_id; if (mb.modtype & CGW_MOD_DLC) mod->modfunc[modidx++] = mod_set_ccdlc; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_set_data; } } /* check for checksum operations after CAN frame modifications */ if (modidx) { if (tb[CGW_CS_CRC8]) { struct cgw_csum_crc8 *c = nla_data(tb[CGW_CS_CRC8]); err = cgw_chk_csum_parms(c->from_idx, c->to_idx, c->result_idx, r); if (err) return err; nla_memcpy(&mod->csum.crc8, tb[CGW_CS_CRC8], CGW_CS_CRC8_LEN); /* select dedicated processing function to reduce * runtime operations in receive hot path. */ if (c->from_idx < 0 || c->to_idx < 0 || c->result_idx < 0) mod->csumfunc.crc8 = cgw_csum_crc8_rel; else if (c->from_idx <= c->to_idx) mod->csumfunc.crc8 = cgw_csum_crc8_pos; else mod->csumfunc.crc8 = cgw_csum_crc8_neg; } if (tb[CGW_CS_XOR]) { struct cgw_csum_xor *c = nla_data(tb[CGW_CS_XOR]); err = cgw_chk_csum_parms(c->from_idx, c->to_idx, c->result_idx, r); if (err) return err; nla_memcpy(&mod->csum.xor, tb[CGW_CS_XOR], CGW_CS_XOR_LEN); /* select dedicated processing function to reduce * runtime operations in receive hot path. */ if (c->from_idx < 0 || c->to_idx < 0 || c->result_idx < 0) mod->csumfunc.xor = cgw_csum_xor_rel; else if (c->from_idx <= c->to_idx) mod->csumfunc.xor = cgw_csum_xor_pos; else mod->csumfunc.xor = cgw_csum_xor_neg; } if (tb[CGW_MOD_UID]) nla_memcpy(&mod->uid, tb[CGW_MOD_UID], sizeof(u32)); } if (gwtype == CGW_TYPE_CAN_CAN) { /* check CGW_TYPE_CAN_CAN specific attributes */ struct can_can_gw *ccgw = (struct can_can_gw *)gwtypeattr; memset(ccgw, 0, sizeof(*ccgw)); /* check for can_filter in attributes */ if (tb[CGW_FILTER]) nla_memcpy(&ccgw->filter, tb[CGW_FILTER], sizeof(struct can_filter)); err = -ENODEV; /* specifying two interfaces is mandatory */ if (!tb[CGW_SRC_IF] || !tb[CGW_DST_IF]) return err; ccgw->src_idx = nla_get_u32(tb[CGW_SRC_IF]); ccgw->dst_idx = nla_get_u32(tb[CGW_DST_IF]); /* both indices set to 0 for flushing all routing entries */ if (!ccgw->src_idx && !ccgw->dst_idx) return 0; /* only one index set to 0 is an error */ if (!ccgw->src_idx || !ccgw->dst_idx) return err; } /* add the checks for other gwtypes here */ return 0; } static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct rtcanmsg *r; struct cgw_job *gwj; struct cf_mod mod; struct can_can_gw ccgw; u8 limhops = 0; int err = 0; if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (nlmsg_len(nlh) < sizeof(*r)) return -EINVAL; r = nlmsg_data(nlh); if (r->can_family != AF_CAN) return -EPFNOSUPPORT; /* so far we only support CAN -> CAN routings */ if (r->gwtype != CGW_TYPE_CAN_CAN) return -EINVAL; err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); if (err < 0) return err; if (mod.uid) { ASSERT_RTNL(); /* check for updating an existing job with identical uid */ hlist_for_each_entry(gwj, &net->can.cgw_list, list) { if (gwj->mod.uid != mod.uid) continue; /* interfaces & filters must be identical */ if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) return -EINVAL; /* update modifications with disabled softirq & quit */ local_bh_disable(); memcpy(&gwj->mod, &mod, sizeof(mod)); local_bh_enable(); return 0; } } /* ifindex == 0 is not allowed for job creation */ if (!ccgw.src_idx || !ccgw.dst_idx) return -ENODEV; gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); if (!gwj) return -ENOMEM; gwj->handled_frames = 0; gwj->dropped_frames = 0; gwj->deleted_frames = 0; gwj->flags = r->flags; gwj->gwtype = r->gwtype; gwj->limit_hops = limhops; /* insert already parsed information */ memcpy(&gwj->mod, &mod, sizeof(mod)); memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw)); err = -ENODEV; gwj->src.dev = __dev_get_by_index(net, gwj->ccgw.src_idx); if (!gwj->src.dev) goto out; if (gwj->src.dev->type != ARPHRD_CAN) goto out; gwj->dst.dev = __dev_get_by_index(net, gwj->ccgw.dst_idx); if (!gwj->dst.dev) goto out; if (gwj->dst.dev->type != ARPHRD_CAN) goto out; /* is sending the skb back to the incoming interface intended? */ if (gwj->src.dev == gwj->dst.dev && !(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK)) { err = -EINVAL; goto out; } ASSERT_RTNL(); err = cgw_register_filter(net, gwj); if (!err) hlist_add_head_rcu(&gwj->list, &net->can.cgw_list); out: if (err) kmem_cache_free(cgw_cache, gwj); return err; } static void cgw_remove_all_jobs(struct net *net) { struct cgw_job *gwj = NULL; struct hlist_node *nx; ASSERT_RTNL(); hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); call_rcu(&gwj->rcu, cgw_job_free_rcu); } } static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct cgw_job *gwj = NULL; struct hlist_node *nx; struct rtcanmsg *r; struct cf_mod mod; struct can_can_gw ccgw; u8 limhops = 0; int err = 0; if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (nlmsg_len(nlh) < sizeof(*r)) return -EINVAL; r = nlmsg_data(nlh); if (r->can_family != AF_CAN) return -EPFNOSUPPORT; /* so far we only support CAN -> CAN routings */ if (r->gwtype != CGW_TYPE_CAN_CAN) return -EINVAL; err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); if (err < 0) return err; /* two interface indices both set to 0 => remove all entries */ if (!ccgw.src_idx && !ccgw.dst_idx) { cgw_remove_all_jobs(net); return 0; } err = -EINVAL; ASSERT_RTNL(); /* remove only the first matching entry */ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { if (gwj->flags != r->flags) continue; if (gwj->limit_hops != limhops) continue; /* we have a match when uid is enabled and identical */ if (gwj->mod.uid || mod.uid) { if (gwj->mod.uid != mod.uid) continue; } else { /* no uid => check for identical modifications */ if (memcmp(&gwj->mod, &mod, sizeof(mod))) continue; } /* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */ if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) continue; hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); call_rcu(&gwj->rcu, cgw_job_free_rcu); err = 0; break; } return err; } static int __net_init cangw_pernet_init(struct net *net) { INIT_HLIST_HEAD(&net->can.cgw_list); return 0; } static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list) { struct net *net; rtnl_lock(); list_for_each_entry(net, net_list, exit_list) cgw_remove_all_jobs(net); rtnl_unlock(); } static struct pernet_operations cangw_pernet_ops = { .init = cangw_pernet_init, .exit_batch = cangw_pernet_exit_batch, }; static __init int cgw_module_init(void) { int ret; /* sanitize given module parameter */ max_hops = clamp_t(unsigned int, max_hops, CGW_MIN_HOPS, CGW_MAX_HOPS); pr_info("can: netlink gateway - max_hops=%d\n", max_hops); ret = register_pernet_subsys(&cangw_pernet_ops); if (ret) return ret; ret = -ENOMEM; cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job), 0, 0, NULL); if (!cgw_cache) goto out_cache_create; /* set notifier */ notifier.notifier_call = cgw_notifier; ret = register_netdevice_notifier(¬ifier); if (ret) goto out_register_notifier; ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0); if (ret) goto out_rtnl_register1; ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, 0); if (ret) goto out_rtnl_register2; ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, 0); if (ret) goto out_rtnl_register3; return 0; out_rtnl_register3: rtnl_unregister(PF_CAN, RTM_NEWROUTE); out_rtnl_register2: rtnl_unregister(PF_CAN, RTM_GETROUTE); out_rtnl_register1: unregister_netdevice_notifier(¬ifier); out_register_notifier: kmem_cache_destroy(cgw_cache); out_cache_create: unregister_pernet_subsys(&cangw_pernet_ops); return ret; } static __exit void cgw_module_exit(void) { rtnl_unregister_all(PF_CAN); unregister_netdevice_notifier(¬ifier); unregister_pernet_subsys(&cangw_pernet_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ kmem_cache_destroy(cgw_cache); } module_init(cgw_module_init); module_exit(cgw_module_exit); |
37 35 8 8 8 8 4 4 33 34 4 4 3 4 4 1 1 1 2 1 1 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 5 5 5 5 5 3 2 4 2 4 4 2 4 4 2 2 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 2 4 4 5 5 2 2 4 3 3 2 4 4 2 2 2 1 1 1 1 1 2 2 4 4 4 1 1 33 33 33 5 5 5 33 33 31 8 28 30 33 33 29 8 36 37 37 37 37 37 37 37 1 37 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH * Copyright 2018-2020, 2022-2024 Intel Corporation */ #include <crypto/utils.h> #include <linux/if_ether.h> #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/export.h> #include <net/mac80211.h> #include <asm/unaligned.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "debugfs_key.h" #include "aes_ccm.h" #include "aes_cmac.h" #include "aes_gmac.h" #include "aes_gcm.h" /** * DOC: Key handling basics * * Key handling in mac80211 is done based on per-interface (sub_if_data) * keys and per-station keys. Since each station belongs to an interface, * each station key also belongs to that interface. * * Hardware acceleration is done on a best-effort basis for algorithms * that are implemented in software, for each key the hardware is asked * to enable that key for offloading but if it cannot do that the key is * simply kept for software encryption (unless it is for an algorithm * that isn't implemented in software). * There is currently no way of knowing whether a key is handled in SW * or HW except by looking into debugfs. * * All key management is internally protected by a mutex. Within all * other parts of mac80211, key references are, just as STA structure * references, protected by RCU. Note, however, that some things are * unprotected, namely the key->sta dereferences within the hardware * acceleration functions. This means that sta_info_destroy() must * remove the key which waits for an RCU grace period. */ static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; static void update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) { struct ieee80211_sub_if_data *vlan; if (sdata->vif.type != NL80211_IFTYPE_AP) return; /* crypto_tx_tailroom_needed_cnt is protected by this */ lockdep_assert_wiphy(sdata->local->hw.wiphy); rcu_read_lock(); list_for_each_entry_rcu(vlan, &sdata->u.ap.vlans, u.vlan.list) vlan->crypto_tx_tailroom_needed_cnt += delta; rcu_read_unlock(); } static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) { /* * When this count is zero, SKB resizing for allocating tailroom * for IV or MMIC is skipped. But, this check has created two race * cases in xmit path while transiting from zero count to one: * * 1. SKB resize was skipped because no key was added but just before * the xmit key is added and SW encryption kicks off. * * 2. SKB resize was skipped because all the keys were hw planted but * just before xmit one of the key is deleted and SW encryption kicks * off. * * In both the above case SW encryption will find not enough space for * tailroom and exits with WARN_ON. (See WARN_ONs at wpa.c) * * Solution has been explained at * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net */ lockdep_assert_wiphy(sdata->local->hw.wiphy); update_vlan_tailroom_need_count(sdata, 1); if (!sdata->crypto_tx_tailroom_needed_cnt++) { /* * Flush all XMIT packets currently using HW encryption or no * encryption at all if the count transition is from 0 -> 1. */ synchronize_net(); } } static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) { lockdep_assert_wiphy(sdata->local->hw.wiphy); WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta); update_vlan_tailroom_need_count(sdata, -delta); sdata->crypto_tx_tailroom_needed_cnt -= delta; } static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) { struct ieee80211_sub_if_data *sdata = key->sdata; struct sta_info *sta; int ret = -EOPNOTSUPP; might_sleep(); lockdep_assert_wiphy(key->local->hw.wiphy); if (key->flags & KEY_FLAG_TAINTED) { /* If we get here, it's during resume and the key is * tainted so shouldn't be used/programmed any more. * However, its flags may still indicate that it was * programmed into the device (since we're in resume) * so clear that flag now to avoid trying to remove * it again later. */ if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && !(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | IEEE80211_KEY_FLAG_PUT_MIC_SPACE | IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(sdata); key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; return -EINVAL; } if (!key->local->ops->set_key) goto out_unsupported; sta = key->sta; /* * If this is a per-STA GTK, check if it * is supported; if not, return. */ if (sta && !(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE) && !ieee80211_hw_check(&key->local->hw, SUPPORTS_PER_STA_GTK)) goto out_unsupported; if (sta && !sta->uploaded) goto out_unsupported; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { /* * The driver doesn't know anything about VLAN interfaces. * Hence, don't send GTKs for VLAN interfaces to the driver. */ if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) { ret = 1; goto out_unsupported; } } if (key->conf.link_id >= 0 && sdata->vif.active_links && !(sdata->vif.active_links & BIT(key->conf.link_id))) return 0; ret = drv_set_key(key->local, SET_KEY, sdata, sta ? &sta->sta : NULL, &key->conf); if (!ret) { key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | IEEE80211_KEY_FLAG_PUT_MIC_SPACE | IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) decrease_tailroom_need_count(sdata, 1); WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)); WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_MIC_SPACE) && (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)); return 0; } if (ret != -ENOSPC && ret != -EOPNOTSUPP && ret != 1) sdata_err(sdata, "failed to set key (%d, %pM) to hardware (%d)\n", key->conf.keyidx, sta ? sta->sta.addr : bcast_addr, ret); out_unsupported: switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: /* all of these we can do in software - if driver can */ if (ret == 1) return 0; if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) return -EINVAL; return 0; default: return -EINVAL; } } static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) { struct ieee80211_sub_if_data *sdata; struct sta_info *sta; int ret; might_sleep(); if (!key || !key->local->ops->set_key) return; if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) return; sta = key->sta; sdata = key->sdata; lockdep_assert_wiphy(key->local->hw.wiphy); if (key->conf.link_id >= 0 && sdata->vif.active_links && !(sdata->vif.active_links & BIT(key->conf.link_id))) return; if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | IEEE80211_KEY_FLAG_PUT_MIC_SPACE | IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(sdata); key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; ret = drv_set_key(key->local, DISABLE_KEY, sdata, sta ? &sta->sta : NULL, &key->conf); if (ret) sdata_err(sdata, "failed to remove key (%d, %pM) from hardware (%d)\n", key->conf.keyidx, sta ? sta->sta.addr : bcast_addr, ret); } static int _ieee80211_set_tx_key(struct ieee80211_key *key, bool force) { struct sta_info *sta = key->sta; struct ieee80211_local *local = key->local; lockdep_assert_wiphy(local->hw.wiphy); set_sta_flag(sta, WLAN_STA_USES_ENCRYPTION); sta->ptk_idx = key->conf.keyidx; if (force || !ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT)) clear_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_check_fast_xmit(sta); return 0; } int ieee80211_set_tx_key(struct ieee80211_key *key) { return _ieee80211_set_tx_key(key, false); } static void ieee80211_pairwise_rekey(struct ieee80211_key *old, struct ieee80211_key *new) { struct ieee80211_local *local = new->local; struct sta_info *sta = new->sta; int i; lockdep_assert_wiphy(local->hw.wiphy); if (new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) { /* Extended Key ID key install, initial one or rekey */ if (sta->ptk_idx != INVALID_PTK_KEYIDX && !ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT)) { /* Aggregation Sessions with Extended Key ID must not * mix MPDUs with different keyIDs within one A-MPDU. * Tear down running Tx aggregation sessions and block * new Rx/Tx aggregation requests during rekey to * ensure there are no A-MPDUs when the driver is not * supporting A-MPDU key borders. (Blocking Tx only * would be sufficient but WLAN_STA_BLOCK_BA gets the * job done for the few ms we need it.) */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); for (i = 0; i < IEEE80211_NUM_TIDS; i++) __ieee80211_stop_tx_ba_session(sta, i, AGG_STOP_LOCAL_REQUEST); } } else if (old) { /* Rekey without Extended Key ID. * Aggregation sessions are OK when running on SW crypto. * A broken remote STA may cause issues not observed with HW * crypto, though. */ if (!(old->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) return; /* Stop Tx till we are on the new key */ old->flags |= KEY_FLAG_TAINTED; ieee80211_clear_fast_xmit(sta); if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION)) { set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_LOCAL_REQUEST); } if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_CAN_REPLACE_PTK0)) { pr_warn_ratelimited("Rekeying PTK for STA %pM but driver can't safely do that.", sta->sta.addr); /* Flushing the driver queues *may* help prevent * the clear text leaks and freezes. */ ieee80211_flush_queues(local, old->sdata, false); } } } static void __ieee80211_set_default_key(struct ieee80211_link_data *link, int idx, bool uni, bool multi) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_key *key = NULL; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (idx >= 0 && idx < NUM_DEFAULT_KEYS) { key = wiphy_dereference(sdata->local->hw.wiphy, sdata->keys[idx]); if (!key) key = wiphy_dereference(sdata->local->hw.wiphy, link->gtk[idx]); } if (uni) { rcu_assign_pointer(sdata->default_unicast_key, key); ieee80211_check_fast_xmit_iface(sdata); if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN) drv_set_default_unicast_key(sdata->local, sdata, idx); } if (multi) rcu_assign_pointer(link->default_multicast_key, key); ieee80211_debugfs_key_update_default(sdata); } void ieee80211_set_default_key(struct ieee80211_link_data *link, int idx, bool uni, bool multi) { lockdep_assert_wiphy(link->sdata->local->hw.wiphy); __ieee80211_set_default_key(link, idx, uni, multi); } static void __ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_key *key = NULL; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (idx >= NUM_DEFAULT_KEYS && idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) key = wiphy_dereference(sdata->local->hw.wiphy, link->gtk[idx]); rcu_assign_pointer(link->default_mgmt_key, key); ieee80211_debugfs_key_update_default(sdata); } void ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx) { lockdep_assert_wiphy(link->sdata->local->hw.wiphy); __ieee80211_set_default_mgmt_key(link, idx); } static void __ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_key *key = NULL; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS && idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS) key = wiphy_dereference(sdata->local->hw.wiphy, link->gtk[idx]); rcu_assign_pointer(link->default_beacon_key, key); ieee80211_debugfs_key_update_default(sdata); } void ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx) { lockdep_assert_wiphy(link->sdata->local->hw.wiphy); __ieee80211_set_default_beacon_key(link, idx); } static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, struct sta_info *sta, bool pairwise, struct ieee80211_key *old, struct ieee80211_key *new) { struct link_sta_info *link_sta = sta ? &sta->deflink : NULL; int link_id; int idx; int ret = 0; bool defunikey, defmultikey, defmgmtkey, defbeaconkey; bool is_wep; lockdep_assert_wiphy(sdata->local->hw.wiphy); /* caller must provide at least one old/new */ if (WARN_ON(!new && !old)) return 0; if (new) { idx = new->conf.keyidx; is_wep = new->conf.cipher == WLAN_CIPHER_SUITE_WEP40 || new->conf.cipher == WLAN_CIPHER_SUITE_WEP104; link_id = new->conf.link_id; } else { idx = old->conf.keyidx; is_wep = old->conf.cipher == WLAN_CIPHER_SUITE_WEP40 || old->conf.cipher == WLAN_CIPHER_SUITE_WEP104; link_id = old->conf.link_id; } if (WARN(old && old->conf.link_id != link_id, "old link ID %d doesn't match new link ID %d\n", old->conf.link_id, link_id)) return -EINVAL; if (link_id >= 0) { if (!link) { link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return -ENOLINK; } if (sta) { link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&sta->local->hw.wiphy->mtx)); if (!link_sta) return -ENOLINK; } } else { link = &sdata->deflink; } if ((is_wep || pairwise) && idx >= NUM_DEFAULT_KEYS) return -EINVAL; WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx); if (new && sta && pairwise) { /* Unicast rekey needs special handling. With Extended Key ID * old is still NULL for the first rekey. */ ieee80211_pairwise_rekey(old, new); } if (old) { if (old->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { ieee80211_key_disable_hw_accel(old); if (new) ret = ieee80211_key_enable_hw_accel(new); } } else { if (!new->local->wowlan) ret = ieee80211_key_enable_hw_accel(new); else new->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; } if (ret) return ret; if (new) list_add_tail_rcu(&new->list, &sdata->key_list); if (sta) { if (pairwise) { rcu_assign_pointer(sta->ptk[idx], new); if (new && !(new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX)) _ieee80211_set_tx_key(new, true); } else { rcu_assign_pointer(link_sta->gtk[idx], new); } /* Only needed for transition from no key -> key. * Still triggers unnecessary when using Extended Key ID * and installing the second key ID the first time. */ if (new && !old) ieee80211_check_fast_rx(sta); } else { defunikey = old && old == wiphy_dereference(sdata->local->hw.wiphy, sdata->default_unicast_key); defmultikey = old && old == wiphy_dereference(sdata->local->hw.wiphy, link->default_multicast_key); defmgmtkey = old && old == wiphy_dereference(sdata->local->hw.wiphy, link->default_mgmt_key); defbeaconkey = old && old == wiphy_dereference(sdata->local->hw.wiphy, link->default_beacon_key); if (defunikey && !new) __ieee80211_set_default_key(link, -1, true, false); if (defmultikey && !new) __ieee80211_set_default_key(link, -1, false, true); if (defmgmtkey && !new) __ieee80211_set_default_mgmt_key(link, -1); if (defbeaconkey && !new) __ieee80211_set_default_beacon_key(link, -1); if (is_wep || pairwise) rcu_assign_pointer(sdata->keys[idx], new); else rcu_assign_pointer(link->gtk[idx], new); if (defunikey && new) __ieee80211_set_default_key(link, new->conf.keyidx, true, false); if (defmultikey && new) __ieee80211_set_default_key(link, new->conf.keyidx, false, true); if (defmgmtkey && new) __ieee80211_set_default_mgmt_key(link, new->conf.keyidx); if (defbeaconkey && new) __ieee80211_set_default_beacon_key(link, new->conf.keyidx); } if (old) list_del_rcu(&old->list); return 0; } struct ieee80211_key * ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, const u8 *key_data, size_t seq_len, const u8 *seq) { struct ieee80211_key *key; int i, j, err; if (WARN_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS)) return ERR_PTR(-EINVAL); key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); if (!key) return ERR_PTR(-ENOMEM); /* * Default to software encryption; we'll later upload the * key to the hardware if possible. */ key->conf.flags = 0; key->flags = 0; key->conf.link_id = -1; key->conf.cipher = cipher; key->conf.keyidx = idx; key->conf.keylen = key_len; switch (cipher) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: key->conf.iv_len = IEEE80211_WEP_IV_LEN; key->conf.icv_len = IEEE80211_WEP_ICV_LEN; break; case WLAN_CIPHER_SUITE_TKIP: key->conf.iv_len = IEEE80211_TKIP_IV_LEN; key->conf.icv_len = IEEE80211_TKIP_ICV_LEN; if (seq) { for (i = 0; i < IEEE80211_NUM_TIDS; i++) { key->u.tkip.rx[i].iv32 = get_unaligned_le32(&seq[2]); key->u.tkip.rx[i].iv16 = get_unaligned_le16(seq); } } spin_lock_init(&key->u.tkip.txlock); break; case WLAN_CIPHER_SUITE_CCMP: key->conf.iv_len = IEEE80211_CCMP_HDR_LEN; key->conf.icv_len = IEEE80211_CCMP_MIC_LEN; if (seq) { for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) for (j = 0; j < IEEE80211_CCMP_PN_LEN; j++) key->u.ccmp.rx_pn[i][j] = seq[IEEE80211_CCMP_PN_LEN - j - 1]; } /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. */ key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt( key_data, key_len, IEEE80211_CCMP_MIC_LEN); if (IS_ERR(key->u.ccmp.tfm)) { err = PTR_ERR(key->u.ccmp.tfm); kfree(key); return ERR_PTR(err); } break; case WLAN_CIPHER_SUITE_CCMP_256: key->conf.iv_len = IEEE80211_CCMP_256_HDR_LEN; key->conf.icv_len = IEEE80211_CCMP_256_MIC_LEN; for (i = 0; seq && i < IEEE80211_NUM_TIDS + 1; i++) for (j = 0; j < IEEE80211_CCMP_256_PN_LEN; j++) key->u.ccmp.rx_pn[i][j] = seq[IEEE80211_CCMP_256_PN_LEN - j - 1]; /* Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. */ key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt( key_data, key_len, IEEE80211_CCMP_256_MIC_LEN); if (IS_ERR(key->u.ccmp.tfm)) { err = PTR_ERR(key->u.ccmp.tfm); kfree(key); return ERR_PTR(err); } break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: key->conf.iv_len = 0; if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) key->conf.icv_len = sizeof(struct ieee80211_mmie); else key->conf.icv_len = sizeof(struct ieee80211_mmie_16); if (seq) for (j = 0; j < IEEE80211_CMAC_PN_LEN; j++) key->u.aes_cmac.rx_pn[j] = seq[IEEE80211_CMAC_PN_LEN - j - 1]; /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. */ key->u.aes_cmac.tfm = ieee80211_aes_cmac_key_setup(key_data, key_len); if (IS_ERR(key->u.aes_cmac.tfm)) { err = PTR_ERR(key->u.aes_cmac.tfm); kfree(key); return ERR_PTR(err); } break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: key->conf.iv_len = 0; key->conf.icv_len = sizeof(struct ieee80211_mmie_16); if (seq) for (j = 0; j < IEEE80211_GMAC_PN_LEN; j++) key->u.aes_gmac.rx_pn[j] = seq[IEEE80211_GMAC_PN_LEN - j - 1]; /* Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. */ key->u.aes_gmac.tfm = ieee80211_aes_gmac_key_setup(key_data, key_len); if (IS_ERR(key->u.aes_gmac.tfm)) { err = PTR_ERR(key->u.aes_gmac.tfm); kfree(key); return ERR_PTR(err); } break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: key->conf.iv_len = IEEE80211_GCMP_HDR_LEN; key->conf.icv_len = IEEE80211_GCMP_MIC_LEN; for (i = 0; seq && i < IEEE80211_NUM_TIDS + 1; i++) for (j = 0; j < IEEE80211_GCMP_PN_LEN; j++) key->u.gcmp.rx_pn[i][j] = seq[IEEE80211_GCMP_PN_LEN - j - 1]; /* Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. */ key->u.gcmp.tfm = ieee80211_aes_gcm_key_setup_encrypt(key_data, key_len); if (IS_ERR(key->u.gcmp.tfm)) { err = PTR_ERR(key->u.gcmp.tfm); kfree(key); return ERR_PTR(err); } break; } memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); return key; } static void ieee80211_key_free_common(struct ieee80211_key *key) { switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: ieee80211_aes_key_free(key->u.ccmp.tfm); break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: ieee80211_aes_gmac_key_free(key->u.aes_gmac.tfm); break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: ieee80211_aes_gcm_key_free(key->u.gcmp.tfm); break; } kfree_sensitive(key); } static void __ieee80211_key_destroy(struct ieee80211_key *key, bool delay_tailroom) { if (key->local) { struct ieee80211_sub_if_data *sdata = key->sdata; ieee80211_debugfs_key_remove(key); if (delay_tailroom) { /* see ieee80211_delayed_tailroom_dec */ sdata->crypto_tx_tailroom_pending_dec++; wiphy_delayed_work_queue(sdata->local->hw.wiphy, &sdata->dec_tailroom_needed_wk, HZ / 2); } else { decrease_tailroom_need_count(sdata, 1); } } ieee80211_key_free_common(key); } static void ieee80211_key_destroy(struct ieee80211_key *key, bool delay_tailroom) { if (!key) return; /* * Synchronize so the TX path and rcu key iterators * can no longer be using this key before we free/remove it. */ synchronize_net(); __ieee80211_key_destroy(key, delay_tailroom); } void ieee80211_key_free_unused(struct ieee80211_key *key) { if (!key) return; WARN_ON(key->sdata || key->local); ieee80211_key_free_common(key); } static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata, struct ieee80211_key *old, struct ieee80211_key *new) { u8 tkip_old[WLAN_KEY_LEN_TKIP], tkip_new[WLAN_KEY_LEN_TKIP]; u8 *tk_old, *tk_new; if (!old || new->conf.keylen != old->conf.keylen) return false; tk_old = old->conf.key; tk_new = new->conf.key; /* * In station mode, don't compare the TX MIC key, as it's never used * and offloaded rekeying may not care to send it to the host. This * is the case in iwlwifi, for example. */ if (sdata->vif.type == NL80211_IFTYPE_STATION && new->conf.cipher == WLAN_CIPHER_SUITE_TKIP && new->conf.keylen == WLAN_KEY_LEN_TKIP && !(new->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) { memcpy(tkip_old, tk_old, WLAN_KEY_LEN_TKIP); memcpy(tkip_new, tk_new, WLAN_KEY_LEN_TKIP); memset(tkip_old + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8); memset(tkip_new + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8); tk_old = tkip_old; tk_new = tkip_new; } return !crypto_memneq(tk_old, tk_new, new->conf.keylen); } int ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_link_data *link, struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = link->sdata; static atomic_t key_color = ATOMIC_INIT(0); struct ieee80211_key *old_key = NULL; int idx = key->conf.keyidx; bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; /* * We want to delay tailroom updates only for station - in that * case it helps roaming speed, but in other cases it hurts and * can cause warnings to appear. */ bool delay_tailroom = sdata->vif.type == NL80211_IFTYPE_STATION; int ret; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (sta && pairwise) { struct ieee80211_key *alt_key; old_key = wiphy_dereference(sdata->local->hw.wiphy, sta->ptk[idx]); alt_key = wiphy_dereference(sdata->local->hw.wiphy, sta->ptk[idx ^ 1]); /* The rekey code assumes that the old and new key are using * the same cipher. Enforce the assumption for pairwise keys. */ if ((alt_key && alt_key->conf.cipher != key->conf.cipher) || (old_key && old_key->conf.cipher != key->conf.cipher)) { ret = -EOPNOTSUPP; goto out; } } else if (sta) { struct link_sta_info *link_sta = &sta->deflink; int link_id = key->conf.link_id; if (link_id >= 0) { link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&sta->local->hw.wiphy->mtx)); if (!link_sta) { ret = -ENOLINK; goto out; } } old_key = wiphy_dereference(sdata->local->hw.wiphy, link_sta->gtk[idx]); } else { if (idx < NUM_DEFAULT_KEYS) old_key = wiphy_dereference(sdata->local->hw.wiphy, sdata->keys[idx]); if (!old_key) old_key = wiphy_dereference(sdata->local->hw.wiphy, link->gtk[idx]); } /* Non-pairwise keys must also not switch the cipher on rekey */ if (!pairwise) { if (old_key && old_key->conf.cipher != key->conf.cipher) { ret = -EOPNOTSUPP; goto out; } } /* * Silently accept key re-installation without really installing the * new version of the key to avoid nonce reuse or replay issues. */ if (ieee80211_key_identical(sdata, old_key, key)) { ret = -EALREADY; goto out; } key->local = sdata->local; key->sdata = sdata; key->sta = sta; /* * Assign a unique ID to every key so we can easily prevent mixed * key and fragment cache attacks. */ key->color = atomic_inc_return(&key_color); /* keep this flag for easier access later */ if (sta && sta->sta.spp_amsdu) key->conf.flags |= IEEE80211_KEY_FLAG_SPP_AMSDU; increment_tailroom_need_count(sdata); ret = ieee80211_key_replace(sdata, link, sta, pairwise, old_key, key); if (!ret) { ieee80211_debugfs_key_add(key); ieee80211_key_destroy(old_key, delay_tailroom); } else { ieee80211_key_free(key, delay_tailroom); } key = NULL; out: ieee80211_key_free_unused(key); return ret; } void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom) { if (!key) return; /* * Replace key with nothingness if it was ever used. */ if (key->sdata) ieee80211_key_replace(key->sdata, NULL, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); ieee80211_key_destroy(key, delay_tailroom); } void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; struct ieee80211_sub_if_data *vlan; lockdep_assert_wiphy(sdata->local->hw.wiphy); sdata->crypto_tx_tailroom_needed_cnt = 0; sdata->crypto_tx_tailroom_pending_dec = 0; if (sdata->vif.type == NL80211_IFTYPE_AP) { list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { vlan->crypto_tx_tailroom_needed_cnt = 0; vlan->crypto_tx_tailroom_pending_dec = 0; } } if (ieee80211_sdata_running(sdata)) { list_for_each_entry(key, &sdata->key_list, list) { increment_tailroom_need_count(sdata); ieee80211_key_enable_hw_accel(key); } } } void ieee80211_iter_keys(struct ieee80211_hw *hw, struct ieee80211_vif *vif, void (*iter)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *key, void *data), void *iter_data) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_key *key, *tmp; struct ieee80211_sub_if_data *sdata; lockdep_assert_wiphy(hw->wiphy); if (vif) { sdata = vif_to_sdata(vif); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) iter(hw, &sdata->vif, key->sta ? &key->sta->sta : NULL, &key->conf, iter_data); } else { list_for_each_entry(sdata, &local->interfaces, list) list_for_each_entry_safe(key, tmp, &sdata->key_list, list) iter(hw, &sdata->vif, key->sta ? &key->sta->sta : NULL, &key->conf, iter_data); } } EXPORT_SYMBOL(ieee80211_iter_keys); static void _ieee80211_iter_keys_rcu(struct ieee80211_hw *hw, struct ieee80211_sub_if_data *sdata, void (*iter)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *key, void *data), void *iter_data) { struct ieee80211_key *key; list_for_each_entry_rcu(key, &sdata->key_list, list) { /* skip keys of station in removal process */ if (key->sta && key->sta->removed) continue; if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) continue; iter(hw, &sdata->vif, key->sta ? &key->sta->sta : NULL, &key->conf, iter_data); } } void ieee80211_iter_keys_rcu(struct ieee80211_hw *hw, struct ieee80211_vif *vif, void (*iter)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *key, void *data), void *iter_data) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; if (vif) { sdata = vif_to_sdata(vif); _ieee80211_iter_keys_rcu(hw, sdata, iter, iter_data); } else { list_for_each_entry_rcu(sdata, &local->interfaces, list) _ieee80211_iter_keys_rcu(hw, sdata, iter, iter_data); } } EXPORT_SYMBOL(ieee80211_iter_keys_rcu); static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata, struct list_head *keys) { struct ieee80211_key *key, *tmp; decrease_tailroom_need_count(sdata, sdata->crypto_tx_tailroom_pending_dec); sdata->crypto_tx_tailroom_pending_dec = 0; ieee80211_debugfs_key_remove_mgmt_default(sdata); ieee80211_debugfs_key_remove_beacon_default(sdata); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) { ieee80211_key_replace(key->sdata, NULL, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); list_add_tail(&key->list, keys); } ieee80211_debugfs_key_update_default(sdata); } void ieee80211_remove_link_keys(struct ieee80211_link_data *link, struct list_head *keys) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_key *key, *tmp; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) { if (key->conf.link_id != link->link_id) continue; ieee80211_key_replace(key->sdata, link, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); list_add_tail(&key->list, keys); } } void ieee80211_free_key_list(struct ieee80211_local *local, struct list_head *keys) { struct ieee80211_key *key, *tmp; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(key, tmp, keys, list) __ieee80211_key_destroy(key, false); } void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, bool force_synchronize) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *vlan; struct ieee80211_sub_if_data *master; struct ieee80211_key *key, *tmp; LIST_HEAD(keys); wiphy_delayed_work_cancel(local->hw.wiphy, &sdata->dec_tailroom_needed_wk); lockdep_assert_wiphy(local->hw.wiphy); ieee80211_free_keys_iface(sdata, &keys); if (sdata->vif.type == NL80211_IFTYPE_AP) { list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) ieee80211_free_keys_iface(vlan, &keys); } if (!list_empty(&keys) || force_synchronize) synchronize_net(); list_for_each_entry_safe(key, tmp, &keys, list) __ieee80211_key_destroy(key, false); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { if (sdata->bss) { master = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt != master->crypto_tx_tailroom_needed_cnt); } } else { WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || sdata->crypto_tx_tailroom_pending_dec); } if (sdata->vif.type == NL80211_IFTYPE_AP) { list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt || vlan->crypto_tx_tailroom_pending_dec); } } void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta) { struct ieee80211_key *key; int i; lockdep_assert_wiphy(local->hw.wiphy); for (i = 0; i < ARRAY_SIZE(sta->deflink.gtk); i++) { key = wiphy_dereference(local->hw.wiphy, sta->deflink.gtk[i]); if (!key) continue; ieee80211_key_replace(key->sdata, NULL, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); __ieee80211_key_destroy(key, key->sdata->vif.type == NL80211_IFTYPE_STATION); } for (i = 0; i < NUM_DEFAULT_KEYS; i++) { key = wiphy_dereference(local->hw.wiphy, sta->ptk[i]); if (!key) continue; ieee80211_key_replace(key->sdata, NULL, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); __ieee80211_key_destroy(key, key->sdata->vif.type == NL80211_IFTYPE_STATION); } } void ieee80211_delayed_tailroom_dec(struct wiphy *wiphy, struct wiphy_work *wk) { struct ieee80211_sub_if_data *sdata; sdata = container_of(wk, struct ieee80211_sub_if_data, dec_tailroom_needed_wk.work); /* * The reason for the delayed tailroom needed decrementing is to * make roaming faster: during roaming, all keys are first deleted * and then new keys are installed. The first new key causes the * crypto_tx_tailroom_needed_cnt to go from 0 to 1, which invokes * the cost of synchronize_net() (which can be slow). Avoid this * by deferring the crypto_tx_tailroom_needed_cnt decrementing on * key removal for a while, so if we roam the value is larger than * zero and no 0->1 transition happens. * * The cost is that if the AP switching was from an AP with keys * to one without, we still allocate tailroom while it would no * longer be needed. However, in the typical (fast) roaming case * within an ESS this usually won't happen. */ decrease_tailroom_need_count(sdata, sdata->crypto_tx_tailroom_pending_dec); sdata->crypto_tx_tailroom_pending_dec = 0; } void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid, const u8 *replay_ctr, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); trace_api_gtk_rekey_notify(sdata, bssid, replay_ctr); cfg80211_gtk_rekey_notify(sdata->dev, bssid, replay_ctr, gfp); } EXPORT_SYMBOL_GPL(ieee80211_gtk_rekey_notify); void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, int tid, struct ieee80211_key_seq *seq) { struct ieee80211_key *key; const u8 *pn; key = container_of(keyconf, struct ieee80211_key, conf); switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_TKIP: if (WARN_ON(tid < 0 || tid >= IEEE80211_NUM_TIDS)) return; seq->tkip.iv32 = key->u.tkip.rx[tid].iv32; seq->tkip.iv16 = key->u.tkip.rx[tid].iv16; break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) pn = key->u.ccmp.rx_pn[IEEE80211_NUM_TIDS]; else pn = key->u.ccmp.rx_pn[tid]; memcpy(seq->ccmp.pn, pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; memcpy(seq->aes_cmac.pn, pn, IEEE80211_CMAC_PN_LEN); break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_gmac.rx_pn; memcpy(seq->aes_gmac.pn, pn, IEEE80211_GMAC_PN_LEN); break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) pn = key->u.gcmp.rx_pn[IEEE80211_NUM_TIDS]; else pn = key->u.gcmp.rx_pn[tid]; memcpy(seq->gcmp.pn, pn, IEEE80211_GCMP_PN_LEN); break; } } EXPORT_SYMBOL(ieee80211_get_key_rx_seq); void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, int tid, struct ieee80211_key_seq *seq) { struct ieee80211_key *key; u8 *pn; key = container_of(keyconf, struct ieee80211_key, conf); switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_TKIP: if (WARN_ON(tid < 0 || tid >= IEEE80211_NUM_TIDS)) return; key->u.tkip.rx[tid].iv32 = seq->tkip.iv32; key->u.tkip.rx[tid].iv16 = seq->tkip.iv16; break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) pn = key->u.ccmp.rx_pn[IEEE80211_NUM_TIDS]; else pn = key->u.ccmp.rx_pn[tid]; memcpy(pn, seq->ccmp.pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; memcpy(pn, seq->aes_cmac.pn, IEEE80211_CMAC_PN_LEN); break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_gmac.rx_pn; memcpy(pn, seq->aes_gmac.pn, IEEE80211_GMAC_PN_LEN); break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) pn = key->u.gcmp.rx_pn[IEEE80211_NUM_TIDS]; else pn = key->u.gcmp.rx_pn[tid]; memcpy(pn, seq->gcmp.pn, IEEE80211_GCMP_PN_LEN); break; default: WARN_ON(1); break; } } EXPORT_SYMBOL_GPL(ieee80211_set_key_rx_seq); void ieee80211_remove_key(struct ieee80211_key_conf *keyconf) { struct ieee80211_key *key; key = container_of(keyconf, struct ieee80211_key, conf); lockdep_assert_wiphy(key->local->hw.wiphy); /* * if key was uploaded, we assume the driver will/has remove(d) * it, so adjust bookkeeping accordingly */ if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | IEEE80211_KEY_FLAG_PUT_MIC_SPACE | IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(key->sdata); } ieee80211_key_free(key, false); } EXPORT_SYMBOL_GPL(ieee80211_remove_key); struct ieee80211_key_conf * ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, struct ieee80211_key_conf *keyconf, int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; int err; struct ieee80211_link_data *link_data = link_id < 0 ? &sdata->deflink : sdata_dereference(sdata->link[link_id], sdata); if (WARN_ON(!link_data)) return ERR_PTR(-EINVAL); if (WARN_ON(!local->wowlan)) return ERR_PTR(-EINVAL); if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) return ERR_PTR(-EINVAL); key = ieee80211_key_alloc(keyconf->cipher, keyconf->keyidx, keyconf->keylen, keyconf->key, 0, NULL); if (IS_ERR(key)) return ERR_CAST(key); if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; key->conf.link_id = link_id; err = ieee80211_key_link(key, link_data, NULL); if (err) return ERR_PTR(err); return &key->conf; } EXPORT_SYMBOL_GPL(ieee80211_gtk_rekey_add); void ieee80211_key_mic_failure(struct ieee80211_key_conf *keyconf) { struct ieee80211_key *key; key = container_of(keyconf, struct ieee80211_key, conf); switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: key->u.aes_cmac.icverrors++; break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: key->u.aes_gmac.icverrors++; break; default: /* ignore the others for now, we don't keep counters now */ break; } } EXPORT_SYMBOL_GPL(ieee80211_key_mic_failure); void ieee80211_key_replay(struct ieee80211_key_conf *keyconf) { struct ieee80211_key *key; key = container_of(keyconf, struct ieee80211_key, conf); switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: key->u.ccmp.replays++; break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: key->u.aes_cmac.replays++; break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: key->u.aes_gmac.replays++; break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: key->u.gcmp.replays++; break; } } EXPORT_SYMBOL_GPL(ieee80211_key_replay); int ieee80211_key_switch_links(struct ieee80211_sub_if_data *sdata, unsigned long del_links_mask, unsigned long add_links_mask) { struct ieee80211_key *key; int ret; list_for_each_entry(key, &sdata->key_list, list) { if (key->conf.link_id < 0 || !(del_links_mask & BIT(key->conf.link_id))) continue; /* shouldn't happen for per-link keys */ WARN_ON(key->sta); ieee80211_key_disable_hw_accel(key); } list_for_each_entry(key, &sdata->key_list, list) { if (key->conf.link_id < 0 || !(add_links_mask & BIT(key->conf.link_id))) continue; /* shouldn't happen for per-link keys */ WARN_ON(key->sta); ret = ieee80211_key_enable_hw_accel(key); if (ret) return ret; } return 0; } |
2 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 | // SPDX-License-Identifier: GPL-2.0-only /* * linux/kernel/panic.c * * Copyright (C) 1991, 1992 Linus Torvalds */ /* * This function is used through-out the kernel (including mm and fs) * to indicate a major problem. */ #include <linux/debug_locks.h> #include <linux/sched/debug.h> #include <linux/interrupt.h> #include <linux/kgdb.h> #include <linux/kmsg_dump.h> #include <linux/kallsyms.h> #include <linux/notifier.h> #include <linux/vt_kern.h> #include <linux/module.h> #include <linux/random.h> #include <linux/ftrace.h> #include <linux/reboot.h> #include <linux/delay.h> #include <linux/kexec.h> #include <linux/panic_notifier.h> #include <linux/sched.h> #include <linux/string_helpers.h> #include <linux/sysrq.h> #include <linux/init.h> #include <linux/nmi.h> #include <linux/console.h> #include <linux/bug.h> #include <linux/ratelimit.h> #include <linux/debugfs.h> #include <linux/sysfs.h> #include <linux/context_tracking.h> #include <linux/seq_buf.h> #include <trace/events/error_report.h> #include <asm/sections.h> #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 #ifdef CONFIG_SMP /* * Should we dump all CPUs backtraces in an oops event? * Defaults to 0, can be changed via sysctl. */ static unsigned int __read_mostly sysctl_oops_all_cpu_backtrace; #else #define sysctl_oops_all_cpu_backtrace 0 #endif /* CONFIG_SMP */ int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; static unsigned long tainted_mask = IS_ENABLED(CONFIG_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0; static int pause_on_oops; static int pause_on_oops_flag; static DEFINE_SPINLOCK(pause_on_oops_lock); bool crash_kexec_post_notifiers; int panic_on_warn __read_mostly; unsigned long panic_on_taint; bool panic_on_taint_nousertaint = false; static unsigned int warn_limit __read_mostly; bool panic_triggering_all_cpu_backtrace; int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); #define PANIC_PRINT_TASK_INFO 0x00000001 #define PANIC_PRINT_MEM_INFO 0x00000002 #define PANIC_PRINT_TIMER_INFO 0x00000004 #define PANIC_PRINT_LOCK_INFO 0x00000008 #define PANIC_PRINT_FTRACE_INFO 0x00000010 #define PANIC_PRINT_ALL_PRINTK_MSG 0x00000020 #define PANIC_PRINT_ALL_CPU_BT 0x00000040 #define PANIC_PRINT_BLOCKED_TASKS 0x00000080 unsigned long panic_print; ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); #ifdef CONFIG_SYSCTL static struct ctl_table kern_panic_table[] = { #ifdef CONFIG_SMP { .procname = "oops_all_cpu_backtrace", .data = &sysctl_oops_all_cpu_backtrace, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, #endif { .procname = "warn_limit", .data = &warn_limit, .maxlen = sizeof(warn_limit), .mode = 0644, .proc_handler = proc_douintvec, }, }; static __init int kernel_panic_sysctls_init(void) { register_sysctl_init("kernel", kern_panic_table); return 0; } late_initcall(kernel_panic_sysctls_init); #endif static atomic_t warn_count = ATOMIC_INIT(0); #ifdef CONFIG_SYSFS static ssize_t warn_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { return sysfs_emit(page, "%d\n", atomic_read(&warn_count)); } static struct kobj_attribute warn_count_attr = __ATTR_RO(warn_count); static __init int kernel_panic_sysfs_init(void) { sysfs_add_file_to_group(kernel_kobj, &warn_count_attr.attr, NULL); return 0; } late_initcall(kernel_panic_sysfs_init); #endif static long no_blink(int state) { return 0; } /* Returns how long it waited in ms */ long (*panic_blink)(int state); EXPORT_SYMBOL(panic_blink); /* * Stop ourself in panic -- architecture code may override this */ void __weak __noreturn panic_smp_self_stop(void) { while (1) cpu_relax(); } /* * Stop ourselves in NMI context if another CPU has already panicked. Arch code * may override this to prepare for crash dumping, e.g. save regs info. */ void __weak __noreturn nmi_panic_self_stop(struct pt_regs *regs) { panic_smp_self_stop(); } /* * Stop other CPUs in panic. Architecture dependent code may override this * with more suitable version. For example, if the architecture supports * crash dump, it should save registers of each stopped CPU and disable * per-CPU features such as virtualization extensions. */ void __weak crash_smp_send_stop(void) { static int cpus_stopped; /* * This function can be called twice in panic path, but obviously * we execute this only once. */ if (cpus_stopped) return; /* * Note smp_send_stop is the usual smp shutdown function, which * unfortunately means it may not be hardened to work in a panic * situation. */ smp_send_stop(); cpus_stopped = 1; } atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); /* * A variant of panic() called from NMI context. We return if we've already * panicked on this CPU. If another CPU already panicked, loop in * nmi_panic_self_stop() which can provide architecture dependent code such * as saving register state for crash dump. */ void nmi_panic(struct pt_regs *regs, const char *msg) { int old_cpu, this_cpu; old_cpu = PANIC_CPU_INVALID; this_cpu = raw_smp_processor_id(); /* atomic_try_cmpxchg updates old_cpu on failure */ if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) panic("%s", msg); else if (old_cpu != this_cpu) nmi_panic_self_stop(regs); } EXPORT_SYMBOL(nmi_panic); static void panic_print_sys_info(bool console_flush) { if (console_flush) { if (panic_print & PANIC_PRINT_ALL_PRINTK_MSG) console_flush_on_panic(CONSOLE_REPLAY_ALL); return; } if (panic_print & PANIC_PRINT_TASK_INFO) show_state(); if (panic_print & PANIC_PRINT_MEM_INFO) show_mem(); if (panic_print & PANIC_PRINT_TIMER_INFO) sysrq_timer_list_show(); if (panic_print & PANIC_PRINT_LOCK_INFO) debug_show_all_locks(); if (panic_print & PANIC_PRINT_FTRACE_INFO) ftrace_dump(DUMP_ALL); if (panic_print & PANIC_PRINT_BLOCKED_TASKS) show_state_filter(TASK_UNINTERRUPTIBLE); } void check_panic_on_warn(const char *origin) { unsigned int limit; if (panic_on_warn) panic("%s: panic_on_warn set ...\n", origin); limit = READ_ONCE(warn_limit); if (atomic_inc_return(&warn_count) >= limit && limit) panic("%s: system warned too often (kernel.warn_limit is %d)", origin, limit); } /* * Helper that triggers the NMI backtrace (if set in panic_print) * and then performs the secondary CPUs shutdown - we cannot have * the NMI backtrace after the CPUs are off! */ static void panic_other_cpus_shutdown(bool crash_kexec) { if (panic_print & PANIC_PRINT_ALL_CPU_BT) { /* Temporary allow non-panic CPUs to write their backtraces. */ panic_triggering_all_cpu_backtrace = true; trigger_all_cpu_backtrace(); panic_triggering_all_cpu_backtrace = false; } /* * Note that smp_send_stop() is the usual SMP shutdown function, * which unfortunately may not be hardened to work in a panic * situation. If we want to do crash dump after notifier calls * and kmsg_dump, we will need architecture dependent extra * bits in addition to stopping other CPUs, hence we rely on * crash_smp_send_stop() for that. */ if (!crash_kexec) smp_send_stop(); else crash_smp_send_stop(); } /** * panic - halt the system * @fmt: The text string to print * * Display a message, then perform cleanups. * * This function never returns. */ void panic(const char *fmt, ...) { static char buf[1024]; va_list args; long i, i_next = 0, len; int state = 0; int old_cpu, this_cpu; bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; if (panic_on_warn) { /* * This thread may hit another WARN() in the panic path. * Resetting this prevents additional WARN() from panicking the * system on this thread. Other threads are blocked by the * panic_mutex in panic(). */ panic_on_warn = 0; } /* * Disable local interrupts. This will prevent panic_smp_self_stop * from deadlocking the first cpu that invokes the panic, since * there is nothing to prevent an interrupt handler (that runs * after setting panic_cpu) from invoking panic() again. */ local_irq_disable(); preempt_disable_notrace(); /* * It's possible to come here directly from a panic-assertion and * not have preempt disabled. Some functions called from here want * preempt to be disabled. No point enabling it later though... * * Only one CPU is allowed to execute the panic code from here. For * multiple parallel invocations of panic, all other CPUs either * stop themself or will wait until they are stopped by the 1st CPU * with smp_send_stop(). * * cmpxchg success means this is the 1st CPU which comes here, * so go ahead. * `old_cpu == this_cpu' means we came from nmi_panic() which sets * panic_cpu to this CPU. In this case, this is also the 1st CPU. */ old_cpu = PANIC_CPU_INVALID; this_cpu = raw_smp_processor_id(); /* atomic_try_cmpxchg updates old_cpu on failure */ if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) { /* go ahead */ } else if (old_cpu != this_cpu) panic_smp_self_stop(); console_verbose(); bust_spinlocks(1); va_start(args, fmt); len = vscnprintf(buf, sizeof(buf), fmt, args); va_end(args); if (len && buf[len - 1] == '\n') buf[len - 1] = '\0'; pr_emerg("Kernel panic - not syncing: %s\n", buf); #ifdef CONFIG_DEBUG_BUGVERBOSE /* * Avoid nested stack-dumping if a panic occurs during oops processing */ if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) dump_stack(); #endif /* * If kgdb is enabled, give it a chance to run before we stop all * the other CPUs or else we won't be able to debug processes left * running on them. */ kgdb_panic(buf); /* * If we have crashed and we have a crash kernel loaded let it handle * everything else. * If we want to run this after calling panic_notifiers, pass * the "crash_kexec_post_notifiers" option to the kernel. * * Bypass the panic_cpu check and call __crash_kexec directly. */ if (!_crash_kexec_post_notifiers) __crash_kexec(NULL); panic_other_cpus_shutdown(_crash_kexec_post_notifiers); printk_legacy_allow_panic_sync(); /* * Run any panic handlers, including those that might need to * add information to the kmsg dump output. */ atomic_notifier_call_chain(&panic_notifier_list, 0, buf); panic_print_sys_info(false); kmsg_dump_desc(KMSG_DUMP_PANIC, buf); /* * If you doubt kdump always works fine in any situation, * "crash_kexec_post_notifiers" offers you a chance to run * panic_notifiers and dumping kmsg before kdump. * Note: since some panic_notifiers can make crashed kernel * more unstable, it can increase risks of the kdump failure too. * * Bypass the panic_cpu check and call __crash_kexec directly. */ if (_crash_kexec_post_notifiers) __crash_kexec(NULL); console_unblank(); /* * We may have ended up stopping the CPU holding the lock (in * smp_send_stop()) while still having some valuable data in the console * buffer. Try to acquire the lock then release it regardless of the * result. The release will also print the buffers out. Locks debug * should be disabled to avoid reporting bad unlock balance when * panic() is not being callled from OOPS. */ debug_locks_off(); console_flush_on_panic(CONSOLE_FLUSH_PENDING); panic_print_sys_info(true); if (!panic_blink) panic_blink = no_blink; if (panic_timeout > 0) { /* * Delay timeout seconds before rebooting the machine. * We can't use the "normal" timers since we just panicked. */ pr_emerg("Rebooting in %d seconds..\n", panic_timeout); for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { touch_nmi_watchdog(); if (i >= i_next) { i += panic_blink(state ^= 1); i_next = i + 3600 / PANIC_BLINK_SPD; } mdelay(PANIC_TIMER_STEP); } } if (panic_timeout != 0) { /* * This will not be a clean reboot, with everything * shutting down. But if there is a chance of * rebooting the system it will be rebooted. */ if (panic_reboot_mode != REBOOT_UNDEFINED) reboot_mode = panic_reboot_mode; emergency_restart(); } #ifdef __sparc__ { extern int stop_a_enabled; /* Make sure the user can actually press Stop-A (L1-A) */ stop_a_enabled = 1; pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n" "twice on console to return to the boot prom\n"); } #endif #if defined(CONFIG_S390) disabled_wait(); #endif pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf); /* Do not scroll important messages printed above */ suppress_printk = 1; /* * The final messages may not have been printed if in a context that * defers printing (such as NMI) and irq_work is not available. * Explicitly flush the kernel log buffer one last time. */ console_flush_on_panic(CONSOLE_FLUSH_PENDING); nbcon_atomic_flush_unsafe(); local_irq_enable(); for (i = 0; ; i += PANIC_TIMER_STEP) { touch_softlockup_watchdog(); if (i >= i_next) { i += panic_blink(state ^= 1); i_next = i + 3600 / PANIC_BLINK_SPD; } mdelay(PANIC_TIMER_STEP); } } EXPORT_SYMBOL(panic); #define TAINT_FLAG(taint, _c_true, _c_false, _module) \ [ TAINT_##taint ] = { \ .c_true = _c_true, .c_false = _c_false, \ .module = _module, \ .desc = #taint, \ } /* * TAINT_FORCED_RMMOD could be a per-module flag but the module * is being removed anyway. */ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { TAINT_FLAG(PROPRIETARY_MODULE, 'P', 'G', true), TAINT_FLAG(FORCED_MODULE, 'F', ' ', true), TAINT_FLAG(CPU_OUT_OF_SPEC, 'S', ' ', false), TAINT_FLAG(FORCED_RMMOD, 'R', ' ', false), TAINT_FLAG(MACHINE_CHECK, 'M', ' ', false), TAINT_FLAG(BAD_PAGE, 'B', ' ', false), TAINT_FLAG(USER, 'U', ' ', false), TAINT_FLAG(DIE, 'D', ' ', false), TAINT_FLAG(OVERRIDDEN_ACPI_TABLE, 'A', ' ', false), TAINT_FLAG(WARN, 'W', ' ', false), TAINT_FLAG(CRAP, 'C', ' ', true), TAINT_FLAG(FIRMWARE_WORKAROUND, 'I', ' ', false), TAINT_FLAG(OOT_MODULE, 'O', ' ', true), TAINT_FLAG(UNSIGNED_MODULE, 'E', ' ', true), TAINT_FLAG(SOFTLOCKUP, 'L', ' ', false), TAINT_FLAG(LIVEPATCH, 'K', ' ', true), TAINT_FLAG(AUX, 'X', ' ', true), TAINT_FLAG(RANDSTRUCT, 'T', ' ', true), TAINT_FLAG(TEST, 'N', ' ', true), }; #undef TAINT_FLAG static void print_tainted_seq(struct seq_buf *s, bool verbose) { const char *sep = ""; int i; if (!tainted_mask) { seq_buf_puts(s, "Not tainted"); return; } seq_buf_printf(s, "Tainted: "); for (i = 0; i < TAINT_FLAGS_COUNT; i++) { const struct taint_flag *t = &taint_flags[i]; bool is_set = test_bit(i, &tainted_mask); char c = is_set ? t->c_true : t->c_false; if (verbose) { if (is_set) { seq_buf_printf(s, "%s[%c]=%s", sep, c, t->desc); sep = ", "; } } else { seq_buf_putc(s, c); } } } static const char *_print_tainted(bool verbose) { /* FIXME: what should the size be? */ static char buf[sizeof(taint_flags)]; struct seq_buf s; BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT); seq_buf_init(&s, buf, sizeof(buf)); print_tainted_seq(&s, verbose); return seq_buf_str(&s); } /** * print_tainted - return a string to represent the kernel taint state. * * For individual taint flag meanings, see Documentation/admin-guide/sysctl/kernel.rst * * The string is overwritten by the next call to print_tainted(), * but is always NULL terminated. */ const char *print_tainted(void) { return _print_tainted(false); } /** * print_tainted_verbose - A more verbose version of print_tainted() */ const char *print_tainted_verbose(void) { return _print_tainted(true); } int test_taint(unsigned flag) { return test_bit(flag, &tainted_mask); } EXPORT_SYMBOL(test_taint); unsigned long get_taint(void) { return tainted_mask; } /** * add_taint: add a taint flag if not already set. * @flag: one of the TAINT_* constants. * @lockdep_ok: whether lock debugging is still OK. * * If something bad has gone wrong, you'll want @lockdebug_ok = false, but for * some notewortht-but-not-corrupting cases, it can be set to true. */ void add_taint(unsigned flag, enum lockdep_ok lockdep_ok) { if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off()) pr_warn("Disabling lock debugging due to kernel taint\n"); set_bit(flag, &tainted_mask); if (tainted_mask & panic_on_taint) { panic_on_taint = 0; panic("panic_on_taint set ..."); } } EXPORT_SYMBOL(add_taint); static void spin_msec(int msecs) { int i; for (i = 0; i < msecs; i++) { touch_nmi_watchdog(); mdelay(1); } } /* * It just happens that oops_enter() and oops_exit() are identically * implemented... */ static void do_oops_enter_exit(void) { unsigned long flags; static int spin_counter; if (!pause_on_oops) return; spin_lock_irqsave(&pause_on_oops_lock, flags); if (pause_on_oops_flag == 0) { /* This CPU may now print the oops message */ pause_on_oops_flag = 1; } else { /* We need to stall this CPU */ if (!spin_counter) { /* This CPU gets to do the counting */ spin_counter = pause_on_oops; do { spin_unlock(&pause_on_oops_lock); spin_msec(MSEC_PER_SEC); spin_lock(&pause_on_oops_lock); } while (--spin_counter); pause_on_oops_flag = 0; } else { /* This CPU waits for a different one */ while (spin_counter) { spin_unlock(&pause_on_oops_lock); spin_msec(1); spin_lock(&pause_on_oops_lock); } } } spin_unlock_irqrestore(&pause_on_oops_lock, flags); } /* * Return true if the calling CPU is allowed to print oops-related info. * This is a bit racy.. */ bool oops_may_print(void) { return pause_on_oops_flag == 0; } /* * Called when the architecture enters its oops handler, before it prints * anything. If this is the first CPU to oops, and it's oopsing the first * time then let it proceed. * * This is all enabled by the pause_on_oops kernel boot option. We do all * this to ensure that oopses don't scroll off the screen. It has the * side-effect of preventing later-oopsing CPUs from mucking up the display, * too. * * It turns out that the CPU which is allowed to print ends up pausing for * the right duration, whereas all the other CPUs pause for twice as long: * once in oops_enter(), once in oops_exit(). */ void oops_enter(void) { nbcon_cpu_emergency_enter(); tracing_off(); /* can't trust the integrity of the kernel anymore: */ debug_locks_off(); do_oops_enter_exit(); if (sysctl_oops_all_cpu_backtrace) trigger_all_cpu_backtrace(); } static void print_oops_end_marker(void) { pr_warn("---[ end trace %016llx ]---\n", 0ULL); } /* * Called when the architecture exits its oops handler, after printing * everything. */ void oops_exit(void) { do_oops_enter_exit(); print_oops_end_marker(); nbcon_cpu_emergency_exit(); kmsg_dump(KMSG_DUMP_OOPS); } struct warn_args { const char *fmt; va_list args; }; void __warn(const char *file, int line, void *caller, unsigned taint, struct pt_regs *regs, struct warn_args *args) { nbcon_cpu_emergency_enter(); disable_trace_on_warning(); if (file) pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n", raw_smp_processor_id(), current->pid, file, line, caller); else pr_warn("WARNING: CPU: %d PID: %d at %pS\n", raw_smp_processor_id(), current->pid, caller); #pragma GCC diagnostic push #ifndef __clang__ #pragma GCC diagnostic ignored "-Wsuggest-attribute=format" #endif if (args) vprintk(args->fmt, args->args); #pragma GCC diagnostic pop print_modules(); if (regs) show_regs(regs); check_panic_on_warn("kernel"); if (!regs) dump_stack(); print_irqtrace_events(current); print_oops_end_marker(); trace_error_report_end(ERROR_DETECTOR_WARN, (unsigned long)caller); /* Just a warning, don't kill lockdep. */ add_taint(taint, LOCKDEP_STILL_OK); nbcon_cpu_emergency_exit(); } #ifdef CONFIG_BUG #ifndef __WARN_FLAGS void warn_slowpath_fmt(const char *file, int line, unsigned taint, const char *fmt, ...) { bool rcu = warn_rcu_enter(); struct warn_args args; pr_warn(CUT_HERE); if (!fmt) { __warn(file, line, __builtin_return_address(0), taint, NULL, NULL); warn_rcu_exit(rcu); return; } args.fmt = fmt; va_start(args.args, fmt); __warn(file, line, __builtin_return_address(0), taint, NULL, &args); va_end(args.args); warn_rcu_exit(rcu); } EXPORT_SYMBOL(warn_slowpath_fmt); #else void __warn_printk(const char *fmt, ...) { bool rcu = warn_rcu_enter(); va_list args; pr_warn(CUT_HERE); va_start(args, fmt); vprintk(fmt, args); va_end(args); warn_rcu_exit(rcu); } EXPORT_SYMBOL(__warn_printk); #endif /* Support resetting WARN*_ONCE state */ static int clear_warn_once_set(void *data, u64 val) { generic_bug_clear_once(); memset(__start_once, 0, __end_once - __start_once); return 0; } DEFINE_DEBUGFS_ATTRIBUTE(clear_warn_once_fops, NULL, clear_warn_once_set, "%lld\n"); static __init int register_warn_debugfs(void) { /* Don't care about failure */ debugfs_create_file_unsafe("clear_warn_once", 0200, NULL, NULL, &clear_warn_once_fops); return 0; } device_initcall(register_warn_debugfs); #endif #ifdef CONFIG_STACKPROTECTOR /* * Called when gcc's -fstack-protector feature is used, and * gcc detects corruption of the on-stack canary value */ __visible noinstr void __stack_chk_fail(void) { instrumentation_begin(); panic("stack-protector: Kernel stack is corrupted in: %pB", __builtin_return_address(0)); instrumentation_end(); } EXPORT_SYMBOL(__stack_chk_fail); #endif core_param(panic, panic_timeout, int, 0644); core_param(panic_print, panic_print, ulong, 0644); core_param(pause_on_oops, pause_on_oops, int, 0644); core_param(panic_on_warn, panic_on_warn, int, 0644); core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644); static int __init oops_setup(char *s) { if (!s) return -EINVAL; if (!strcmp(s, "panic")) panic_on_oops = 1; return 0; } early_param("oops", oops_setup); static int __init panic_on_taint_setup(char *s) { char *taint_str; if (!s) return -EINVAL; taint_str = strsep(&s, ","); if (kstrtoul(taint_str, 16, &panic_on_taint)) return -EINVAL; /* make sure panic_on_taint doesn't hold out-of-range TAINT flags */ panic_on_taint &= TAINT_FLAGS_MAX; if (!panic_on_taint) return -EINVAL; if (s && !strcmp(s, "nousertaint")) panic_on_taint_nousertaint = true; pr_info("panic_on_taint: bitmask=0x%lx nousertaint_mode=%s\n", panic_on_taint, str_enabled_disabled(panic_on_taint_nousertaint)); return 0; } early_param("panic_on_taint", panic_on_taint_setup); |
50 50 49 49 48 45 2 45 45 3 46 46 49 49 3 3 40 7 7 1 6 1 48 17 46 40 51 51 49 7 38 3 3 47 47 1 2 46 4 2 2 2 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Forwarding decision * Linux ethernet bridge * * Authors: * Lennert Buytenhek <buytenh@gnu.org> */ #include <linux/err.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/netpoll.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <linux/netfilter_bridge.h> #include "br_private.h" /* Don't forward packets to originating port or forwarding disabled */ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { struct net_bridge_vlan_group *vg; vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && (br_mst_is_enabled(p->br) || p->state == BR_STATE_FORWARDING) && br_allowed_egress(vg, skb) && nbp_switchdev_allowed_egress(p, skb) && !br_skb_isolated(p, skb); } int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { skb_push(skb, ETH_HLEN); if (!is_skb_forwardable(skb->dev, skb)) goto drop; br_drop_fake_rtable(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && eth_type_vlan(skb->protocol)) { int depth; if (!vlan_get_protocol_and_depth(skb, skb->protocol, &depth)) goto drop; skb_set_network_header(skb, depth); } br_switchdev_frame_set_offload_fwd_mark(skb); dev_queue_xmit(skb); return 0; drop: kfree_skb(skb); return 0; } EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { skb_clear_tstamp(skb); return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, net, sk, skb, NULL, skb->dev, br_dev_queue_push_xmit); } EXPORT_SYMBOL_GPL(br_forward_finish); static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb, bool local_orig) { struct net_bridge_vlan_group *vg; struct net_device *indev; struct net *net; int br_hook; /* Mark the skb for forwarding offload early so that br_handle_vlan() * can know whether to pop the VLAN header on egress or keep it. */ nbp_switchdev_frame_mark_tx_fwd_offload(to, skb); vg = nbp_vlan_group_rcu(to); skb = br_handle_vlan(to->br, to, vg, skb); if (!skb) return; indev = skb->dev; skb->dev = to->dev; if (!local_orig) { if (skb_warn_if_lro(skb)) { kfree_skb(skb); return; } br_hook = NF_BR_FORWARD; skb_forward_csum(skb); net = dev_net(indev); } else { if (unlikely(netpoll_tx_running(to->br->dev))) { skb_push(skb, ETH_HLEN); if (!is_skb_forwardable(skb->dev, skb)) kfree_skb(skb); else br_netpoll_send_skb(to, skb); return; } br_hook = NF_BR_LOCAL_OUT; net = dev_net(skb->dev); indev = NULL; } NF_HOOK(NFPROTO_BRIDGE, br_hook, net, NULL, skb, indev, skb->dev, br_forward_finish); } static int deliver_clone(const struct net_bridge_port *prev, struct sk_buff *skb, bool local_orig) { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; skb = skb_clone(skb, GFP_ATOMIC); if (!skb) { DEV_STATS_INC(dev, tx_dropped); return -ENOMEM; } __br_forward(prev, skb, local_orig); return 0; } /** * br_forward - forward a packet to a specific port * @to: destination port * @skb: packet being forwarded * @local_rcv: packet will be received locally after forwarding * @local_orig: packet is locally originated * * Should be called with rcu_read_lock. */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, bool local_rcv, bool local_orig) { if (unlikely(!to)) goto out; /* redirect to backup link if the destination port is down */ if (rcu_access_pointer(to->backup_port) && !netif_carrier_ok(to->dev)) { struct net_bridge_port *backup_port; backup_port = rcu_dereference(to->backup_port); if (unlikely(!backup_port)) goto out; BR_INPUT_SKB_CB(skb)->backup_nhid = READ_ONCE(to->backup_nhid); to = backup_port; } if (should_deliver(to, skb)) { if (local_rcv) deliver_clone(to, skb, local_orig); else __br_forward(to, skb, local_orig); return; } out: if (!local_rcv) kfree_skb(skb); } EXPORT_SYMBOL_GPL(br_forward); static struct net_bridge_port *maybe_deliver( struct net_bridge_port *prev, struct net_bridge_port *p, struct sk_buff *skb, bool local_orig) { u8 igmp_type = br_multicast_igmp_type(skb); int err; if (!should_deliver(p, skb)) return prev; nbp_switchdev_frame_mark_tx_fwd_to_hwdom(p, skb); if (!prev) goto out; err = deliver_clone(prev, skb, local_orig); if (err) return ERR_PTR(err); out: br_multicast_count(p->br, p, skb, igmp_type, BR_MCAST_DIR_TX); return p; } /* called under rcu_read_lock */ void br_flood(struct net_bridge *br, struct sk_buff *skb, enum br_pkt_type pkt_type, bool local_rcv, bool local_orig, u16 vid) { struct net_bridge_port *prev = NULL; struct net_bridge_port *p; br_tc_skb_miss_set(skb, pkt_type != BR_PKT_BROADCAST); list_for_each_entry_rcu(p, &br->port_list, list) { /* Do not flood unicast traffic to ports that turn it off, nor * other traffic if flood off, except for traffic we originate */ switch (pkt_type) { case BR_PKT_UNICAST: if (!(p->flags & BR_FLOOD)) continue; break; case BR_PKT_MULTICAST: if (!(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) continue; break; case BR_PKT_BROADCAST: if (!(p->flags & BR_BCAST_FLOOD) && skb->dev != br->dev) continue; break; } /* Do not flood to ports that enable proxy ARP */ if (p->flags & BR_PROXYARP) continue; if (BR_INPUT_SKB_CB(skb)->proxyarp_replied && ((p->flags & BR_PROXYARP_WIFI) || br_is_neigh_suppress_enabled(p, vid))) continue; prev = maybe_deliver(prev, p, skb, local_orig); if (IS_ERR(prev)) goto out; } if (!prev) goto out; if (local_rcv) deliver_clone(prev, skb, local_orig); else __br_forward(prev, skb, local_orig); return; out: if (!local_rcv) kfree_skb(skb); } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, const unsigned char *addr, bool local_orig) { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; const unsigned char *src = eth_hdr(skb)->h_source; struct sk_buff *nskb; if (!should_deliver(p, skb)) return; /* Even with hairpin, no soliloquies - prevent breaking IPv6 DAD */ if (skb->dev == p->dev && ether_addr_equal(src, addr)) return; __skb_push(skb, ETH_HLEN); nskb = pskb_copy(skb, GFP_ATOMIC); __skb_pull(skb, ETH_HLEN); if (!nskb) { DEV_STATS_INC(dev, tx_dropped); return; } skb = nskb; __skb_pull(skb, ETH_HLEN); if (!is_broadcast_ether_addr(addr)) memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN); __br_forward(p, skb, local_orig); } /* called with rcu_read_lock */ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, struct net_bridge_mcast *brmctx, bool local_rcv, bool local_orig) { struct net_bridge_port *prev = NULL; struct net_bridge_port_group *p; bool allow_mode_include = true; struct hlist_node *rp; rp = br_multicast_get_first_rport_node(brmctx, skb); if (mdst) { p = rcu_dereference(mdst->ports); if (br_multicast_should_handle_mode(brmctx, mdst->addr.proto) && br_multicast_is_star_g(&mdst->addr)) allow_mode_include = false; } else { p = NULL; br_tc_skb_miss_set(skb, true); } while (p || rp) { struct net_bridge_port *port, *lport, *rport; lport = p ? p->key.port : NULL; rport = br_multicast_rport_from_node_skb(rp, skb); if ((unsigned long)lport > (unsigned long)rport) { port = lport; if (port->flags & BR_MULTICAST_TO_UNICAST) { maybe_deliver_addr(lport, skb, p->eth_addr, local_orig); goto delivered; } if ((!allow_mode_include && p->filter_mode == MCAST_INCLUDE) || (p->flags & MDB_PG_FLAGS_BLOCKED)) goto delivered; } else { port = rport; } prev = maybe_deliver(prev, port, skb, local_orig); if (IS_ERR(prev)) goto out; delivered: if ((unsigned long)lport >= (unsigned long)port) p = rcu_dereference(p->next); if ((unsigned long)rport >= (unsigned long)port) rp = rcu_dereference(hlist_next_rcu(rp)); } if (!prev) goto out; if (local_rcv) deliver_clone(prev, skb, local_orig); else __br_forward(prev, skb, local_orig); return; out: if (!local_rcv) kfree_skb(skb); } #endif |
282 269 267 81 279 127 4 232 230 3 224 222 217 70 221 127 15 224 1 219 15 5 5 2 1 4 6 6 4 1 5 11 6 5 9 9 9 9 9 49 3 48 276 1 276 275 6 2 29 171 161 361 1 1 1 275 337 331 6 143 190 4 4 3 16 14 16 1 11 336 1 372 1 1 1 1 347 357 1 366 378 3 11 8 6 8 6 215 1 215 2 210 3 2 39 128 119 17 1 16 1 13 1 385 382 371 371 2 2 2 37 37 36 20 17 8 20 20 12 19 14 6 6 24 8 16 6 10 8 8 8 8 8 8 8 8 8 5 5 5 5 303 127 299 303 293 118 295 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 | // SPDX-License-Identifier: GPL-2.0-only /* * x_tables core - Backend for {ip,ip6,arp}_tables * * Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org> * Copyright (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * Based on existing ip_tables code which is * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/socket.h> #include <linux/net.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/string.h> #include <linux/vmalloc.h> #include <linux/mutex.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/audit.h> #include <linux/user_namespace.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_arp.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_arp/arp_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); #define XT_PCPU_BLOCK_SIZE 4096 #define XT_MAX_TABLE_SIZE (512 * 1024 * 1024) struct xt_template { struct list_head list; /* called when table is needed in the given netns */ int (*table_init)(struct net *net); struct module *me; /* A unique name... */ char name[XT_TABLE_MAXNAMELEN]; }; static struct list_head xt_templates[NFPROTO_NUMPROTO]; struct xt_pernet { struct list_head tables[NFPROTO_NUMPROTO]; }; struct compat_delta { unsigned int offset; /* offset in kernel */ int delta; /* delta in 32bit user land */ }; struct xt_af { struct mutex mutex; struct list_head match; struct list_head target; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT struct mutex compat_mutex; struct compat_delta *compat_tab; unsigned int number; /* number of slots in compat_tab[] */ unsigned int cur; /* number of used slots in compat_tab[] */ #endif }; static unsigned int xt_pernet_id __read_mostly; static struct xt_af *xt __read_mostly; static const char *const xt_prefix[NFPROTO_NUMPROTO] = { [NFPROTO_UNSPEC] = "x", [NFPROTO_IPV4] = "ip", [NFPROTO_ARP] = "arp", [NFPROTO_BRIDGE] = "eb", [NFPROTO_IPV6] = "ip6", }; /* Registration hooks for targets. */ int xt_register_target(struct xt_target *target) { u_int8_t af = target->family; mutex_lock(&xt[af].mutex); list_add(&target->list, &xt[af].target); mutex_unlock(&xt[af].mutex); return 0; } EXPORT_SYMBOL(xt_register_target); void xt_unregister_target(struct xt_target *target) { u_int8_t af = target->family; mutex_lock(&xt[af].mutex); list_del(&target->list); mutex_unlock(&xt[af].mutex); } EXPORT_SYMBOL(xt_unregister_target); int xt_register_targets(struct xt_target *target, unsigned int n) { unsigned int i; int err = 0; for (i = 0; i < n; i++) { err = xt_register_target(&target[i]); if (err) goto err; } return err; err: if (i > 0) xt_unregister_targets(target, i); return err; } EXPORT_SYMBOL(xt_register_targets); void xt_unregister_targets(struct xt_target *target, unsigned int n) { while (n-- > 0) xt_unregister_target(&target[n]); } EXPORT_SYMBOL(xt_unregister_targets); int xt_register_match(struct xt_match *match) { u_int8_t af = match->family; mutex_lock(&xt[af].mutex); list_add(&match->list, &xt[af].match); mutex_unlock(&xt[af].mutex); return 0; } EXPORT_SYMBOL(xt_register_match); void xt_unregister_match(struct xt_match *match) { u_int8_t af = match->family; mutex_lock(&xt[af].mutex); list_del(&match->list); mutex_unlock(&xt[af].mutex); } EXPORT_SYMBOL(xt_unregister_match); int xt_register_matches(struct xt_match *match, unsigned int n) { unsigned int i; int err = 0; for (i = 0; i < n; i++) { err = xt_register_match(&match[i]); if (err) goto err; } return err; err: if (i > 0) xt_unregister_matches(match, i); return err; } EXPORT_SYMBOL(xt_register_matches); void xt_unregister_matches(struct xt_match *match, unsigned int n) { while (n-- > 0) xt_unregister_match(&match[n]); } EXPORT_SYMBOL(xt_unregister_matches); /* * These are weird, but module loading must not be done with mutex * held (since they will register), and we have to have a single * function to use. */ /* Find match, grabs ref. Returns ERR_PTR() on error. */ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) { struct xt_match *m; int err = -ENOENT; if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) return ERR_PTR(-EINVAL); mutex_lock(&xt[af].mutex); list_for_each_entry(m, &xt[af].match, list) { if (strcmp(m->name, name) == 0) { if (m->revision == revision) { if (try_module_get(m->me)) { mutex_unlock(&xt[af].mutex); return m; } } else err = -EPROTOTYPE; /* Found something. */ } } mutex_unlock(&xt[af].mutex); if (af != NFPROTO_UNSPEC) /* Try searching again in the family-independent list */ return xt_find_match(NFPROTO_UNSPEC, name, revision); return ERR_PTR(err); } EXPORT_SYMBOL(xt_find_match); struct xt_match * xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision) { struct xt_match *match; if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) return ERR_PTR(-EINVAL); match = xt_find_match(nfproto, name, revision); if (IS_ERR(match)) { request_module("%st_%s", xt_prefix[nfproto], name); match = xt_find_match(nfproto, name, revision); } return match; } EXPORT_SYMBOL_GPL(xt_request_find_match); /* Find target, grabs ref. Returns ERR_PTR() on error. */ static struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) { struct xt_target *t; int err = -ENOENT; if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) return ERR_PTR(-EINVAL); mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt[af].target, list) { if (strcmp(t->name, name) == 0) { if (t->revision == revision) { if (try_module_get(t->me)) { mutex_unlock(&xt[af].mutex); return t; } } else err = -EPROTOTYPE; /* Found something. */ } } mutex_unlock(&xt[af].mutex); if (af != NFPROTO_UNSPEC) /* Try searching again in the family-independent list */ return xt_find_target(NFPROTO_UNSPEC, name, revision); return ERR_PTR(err); } struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) { struct xt_target *target; if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) return ERR_PTR(-EINVAL); target = xt_find_target(af, name, revision); if (IS_ERR(target)) { request_module("%st_%s", xt_prefix[af], name); target = xt_find_target(af, name, revision); } return target; } EXPORT_SYMBOL_GPL(xt_request_find_target); static int xt_obj_to_user(u16 __user *psize, u16 size, void __user *pname, const char *name, u8 __user *prev, u8 rev) { if (put_user(size, psize)) return -EFAULT; if (copy_to_user(pname, name, strlen(name) + 1)) return -EFAULT; if (put_user(rev, prev)) return -EFAULT; return 0; } #define XT_OBJ_TO_USER(U, K, TYPE, C_SIZE) \ xt_obj_to_user(&U->u.TYPE##_size, C_SIZE ? : K->u.TYPE##_size, \ U->u.user.name, K->u.kernel.TYPE->name, \ &U->u.user.revision, K->u.kernel.TYPE->revision) int xt_data_to_user(void __user *dst, const void *src, int usersize, int size, int aligned_size) { usersize = usersize ? : size; if (copy_to_user(dst, src, usersize)) return -EFAULT; if (usersize != aligned_size && clear_user(dst + usersize, aligned_size - usersize)) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(xt_data_to_user); #define XT_DATA_TO_USER(U, K, TYPE) \ xt_data_to_user(U->data, K->data, \ K->u.kernel.TYPE->usersize, \ K->u.kernel.TYPE->TYPE##size, \ XT_ALIGN(K->u.kernel.TYPE->TYPE##size)) int xt_match_to_user(const struct xt_entry_match *m, struct xt_entry_match __user *u) { return XT_OBJ_TO_USER(u, m, match, 0) || XT_DATA_TO_USER(u, m, match); } EXPORT_SYMBOL_GPL(xt_match_to_user); int xt_target_to_user(const struct xt_entry_target *t, struct xt_entry_target __user *u) { return XT_OBJ_TO_USER(u, t, target, 0) || XT_DATA_TO_USER(u, t, target); } EXPORT_SYMBOL_GPL(xt_target_to_user); static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) { const struct xt_match *m; int have_rev = 0; mutex_lock(&xt[af].mutex); list_for_each_entry(m, &xt[af].match, list) { if (strcmp(m->name, name) == 0) { if (m->revision > *bestp) *bestp = m->revision; if (m->revision == revision) have_rev = 1; } } mutex_unlock(&xt[af].mutex); if (af != NFPROTO_UNSPEC && !have_rev) return match_revfn(NFPROTO_UNSPEC, name, revision, bestp); return have_rev; } static int target_revfn(u8 af, const char *name, u8 revision, int *bestp) { const struct xt_target *t; int have_rev = 0; mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt[af].target, list) { if (strcmp(t->name, name) == 0) { if (t->revision > *bestp) *bestp = t->revision; if (t->revision == revision) have_rev = 1; } } mutex_unlock(&xt[af].mutex); if (af != NFPROTO_UNSPEC && !have_rev) return target_revfn(NFPROTO_UNSPEC, name, revision, bestp); return have_rev; } /* Returns true or false (if no such extension at all) */ int xt_find_revision(u8 af, const char *name, u8 revision, int target, int *err) { int have_rev, best = -1; if (target == 1) have_rev = target_revfn(af, name, revision, &best); else have_rev = match_revfn(af, name, revision, &best); /* Nothing at all? Return 0 to try loading module. */ if (best == -1) { *err = -ENOENT; return 0; } *err = best; if (!have_rev) *err = -EPROTONOSUPPORT; return 1; } EXPORT_SYMBOL_GPL(xt_find_revision); static char * textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto) { static const char *const inetbr_names[] = { "PREROUTING", "INPUT", "FORWARD", "OUTPUT", "POSTROUTING", "BROUTING", }; static const char *const arp_names[] = { "INPUT", "FORWARD", "OUTPUT", }; const char *const *names; unsigned int i, max; char *p = buf; bool np = false; int res; names = (nfproto == NFPROTO_ARP) ? arp_names : inetbr_names; max = (nfproto == NFPROTO_ARP) ? ARRAY_SIZE(arp_names) : ARRAY_SIZE(inetbr_names); *p = '\0'; for (i = 0; i < max; ++i) { if (!(mask & (1 << i))) continue; res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]); if (res > 0) { size -= res; p += res; } np = true; } return buf; } /** * xt_check_proc_name - check that name is suitable for /proc file creation * * @name: file name candidate * @size: length of buffer * * some x_tables modules wish to create a file in /proc. * This function makes sure that the name is suitable for this * purpose, it checks that name is NUL terminated and isn't a 'special' * name, like "..". * * returns negative number on error or 0 if name is useable. */ int xt_check_proc_name(const char *name, unsigned int size) { if (name[0] == '\0') return -EINVAL; if (strnlen(name, size) == size) return -ENAMETOOLONG; if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0 || strchr(name, '/')) return -EINVAL; return 0; } EXPORT_SYMBOL(xt_check_proc_name); int xt_check_match(struct xt_mtchk_param *par, unsigned int size, u16 proto, bool inv_proto) { int ret; if (XT_ALIGN(par->match->matchsize) != size && par->match->matchsize != -1) { /* * ebt_among is exempt from centralized matchsize checking * because it uses a dynamic-size data set. */ pr_err_ratelimited("%s_tables: %s.%u match: invalid size %u (kernel) != (user) %u\n", xt_prefix[par->family], par->match->name, par->match->revision, XT_ALIGN(par->match->matchsize), size); return -EINVAL; } if (par->match->table != NULL && strcmp(par->match->table, par->table) != 0) { pr_info_ratelimited("%s_tables: %s match: only valid in %s table, not %s\n", xt_prefix[par->family], par->match->name, par->match->table, par->table); return -EINVAL; } if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) { char used[64], allow[64]; pr_info_ratelimited("%s_tables: %s match: used from hooks %s, but only valid from %s\n", xt_prefix[par->family], par->match->name, textify_hooks(used, sizeof(used), par->hook_mask, par->family), textify_hooks(allow, sizeof(allow), par->match->hooks, par->family)); return -EINVAL; } if (par->match->proto && (par->match->proto != proto || inv_proto)) { pr_info_ratelimited("%s_tables: %s match: only valid for protocol %u\n", xt_prefix[par->family], par->match->name, par->match->proto); return -EINVAL; } if (par->match->checkentry != NULL) { ret = par->match->checkentry(par); if (ret < 0) return ret; else if (ret > 0) /* Flag up potential errors. */ return -EIO; } return 0; } EXPORT_SYMBOL_GPL(xt_check_match); /** xt_check_entry_match - check that matches end before start of target * * @match: beginning of xt_entry_match * @target: beginning of this rules target (alleged end of matches) * @alignment: alignment requirement of match structures * * Validates that all matches add up to the beginning of the target, * and that each match covers at least the base structure size. * * Return: 0 on success, negative errno on failure. */ static int xt_check_entry_match(const char *match, const char *target, const size_t alignment) { const struct xt_entry_match *pos; int length = target - match; if (length == 0) /* no matches */ return 0; pos = (struct xt_entry_match *)match; do { if ((unsigned long)pos % alignment) return -EINVAL; if (length < (int)sizeof(struct xt_entry_match)) return -EINVAL; if (pos->u.match_size < sizeof(struct xt_entry_match)) return -EINVAL; if (pos->u.match_size > length) return -EINVAL; length -= pos->u.match_size; pos = ((void *)((char *)(pos) + (pos)->u.match_size)); } while (length > 0); return 0; } /** xt_check_table_hooks - check hook entry points are sane * * @info xt_table_info to check * @valid_hooks - hook entry points that we can enter from * * Validates that the hook entry and underflows points are set up. * * Return: 0 on success, negative errno on failure. */ int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks) { const char *err = "unsorted underflow"; unsigned int i, max_uflow, max_entry; bool check_hooks = false; BUILD_BUG_ON(ARRAY_SIZE(info->hook_entry) != ARRAY_SIZE(info->underflow)); max_entry = 0; max_uflow = 0; for (i = 0; i < ARRAY_SIZE(info->hook_entry); i++) { if (!(valid_hooks & (1 << i))) continue; if (info->hook_entry[i] == 0xFFFFFFFF) return -EINVAL; if (info->underflow[i] == 0xFFFFFFFF) return -EINVAL; if (check_hooks) { if (max_uflow > info->underflow[i]) goto error; if (max_uflow == info->underflow[i]) { err = "duplicate underflow"; goto error; } if (max_entry > info->hook_entry[i]) { err = "unsorted entry"; goto error; } if (max_entry == info->hook_entry[i]) { err = "duplicate entry"; goto error; } } max_entry = info->hook_entry[i]; max_uflow = info->underflow[i]; check_hooks = true; } return 0; error: pr_err_ratelimited("%s at hook %d\n", err, i); return -EINVAL; } EXPORT_SYMBOL(xt_check_table_hooks); static bool verdict_ok(int verdict) { if (verdict > 0) return true; if (verdict < 0) { int v = -verdict - 1; if (verdict == XT_RETURN) return true; switch (v) { case NF_ACCEPT: return true; case NF_DROP: return true; case NF_QUEUE: return true; default: break; } return false; } return false; } static bool error_tg_ok(unsigned int usersize, unsigned int kernsize, const char *msg, unsigned int msglen) { return usersize == kernsize && strnlen(msg, msglen) < msglen; } #ifdef CONFIG_NETFILTER_XTABLES_COMPAT int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) { struct xt_af *xp = &xt[af]; WARN_ON(!mutex_is_locked(&xt[af].compat_mutex)); if (WARN_ON(!xp->compat_tab)) return -ENOMEM; if (xp->cur >= xp->number) return -EINVAL; if (xp->cur) delta += xp->compat_tab[xp->cur - 1].delta; xp->compat_tab[xp->cur].offset = offset; xp->compat_tab[xp->cur].delta = delta; xp->cur++; return 0; } EXPORT_SYMBOL_GPL(xt_compat_add_offset); void xt_compat_flush_offsets(u_int8_t af) { WARN_ON(!mutex_is_locked(&xt[af].compat_mutex)); if (xt[af].compat_tab) { vfree(xt[af].compat_tab); xt[af].compat_tab = NULL; xt[af].number = 0; xt[af].cur = 0; } } EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); int xt_compat_calc_jump(u_int8_t af, unsigned int offset) { struct compat_delta *tmp = xt[af].compat_tab; int mid, left = 0, right = xt[af].cur - 1; while (left <= right) { mid = (left + right) >> 1; if (offset > tmp[mid].offset) left = mid + 1; else if (offset < tmp[mid].offset) right = mid - 1; else return mid ? tmp[mid - 1].delta : 0; } return left ? tmp[left - 1].delta : 0; } EXPORT_SYMBOL_GPL(xt_compat_calc_jump); int xt_compat_init_offsets(u8 af, unsigned int number) { size_t mem; WARN_ON(!mutex_is_locked(&xt[af].compat_mutex)); if (!number || number > (INT_MAX / sizeof(struct compat_delta))) return -EINVAL; if (WARN_ON(xt[af].compat_tab)) return -EINVAL; mem = sizeof(struct compat_delta) * number; if (mem > XT_MAX_TABLE_SIZE) return -ENOMEM; xt[af].compat_tab = vmalloc(mem); if (!xt[af].compat_tab) return -ENOMEM; xt[af].number = number; xt[af].cur = 0; return 0; } EXPORT_SYMBOL(xt_compat_init_offsets); int xt_compat_match_offset(const struct xt_match *match) { u_int16_t csize = match->compatsize ? : match->matchsize; return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize); } EXPORT_SYMBOL_GPL(xt_compat_match_offset); void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, unsigned int *size) { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; int off = xt_compat_match_offset(match); u_int16_t msize = cm->u.user.match_size; char name[sizeof(m->u.user.name)]; m = *dstptr; memcpy(m, cm, sizeof(*cm)); if (match->compat_from_user) match->compat_from_user(m->data, cm->data); else memcpy(m->data, cm->data, msize - sizeof(*cm)); msize += off; m->u.user.match_size = msize; strscpy(name, match->name, sizeof(name)); module_put(match->me); strscpy_pad(m->u.user.name, name, sizeof(m->u.user.name)); *size += off; *dstptr += msize; } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); #define COMPAT_XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \ xt_data_to_user(U->data, K->data, \ K->u.kernel.TYPE->usersize, \ C_SIZE, \ COMPAT_XT_ALIGN(C_SIZE)) int xt_compat_match_to_user(const struct xt_entry_match *m, void __user **dstptr, unsigned int *size) { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match __user *cm = *dstptr; int off = xt_compat_match_offset(match); u_int16_t msize = m->u.user.match_size - off; if (XT_OBJ_TO_USER(cm, m, match, msize)) return -EFAULT; if (match->compat_to_user) { if (match->compat_to_user((void __user *)cm->data, m->data)) return -EFAULT; } else { if (COMPAT_XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm))) return -EFAULT; } *size -= off; *dstptr += msize; return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_to_user); /* non-compat version may have padding after verdict */ struct compat_xt_standard_target { struct compat_xt_entry_target t; compat_uint_t verdict; }; struct compat_xt_error_target { struct compat_xt_entry_target t; char errorname[XT_FUNCTION_MAXNAMELEN]; }; int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset) { long size_of_base_struct = elems - (const char *)base; const struct compat_xt_entry_target *t; const char *e = base; if (target_offset < size_of_base_struct) return -EINVAL; if (target_offset + sizeof(*t) > next_offset) return -EINVAL; t = (void *)(e + target_offset); if (t->u.target_size < sizeof(*t)) return -EINVAL; if (target_offset + t->u.target_size > next_offset) return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) { const struct compat_xt_standard_target *st = (const void *)t; if (COMPAT_XT_ALIGN(target_offset + sizeof(*st)) != next_offset) return -EINVAL; if (!verdict_ok(st->verdict)) return -EINVAL; } else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) { const struct compat_xt_error_target *et = (const void *)t; if (!error_tg_ok(t->u.target_size, sizeof(*et), et->errorname, sizeof(et->errorname))) return -EINVAL; } /* compat_xt_entry match has less strict alignment requirements, * otherwise they are identical. In case of padding differences * we need to add compat version of xt_check_entry_match. */ BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match)); return xt_check_entry_match(elems, base + target_offset, __alignof__(struct compat_xt_entry_match)); } EXPORT_SYMBOL(xt_compat_check_entry_offsets); #endif /* CONFIG_NETFILTER_XTABLES_COMPAT */ /** * xt_check_entry_offsets - validate arp/ip/ip6t_entry * * @base: pointer to arp/ip/ip6t_entry * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems * @target_offset: the arp/ip/ip6_t->target_offset * @next_offset: the arp/ip/ip6_t->next_offset * * validates that target_offset and next_offset are sane and that all * match sizes (if any) align with the target offset. * * This function does not validate the targets or matches themselves, it * only tests that all the offsets and sizes are correct, that all * match structures are aligned, and that the last structure ends where * the target structure begins. * * Also see xt_compat_check_entry_offsets for CONFIG_NETFILTER_XTABLES_COMPAT version. * * The arp/ip/ip6t_entry structure @base must have passed following tests: * - it must point to a valid memory location * - base to base + next_offset must be accessible, i.e. not exceed allocated * length. * * A well-formed entry looks like this: * * ip(6)t_entry match [mtdata] match [mtdata] target [tgdata] ip(6)t_entry * e->elems[]-----' | | * matchsize | | * matchsize | | * | | * target_offset---------------------------------' | * next_offset---------------------------------------------------' * * elems[]: flexible array member at end of ip(6)/arpt_entry struct. * This is where matches (if any) and the target reside. * target_offset: beginning of target. * next_offset: start of the next rule; also: size of this rule. * Since targets have a minimum size, target_offset + minlen <= next_offset. * * Every match stores its size, sum of sizes must not exceed target_offset. * * Return: 0 on success, negative errno on failure. */ int xt_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset) { long size_of_base_struct = elems - (const char *)base; const struct xt_entry_target *t; const char *e = base; /* target start is within the ip/ip6/arpt_entry struct */ if (target_offset < size_of_base_struct) return -EINVAL; if (target_offset + sizeof(*t) > next_offset) return -EINVAL; t = (void *)(e + target_offset); if (t->u.target_size < sizeof(*t)) return -EINVAL; if (target_offset + t->u.target_size > next_offset) return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) { const struct xt_standard_target *st = (const void *)t; if (XT_ALIGN(target_offset + sizeof(*st)) != next_offset) return -EINVAL; if (!verdict_ok(st->verdict)) return -EINVAL; } else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) { const struct xt_error_target *et = (const void *)t; if (!error_tg_ok(t->u.target_size, sizeof(*et), et->errorname, sizeof(et->errorname))) return -EINVAL; } return xt_check_entry_match(elems, base + target_offset, __alignof__(struct xt_entry_match)); } EXPORT_SYMBOL(xt_check_entry_offsets); /** * xt_alloc_entry_offsets - allocate array to store rule head offsets * * @size: number of entries * * Return: NULL or zeroed kmalloc'd or vmalloc'd array */ unsigned int *xt_alloc_entry_offsets(unsigned int size) { if (size > XT_MAX_TABLE_SIZE / sizeof(unsigned int)) return NULL; return kvcalloc(size, sizeof(unsigned int), GFP_KERNEL); } EXPORT_SYMBOL(xt_alloc_entry_offsets); /** * xt_find_jump_offset - check if target is a valid jump offset * * @offsets: array containing all valid rule start offsets of a rule blob * @target: the jump target to search for * @size: entries in @offset */ bool xt_find_jump_offset(const unsigned int *offsets, unsigned int target, unsigned int size) { int m, low = 0, hi = size; while (hi > low) { m = (low + hi) / 2u; if (offsets[m] > target) hi = m; else if (offsets[m] < target) low = m + 1; else return true; } return false; } EXPORT_SYMBOL(xt_find_jump_offset); int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u16 proto, bool inv_proto) { int ret; if (XT_ALIGN(par->target->targetsize) != size) { pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n", xt_prefix[par->family], par->target->name, par->target->revision, XT_ALIGN(par->target->targetsize), size); return -EINVAL; } if (par->target->table != NULL && strcmp(par->target->table, par->table) != 0) { pr_info_ratelimited("%s_tables: %s target: only valid in %s table, not %s\n", xt_prefix[par->family], par->target->name, par->target->table, par->table); return -EINVAL; } if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) { char used[64], allow[64]; pr_info_ratelimited("%s_tables: %s target: used from hooks %s, but only usable from %s\n", xt_prefix[par->family], par->target->name, textify_hooks(used, sizeof(used), par->hook_mask, par->family), textify_hooks(allow, sizeof(allow), par->target->hooks, par->family)); return -EINVAL; } if (par->target->proto && (par->target->proto != proto || inv_proto)) { pr_info_ratelimited("%s_tables: %s target: only valid for protocol %u\n", xt_prefix[par->family], par->target->name, par->target->proto); return -EINVAL; } if (par->target->checkentry != NULL) { ret = par->target->checkentry(par); if (ret < 0) return ret; else if (ret > 0) /* Flag up potential errors. */ return -EIO; } return 0; } EXPORT_SYMBOL_GPL(xt_check_target); /** * xt_copy_counters - copy counters and metadata from a sockptr_t * * @arg: src sockptr * @len: alleged size of userspace memory * @info: where to store the xt_counters_info metadata * * Copies counter meta data from @user and stores it in @info. * * vmallocs memory to hold the counters, then copies the counter data * from @user to the new memory and returns a pointer to it. * * If called from a compat syscall, @info gets converted automatically to the * 64bit representation. * * The metadata associated with the counters is stored in @info. * * Return: returns pointer that caller has to test via IS_ERR(). * If IS_ERR is false, caller has to vfree the pointer. */ void *xt_copy_counters(sockptr_t arg, unsigned int len, struct xt_counters_info *info) { size_t offset; void *mem; u64 size; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) { /* structures only differ in size due to alignment */ struct compat_xt_counters_info compat_tmp; if (len <= sizeof(compat_tmp)) return ERR_PTR(-EINVAL); len -= sizeof(compat_tmp); if (copy_from_sockptr(&compat_tmp, arg, sizeof(compat_tmp)) != 0) return ERR_PTR(-EFAULT); memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1); info->num_counters = compat_tmp.num_counters; offset = sizeof(compat_tmp); } else #endif { if (len <= sizeof(*info)) return ERR_PTR(-EINVAL); len -= sizeof(*info); if (copy_from_sockptr(info, arg, sizeof(*info)) != 0) return ERR_PTR(-EFAULT); offset = sizeof(*info); } info->name[sizeof(info->name) - 1] = '\0'; size = sizeof(struct xt_counters); size *= info->num_counters; if (size != (u64)len) return ERR_PTR(-EINVAL); mem = vmalloc(len); if (!mem) return ERR_PTR(-ENOMEM); if (copy_from_sockptr_offset(mem, arg, offset, len) == 0) return mem; vfree(mem); return ERR_PTR(-EFAULT); } EXPORT_SYMBOL_GPL(xt_copy_counters); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT int xt_compat_target_offset(const struct xt_target *target) { u_int16_t csize = target->compatsize ? : target->targetsize; return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize); } EXPORT_SYMBOL_GPL(xt_compat_target_offset); void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size) { const struct xt_target *target = t->u.kernel.target; struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; int off = xt_compat_target_offset(target); u_int16_t tsize = ct->u.user.target_size; char name[sizeof(t->u.user.name)]; t = *dstptr; memcpy(t, ct, sizeof(*ct)); if (target->compat_from_user) target->compat_from_user(t->data, ct->data); else unsafe_memcpy(t->data, ct->data, tsize - sizeof(*ct), /* UAPI 0-sized destination */); tsize += off; t->u.user.target_size = tsize; strscpy(name, target->name, sizeof(name)); module_put(target->me); strscpy_pad(t->u.user.name, name, sizeof(t->u.user.name)); *size += off; *dstptr += tsize; } EXPORT_SYMBOL_GPL(xt_compat_target_from_user); int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size) { const struct xt_target *target = t->u.kernel.target; struct compat_xt_entry_target __user *ct = *dstptr; int off = xt_compat_target_offset(target); u_int16_t tsize = t->u.user.target_size - off; if (XT_OBJ_TO_USER(ct, t, target, tsize)) return -EFAULT; if (target->compat_to_user) { if (target->compat_to_user((void __user *)ct->data, t->data)) return -EFAULT; } else { if (COMPAT_XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct))) return -EFAULT; } *size -= off; *dstptr += tsize; return 0; } EXPORT_SYMBOL_GPL(xt_compat_target_to_user); #endif struct xt_table_info *xt_alloc_table_info(unsigned int size) { struct xt_table_info *info = NULL; size_t sz = sizeof(*info) + size; if (sz < sizeof(*info) || sz >= XT_MAX_TABLE_SIZE) return NULL; info = kvmalloc(sz, GFP_KERNEL_ACCOUNT); if (!info) return NULL; memset(info, 0, sizeof(*info)); info->size = size; return info; } EXPORT_SYMBOL(xt_alloc_table_info); void xt_free_table_info(struct xt_table_info *info) { int cpu; if (info->jumpstack != NULL) { for_each_possible_cpu(cpu) kvfree(info->jumpstack[cpu]); kvfree(info->jumpstack); } kvfree(info); } EXPORT_SYMBOL(xt_free_table_info); struct xt_table *xt_find_table(struct net *net, u8 af, const char *name) { struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); struct xt_table *t; mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt_net->tables[af], list) { if (strcmp(t->name, name) == 0) { mutex_unlock(&xt[af].mutex); return t; } } mutex_unlock(&xt[af].mutex); return NULL; } EXPORT_SYMBOL(xt_find_table); /* Find table by name, grabs mutex & ref. Returns ERR_PTR on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) { struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); struct module *owner = NULL; struct xt_template *tmpl; struct xt_table *t; mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt_net->tables[af], list) if (strcmp(t->name, name) == 0 && try_module_get(t->me)) return t; /* Table doesn't exist in this netns, check larval list */ list_for_each_entry(tmpl, &xt_templates[af], list) { int err; if (strcmp(tmpl->name, name)) continue; if (!try_module_get(tmpl->me)) goto out; owner = tmpl->me; mutex_unlock(&xt[af].mutex); err = tmpl->table_init(net); if (err < 0) { module_put(owner); return ERR_PTR(err); } mutex_lock(&xt[af].mutex); break; } /* and once again: */ list_for_each_entry(t, &xt_net->tables[af], list) if (strcmp(t->name, name) == 0) return t; module_put(owner); out: mutex_unlock(&xt[af].mutex); return ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(xt_find_table_lock); struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af, const char *name) { struct xt_table *t = xt_find_table_lock(net, af, name); #ifdef CONFIG_MODULES if (IS_ERR(t)) { int err = request_module("%stable_%s", xt_prefix[af], name); if (err < 0) return ERR_PTR(err); t = xt_find_table_lock(net, af, name); } #endif return t; } EXPORT_SYMBOL_GPL(xt_request_find_table_lock); void xt_table_unlock(struct xt_table *table) { mutex_unlock(&xt[table->af].mutex); } EXPORT_SYMBOL_GPL(xt_table_unlock); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT void xt_compat_lock(u_int8_t af) { mutex_lock(&xt[af].compat_mutex); } EXPORT_SYMBOL_GPL(xt_compat_lock); void xt_compat_unlock(u_int8_t af) { mutex_unlock(&xt[af].compat_mutex); } EXPORT_SYMBOL_GPL(xt_compat_unlock); #endif DEFINE_PER_CPU(seqcount_t, xt_recseq); EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq); struct static_key xt_tee_enabled __read_mostly; EXPORT_SYMBOL_GPL(xt_tee_enabled); static int xt_jumpstack_alloc(struct xt_table_info *i) { unsigned int size; int cpu; size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) i->jumpstack = kvzalloc(size, GFP_KERNEL); else i->jumpstack = kzalloc(size, GFP_KERNEL); if (i->jumpstack == NULL) return -ENOMEM; /* ruleset without jumps -- no stack needed */ if (i->stacksize == 0) return 0; /* Jumpstack needs to be able to record two full callchains, one * from the first rule set traversal, plus one table reentrancy * via -j TEE without clobbering the callchain that brought us to * TEE target. * * This is done by allocating two jumpstacks per cpu, on reentry * the upper half of the stack is used. * * see the jumpstack setup in ipt_do_table() for more details. */ size = sizeof(void *) * i->stacksize * 2u; for_each_possible_cpu(cpu) { i->jumpstack[cpu] = kvmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); if (i->jumpstack[cpu] == NULL) /* * Freeing will be done later on by the callers. The * chain is: xt_replace_table -> __do_replace -> * do_replace -> xt_free_table_info. */ return -ENOMEM; } return 0; } struct xt_counters *xt_counters_alloc(unsigned int counters) { struct xt_counters *mem; if (counters == 0 || counters > INT_MAX / sizeof(*mem)) return NULL; counters *= sizeof(*mem); if (counters > XT_MAX_TABLE_SIZE) return NULL; return vzalloc(counters); } EXPORT_SYMBOL(xt_counters_alloc); struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, struct xt_table_info *newinfo, int *error) { struct xt_table_info *private; unsigned int cpu; int ret; ret = xt_jumpstack_alloc(newinfo); if (ret < 0) { *error = ret; return NULL; } /* Do the substitution. */ local_bh_disable(); private = table->private; /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { pr_debug("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); local_bh_enable(); *error = -EAGAIN; return NULL; } newinfo->initial_entries = private->initial_entries; /* * Ensure contents of newinfo are visible before assigning to * private. */ smp_wmb(); table->private = newinfo; /* make sure all cpus see new ->private value */ smp_mb(); /* * Even though table entries have now been swapped, other CPU's * may still be using the old entries... */ local_bh_enable(); /* ... so wait for even xt_recseq on all cpus */ for_each_possible_cpu(cpu) { seqcount_t *s = &per_cpu(xt_recseq, cpu); u32 seq = raw_read_seqcount(s); if (seq & 1) { do { cond_resched(); cpu_relax(); } while (seq == raw_read_seqcount(s)); } } audit_log_nfcfg(table->name, table->af, private->number, !private->number ? AUDIT_XT_OP_REGISTER : AUDIT_XT_OP_REPLACE, GFP_KERNEL); return private; } EXPORT_SYMBOL_GPL(xt_replace_table); struct xt_table *xt_register_table(struct net *net, const struct xt_table *input_table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo) { struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); struct xt_table_info *private; struct xt_table *t, *table; int ret; /* Don't add one object to multiple lists. */ table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); if (!table) { ret = -ENOMEM; goto out; } mutex_lock(&xt[table->af].mutex); /* Don't autoload: we'd eat our tail... */ list_for_each_entry(t, &xt_net->tables[table->af], list) { if (strcmp(t->name, table->name) == 0) { ret = -EEXIST; goto unlock; } } /* Simplifies replace_table code. */ table->private = bootstrap; if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; private = table->private; pr_debug("table->private->number = %u\n", private->number); /* save number of initial entries */ private->initial_entries = private->number; list_add(&table->list, &xt_net->tables[table->af]); mutex_unlock(&xt[table->af].mutex); return table; unlock: mutex_unlock(&xt[table->af].mutex); kfree(table); out: return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(xt_register_table); void *xt_unregister_table(struct xt_table *table) { struct xt_table_info *private; mutex_lock(&xt[table->af].mutex); private = table->private; list_del(&table->list); mutex_unlock(&xt[table->af].mutex); audit_log_nfcfg(table->name, table->af, private->number, AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); kfree(table->ops); kfree(table); return private; } EXPORT_SYMBOL_GPL(xt_unregister_table); #ifdef CONFIG_PROC_FS static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos) { u8 af = (unsigned long)pde_data(file_inode(seq->file)); struct net *net = seq_file_net(seq); struct xt_pernet *xt_net; xt_net = net_generic(net, xt_pernet_id); mutex_lock(&xt[af].mutex); return seq_list_start(&xt_net->tables[af], *pos); } static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos) { u8 af = (unsigned long)pde_data(file_inode(seq->file)); struct net *net = seq_file_net(seq); struct xt_pernet *xt_net; xt_net = net_generic(net, xt_pernet_id); return seq_list_next(v, &xt_net->tables[af], pos); } static void xt_table_seq_stop(struct seq_file *seq, void *v) { u_int8_t af = (unsigned long)pde_data(file_inode(seq->file)); mutex_unlock(&xt[af].mutex); } static int xt_table_seq_show(struct seq_file *seq, void *v) { struct xt_table *table = list_entry(v, struct xt_table, list); if (*table->name) seq_printf(seq, "%s\n", table->name); return 0; } static const struct seq_operations xt_table_seq_ops = { .start = xt_table_seq_start, .next = xt_table_seq_next, .stop = xt_table_seq_stop, .show = xt_table_seq_show, }; /* * Traverse state for ip{,6}_{tables,matches} for helping crossing * the multi-AF mutexes. */ struct nf_mttg_trav { struct list_head *head, *curr; uint8_t class; }; enum { MTTG_TRAV_INIT, MTTG_TRAV_NFP_UNSPEC, MTTG_TRAV_NFP_SPEC, MTTG_TRAV_DONE, }; static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos, bool is_target) { static const uint8_t next_class[] = { [MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC, [MTTG_TRAV_NFP_SPEC] = MTTG_TRAV_DONE, }; uint8_t nfproto = (unsigned long)pde_data(file_inode(seq->file)); struct nf_mttg_trav *trav = seq->private; if (ppos != NULL) ++(*ppos); switch (trav->class) { case MTTG_TRAV_INIT: trav->class = MTTG_TRAV_NFP_UNSPEC; mutex_lock(&xt[NFPROTO_UNSPEC].mutex); trav->head = trav->curr = is_target ? &xt[NFPROTO_UNSPEC].target : &xt[NFPROTO_UNSPEC].match; break; case MTTG_TRAV_NFP_UNSPEC: trav->curr = trav->curr->next; if (trav->curr != trav->head) break; mutex_unlock(&xt[NFPROTO_UNSPEC].mutex); mutex_lock(&xt[nfproto].mutex); trav->head = trav->curr = is_target ? &xt[nfproto].target : &xt[nfproto].match; trav->class = next_class[trav->class]; break; case MTTG_TRAV_NFP_SPEC: trav->curr = trav->curr->next; if (trav->curr != trav->head) break; fallthrough; default: return NULL; } return trav; } static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos, bool is_target) { struct nf_mttg_trav *trav = seq->private; unsigned int j; trav->class = MTTG_TRAV_INIT; for (j = 0; j < *pos; ++j) if (xt_mttg_seq_next(seq, NULL, NULL, is_target) == NULL) return NULL; return trav; } static void xt_mttg_seq_stop(struct seq_file *seq, void *v) { uint8_t nfproto = (unsigned long)pde_data(file_inode(seq->file)); struct nf_mttg_trav *trav = seq->private; switch (trav->class) { case MTTG_TRAV_NFP_UNSPEC: mutex_unlock(&xt[NFPROTO_UNSPEC].mutex); break; case MTTG_TRAV_NFP_SPEC: mutex_unlock(&xt[nfproto].mutex); break; } } static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos) { return xt_mttg_seq_start(seq, pos, false); } static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *ppos) { return xt_mttg_seq_next(seq, v, ppos, false); } static int xt_match_seq_show(struct seq_file *seq, void *v) { const struct nf_mttg_trav *trav = seq->private; const struct xt_match *match; switch (trav->class) { case MTTG_TRAV_NFP_UNSPEC: case MTTG_TRAV_NFP_SPEC: if (trav->curr == trav->head) return 0; match = list_entry(trav->curr, struct xt_match, list); if (*match->name) seq_printf(seq, "%s\n", match->name); } return 0; } static const struct seq_operations xt_match_seq_ops = { .start = xt_match_seq_start, .next = xt_match_seq_next, .stop = xt_mttg_seq_stop, .show = xt_match_seq_show, }; static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos) { return xt_mttg_seq_start(seq, pos, true); } static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *ppos) { return xt_mttg_seq_next(seq, v, ppos, true); } static int xt_target_seq_show(struct seq_file *seq, void *v) { const struct nf_mttg_trav *trav = seq->private; const struct xt_target *target; switch (trav->class) { case MTTG_TRAV_NFP_UNSPEC: case MTTG_TRAV_NFP_SPEC: if (trav->curr == trav->head) return 0; target = list_entry(trav->curr, struct xt_target, list); if (*target->name) seq_printf(seq, "%s\n", target->name); } return 0; } static const struct seq_operations xt_target_seq_ops = { .start = xt_target_seq_start, .next = xt_target_seq_next, .stop = xt_mttg_seq_stop, .show = xt_target_seq_show, }; #define FORMAT_TABLES "_tables_names" #define FORMAT_MATCHES "_tables_matches" #define FORMAT_TARGETS "_tables_targets" #endif /* CONFIG_PROC_FS */ /** * xt_hook_ops_alloc - set up hooks for a new table * @table: table with metadata needed to set up hooks * @fn: Hook function * * This function will create the nf_hook_ops that the x_table needs * to hand to xt_hook_link_net(). */ struct nf_hook_ops * xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn) { unsigned int hook_mask = table->valid_hooks; uint8_t i, num_hooks = hweight32(hook_mask); uint8_t hooknum; struct nf_hook_ops *ops; if (!num_hooks) return ERR_PTR(-EINVAL); ops = kcalloc(num_hooks, sizeof(*ops), GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); for (i = 0, hooknum = 0; i < num_hooks && hook_mask != 0; hook_mask >>= 1, ++hooknum) { if (!(hook_mask & 1)) continue; ops[i].hook = fn; ops[i].pf = table->af; ops[i].hooknum = hooknum; ops[i].priority = table->priority; ++i; } return ops; } EXPORT_SYMBOL_GPL(xt_hook_ops_alloc); int xt_register_template(const struct xt_table *table, int (*table_init)(struct net *net)) { int ret = -EEXIST, af = table->af; struct xt_template *t; mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt_templates[af], list) { if (WARN_ON_ONCE(strcmp(table->name, t->name) == 0)) goto out_unlock; } ret = -ENOMEM; t = kzalloc(sizeof(*t), GFP_KERNEL); if (!t) goto out_unlock; BUILD_BUG_ON(sizeof(t->name) != sizeof(table->name)); strscpy(t->name, table->name, sizeof(t->name)); t->table_init = table_init; t->me = table->me; list_add(&t->list, &xt_templates[af]); ret = 0; out_unlock: mutex_unlock(&xt[af].mutex); return ret; } EXPORT_SYMBOL_GPL(xt_register_template); void xt_unregister_template(const struct xt_table *table) { struct xt_template *t; int af = table->af; mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt_templates[af], list) { if (strcmp(table->name, t->name)) continue; list_del(&t->list); mutex_unlock(&xt[af].mutex); kfree(t); return; } mutex_unlock(&xt[af].mutex); WARN_ON_ONCE(1); } EXPORT_SYMBOL_GPL(xt_unregister_template); int xt_proto_init(struct net *net, u_int8_t af) { #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; struct proc_dir_entry *proc; kuid_t root_uid; kgid_t root_gid; #endif if (af >= ARRAY_SIZE(xt_prefix)) return -EINVAL; #ifdef CONFIG_PROC_FS root_uid = make_kuid(net->user_ns, 0); root_gid = make_kgid(net->user_ns, 0); strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); proc = proc_create_net_data(buf, 0440, net->proc_net, &xt_table_seq_ops, sizeof(struct seq_net_private), (void *)(unsigned long)af); if (!proc) goto out; if (uid_valid(root_uid) && gid_valid(root_gid)) proc_set_user(proc, root_uid, root_gid); strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); proc = proc_create_seq_private(buf, 0440, net->proc_net, &xt_match_seq_ops, sizeof(struct nf_mttg_trav), (void *)(unsigned long)af); if (!proc) goto out_remove_tables; if (uid_valid(root_uid) && gid_valid(root_gid)) proc_set_user(proc, root_uid, root_gid); strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); proc = proc_create_seq_private(buf, 0440, net->proc_net, &xt_target_seq_ops, sizeof(struct nf_mttg_trav), (void *)(unsigned long)af); if (!proc) goto out_remove_matches; if (uid_valid(root_uid) && gid_valid(root_gid)) proc_set_user(proc, root_uid, root_gid); #endif return 0; #ifdef CONFIG_PROC_FS out_remove_matches: strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); remove_proc_entry(buf, net->proc_net); out_remove_tables: strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); remove_proc_entry(buf, net->proc_net); out: return -1; #endif } EXPORT_SYMBOL_GPL(xt_proto_init); void xt_proto_fini(struct net *net, u_int8_t af) { #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); remove_proc_entry(buf, net->proc_net); strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); remove_proc_entry(buf, net->proc_net); strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); remove_proc_entry(buf, net->proc_net); #endif /*CONFIG_PROC_FS*/ } EXPORT_SYMBOL_GPL(xt_proto_fini); /** * xt_percpu_counter_alloc - allocate x_tables rule counter * * @state: pointer to xt_percpu allocation state * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct * * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then * contain the address of the real (percpu) counter. * * Rule evaluation needs to use xt_get_this_cpu_counter() helper * to fetch the real percpu counter. * * To speed up allocation and improve data locality, a 4kb block is * allocated. Freeing any counter may free an entire block, so all * counters allocated using the same state must be freed at the same * time. * * xt_percpu_counter_alloc_state contains the base address of the * allocated page and the current sub-offset. * * returns false on error. */ bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state, struct xt_counters *counter) { BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2)); if (nr_cpu_ids <= 1) return true; if (!state->mem) { state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE, XT_PCPU_BLOCK_SIZE); if (!state->mem) return false; } counter->pcnt = (__force unsigned long)(state->mem + state->off); state->off += sizeof(*counter); if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) { state->mem = NULL; state->off = 0; } return true; } EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc); void xt_percpu_counter_free(struct xt_counters *counters) { unsigned long pcnt = counters->pcnt; if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0) free_percpu((void __percpu *)pcnt); } EXPORT_SYMBOL_GPL(xt_percpu_counter_free); static int __net_init xt_net_init(struct net *net) { struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); int i; for (i = 0; i < NFPROTO_NUMPROTO; i++) INIT_LIST_HEAD(&xt_net->tables[i]); return 0; } static void __net_exit xt_net_exit(struct net *net) { struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); int i; for (i = 0; i < NFPROTO_NUMPROTO; i++) WARN_ON_ONCE(!list_empty(&xt_net->tables[i])); } static struct pernet_operations xt_net_ops = { .init = xt_net_init, .exit = xt_net_exit, .id = &xt_pernet_id, .size = sizeof(struct xt_pernet), }; static int __init xt_init(void) { unsigned int i; int rv; for_each_possible_cpu(i) { seqcount_init(&per_cpu(xt_recseq, i)); } xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL); if (!xt) return -ENOMEM; for (i = 0; i < NFPROTO_NUMPROTO; i++) { mutex_init(&xt[i].mutex); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT mutex_init(&xt[i].compat_mutex); xt[i].compat_tab = NULL; #endif INIT_LIST_HEAD(&xt[i].target); INIT_LIST_HEAD(&xt[i].match); INIT_LIST_HEAD(&xt_templates[i]); } rv = register_pernet_subsys(&xt_net_ops); if (rv < 0) kfree(xt); return rv; } static void __exit xt_fini(void) { unregister_pernet_subsys(&xt_net_ops); kfree(xt); } module_init(xt_init); module_exit(xt_fini); |
4 4 4 4 4 4 4 800 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 | // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/buffer.c * * Copyright (C) 1991, 1992, 2002 Linus Torvalds */ /* * Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95 * * Removed a lot of unnecessary code and simplified things now that * the buffer cache isn't our primary cache - Andrew Tridgell 12/96 * * Speed up hash, lru, and free list operations. Use gfp() for allocating * hash table, use SLAB cache for buffer heads. SMP threading. -DaveM * * Added 32k buffer block sizes - these are required older ARM systems. - RMK * * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de> */ #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/syscalls.h> #include <linux/fs.h> #include <linux/iomap.h> #include <linux/mm.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/capability.h> #include <linux/blkdev.h> #include <linux/file.h> #include <linux/quotaops.h> #include <linux/highmem.h> #include <linux/export.h> #include <linux/backing-dev.h> #include <linux/writeback.h> #include <linux/hash.h> #include <linux/suspend.h> #include <linux/buffer_head.h> #include <linux/task_io_accounting_ops.h> #include <linux/bio.h> #include <linux/cpu.h> #include <linux/bitops.h> #include <linux/mpage.h> #include <linux/bit_spinlock.h> #include <linux/pagevec.h> #include <linux/sched/mm.h> #include <trace/events/block.h> #include <linux/fscrypt.h> #include <linux/fsverity.h> #include <linux/sched/isolation.h> #include "internal.h" static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, enum rw_hint hint, struct writeback_control *wbc); #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) inline void touch_buffer(struct buffer_head *bh) { trace_block_touch_buffer(bh); folio_mark_accessed(bh->b_folio); } EXPORT_SYMBOL(touch_buffer); void __lock_buffer(struct buffer_head *bh) { wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(__lock_buffer); void unlock_buffer(struct buffer_head *bh) { clear_bit_unlock(BH_Lock, &bh->b_state); smp_mb__after_atomic(); wake_up_bit(&bh->b_state, BH_Lock); } EXPORT_SYMBOL(unlock_buffer); /* * Returns if the folio has dirty or writeback buffers. If all the buffers * are unlocked and clean then the folio_test_dirty information is stale. If * any of the buffers are locked, it is assumed they are locked for IO. */ void buffer_check_dirty_writeback(struct folio *folio, bool *dirty, bool *writeback) { struct buffer_head *head, *bh; *dirty = false; *writeback = false; BUG_ON(!folio_test_locked(folio)); head = folio_buffers(folio); if (!head) return; if (folio_test_writeback(folio)) *writeback = true; bh = head; do { if (buffer_locked(bh)) *writeback = true; if (buffer_dirty(bh)) *dirty = true; bh = bh->b_this_page; } while (bh != head); } /* * Block until a buffer comes unlocked. This doesn't stop it * from becoming locked again - you have to lock it yourself * if you want to preserve its state. */ void __wait_on_buffer(struct buffer_head * bh) { wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(__wait_on_buffer); static void buffer_io_error(struct buffer_head *bh, char *msg) { if (!test_bit(BH_Quiet, &bh->b_state)) printk_ratelimited(KERN_ERR "Buffer I/O error on dev %pg, logical block %llu%s\n", bh->b_bdev, (unsigned long long)bh->b_blocknr, msg); } /* * End-of-IO handler helper function which does not touch the bh after * unlocking it. * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but * a race there is benign: unlock_buffer() only use the bh's address for * hashing after unlocking the buffer, so it doesn't actually touch the bh * itself. */ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate) { if (uptodate) { set_buffer_uptodate(bh); } else { /* This happens, due to failed read-ahead attempts. */ clear_buffer_uptodate(bh); } unlock_buffer(bh); } /* * Default synchronous end-of-IO handler.. Just mark it up-to-date and * unlock the buffer. */ void end_buffer_read_sync(struct buffer_head *bh, int uptodate) { __end_buffer_read_notouch(bh, uptodate); put_bh(bh); } EXPORT_SYMBOL(end_buffer_read_sync); void end_buffer_write_sync(struct buffer_head *bh, int uptodate) { if (uptodate) { set_buffer_uptodate(bh); } else { buffer_io_error(bh, ", lost sync page write"); mark_buffer_write_io_error(bh); clear_buffer_uptodate(bh); } unlock_buffer(bh); put_bh(bh); } EXPORT_SYMBOL(end_buffer_write_sync); /* * Various filesystems appear to want __find_get_block to be non-blocking. * But it's the page lock which protects the buffers. To get around this, * we get exclusion from try_to_free_buffers with the blockdev mapping's * i_private_lock. * * Hack idea: for the blockdev mapping, i_private_lock contention * may be quite high. This code could TryLock the page, and if that * succeeds, there is no need to take i_private_lock. */ static struct buffer_head * __find_get_block_slow(struct block_device *bdev, sector_t block) { struct address_space *bd_mapping = bdev->bd_mapping; const int blkbits = bd_mapping->host->i_blkbits; struct buffer_head *ret = NULL; pgoff_t index; struct buffer_head *bh; struct buffer_head *head; struct folio *folio; int all_mapped = 1; static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1); index = ((loff_t)block << blkbits) / PAGE_SIZE; folio = __filemap_get_folio(bd_mapping, index, FGP_ACCESSED, 0); if (IS_ERR(folio)) goto out; spin_lock(&bd_mapping->i_private_lock); head = folio_buffers(folio); if (!head) goto out_unlock; bh = head; do { if (!buffer_mapped(bh)) all_mapped = 0; else if (bh->b_blocknr == block) { ret = bh; get_bh(bh); goto out_unlock; } bh = bh->b_this_page; } while (bh != head); /* we might be here because some of the buffers on this page are * not mapped. This is due to various races between * file io on the block device and getblk. It gets dealt with * elsewhere, don't buffer_error if we had some unmapped buffers */ ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE); if (all_mapped && __ratelimit(&last_warned)) { printk("__find_get_block_slow() failed. block=%llu, " "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, " "device %pg blocksize: %d\n", (unsigned long long)block, (unsigned long long)bh->b_blocknr, bh->b_state, bh->b_size, bdev, 1 << blkbits); } out_unlock: spin_unlock(&bd_mapping->i_private_lock); folio_put(folio); out: return ret; } static void end_buffer_async_read(struct buffer_head *bh, int uptodate) { unsigned long flags; struct buffer_head *first; struct buffer_head *tmp; struct folio *folio; int folio_uptodate = 1; BUG_ON(!buffer_async_read(bh)); folio = bh->b_folio; if (uptodate) { set_buffer_uptodate(bh); } else { clear_buffer_uptodate(bh); buffer_io_error(bh, ", async page read"); } /* * Be _very_ careful from here on. Bad things can happen if * two buffer heads end IO at almost the same time and both * decide that the page is now completely done. */ first = folio_buffers(folio); spin_lock_irqsave(&first->b_uptodate_lock, flags); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; do { if (!buffer_uptodate(tmp)) folio_uptodate = 0; if (buffer_async_read(tmp)) { BUG_ON(!buffer_locked(tmp)); goto still_busy; } tmp = tmp->b_this_page; } while (tmp != bh); spin_unlock_irqrestore(&first->b_uptodate_lock, flags); folio_end_read(folio, folio_uptodate); return; still_busy: spin_unlock_irqrestore(&first->b_uptodate_lock, flags); return; } struct postprocess_bh_ctx { struct work_struct work; struct buffer_head *bh; }; static void verify_bh(struct work_struct *work) { struct postprocess_bh_ctx *ctx = container_of(work, struct postprocess_bh_ctx, work); struct buffer_head *bh = ctx->bh; bool valid; valid = fsverity_verify_blocks(bh->b_folio, bh->b_size, bh_offset(bh)); end_buffer_async_read(bh, valid); kfree(ctx); } static bool need_fsverity(struct buffer_head *bh) { struct folio *folio = bh->b_folio; struct inode *inode = folio->mapping->host; return fsverity_active(inode) && /* needed by ext4 */ folio->index < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); } static void decrypt_bh(struct work_struct *work) { struct postprocess_bh_ctx *ctx = container_of(work, struct postprocess_bh_ctx, work); struct buffer_head *bh = ctx->bh; int err; err = fscrypt_decrypt_pagecache_blocks(bh->b_folio, bh->b_size, bh_offset(bh)); if (err == 0 && need_fsverity(bh)) { /* * We use different work queues for decryption and for verity * because verity may require reading metadata pages that need * decryption, and we shouldn't recurse to the same workqueue. */ INIT_WORK(&ctx->work, verify_bh); fsverity_enqueue_verify_work(&ctx->work); return; } end_buffer_async_read(bh, err == 0); kfree(ctx); } /* * I/O completion handler for block_read_full_folio() - pages * which come unlocked at the end of I/O. */ static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate) { struct inode *inode = bh->b_folio->mapping->host; bool decrypt = fscrypt_inode_uses_fs_layer_crypto(inode); bool verify = need_fsverity(bh); /* Decrypt (with fscrypt) and/or verify (with fsverity) if needed. */ if (uptodate && (decrypt || verify)) { struct postprocess_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC); if (ctx) { ctx->bh = bh; if (decrypt) { INIT_WORK(&ctx->work, decrypt_bh); fscrypt_enqueue_decrypt_work(&ctx->work); } else { INIT_WORK(&ctx->work, verify_bh); fsverity_enqueue_verify_work(&ctx->work); } return; } uptodate = 0; } end_buffer_async_read(bh, uptodate); } /* * Completion handler for block_write_full_folio() - folios which are unlocked * during I/O, and which have the writeback flag cleared upon I/O completion. */ static void end_buffer_async_write(struct buffer_head *bh, int uptodate) { unsigned long flags; struct buffer_head *first; struct buffer_head *tmp; struct folio *folio; BUG_ON(!buffer_async_write(bh)); folio = bh->b_folio; if (uptodate) { set_buffer_uptodate(bh); } else { buffer_io_error(bh, ", lost async page write"); mark_buffer_write_io_error(bh); clear_buffer_uptodate(bh); } first = folio_buffers(folio); spin_lock_irqsave(&first->b_uptodate_lock, flags); clear_buffer_async_write(bh); unlock_buffer(bh); tmp = bh->b_this_page; while (tmp != bh) { if (buffer_async_write(tmp)) { BUG_ON(!buffer_locked(tmp)); goto still_busy; } tmp = tmp->b_this_page; } spin_unlock_irqrestore(&first->b_uptodate_lock, flags); folio_end_writeback(folio); return; still_busy: spin_unlock_irqrestore(&first->b_uptodate_lock, flags); return; } /* * If a page's buffers are under async readin (end_buffer_async_read * completion) then there is a possibility that another thread of * control could lock one of the buffers after it has completed * but while some of the other buffers have not completed. This * locked buffer would confuse end_buffer_async_read() into not unlocking * the page. So the absence of BH_Async_Read tells end_buffer_async_read() * that this buffer is not under async I/O. * * The page comes unlocked when it has no locked buffer_async buffers * left. * * PageLocked prevents anyone starting new async I/O reads any of * the buffers. * * PageWriteback is used to prevent simultaneous writeout of the same * page. * * PageLocked prevents anyone from starting writeback of a page which is * under read I/O (PageWriteback is only ever set against a locked page). */ static void mark_buffer_async_read(struct buffer_head *bh) { bh->b_end_io = end_buffer_async_read_io; set_buffer_async_read(bh); } static void mark_buffer_async_write_endio(struct buffer_head *bh, bh_end_io_t *handler) { bh->b_end_io = handler; set_buffer_async_write(bh); } void mark_buffer_async_write(struct buffer_head *bh) { mark_buffer_async_write_endio(bh, end_buffer_async_write); } EXPORT_SYMBOL(mark_buffer_async_write); /* * fs/buffer.c contains helper functions for buffer-backed address space's * fsync functions. A common requirement for buffer-based filesystems is * that certain data from the backing blockdev needs to be written out for * a successful fsync(). For example, ext2 indirect blocks need to be * written back and waited upon before fsync() returns. * * The functions mark_buffer_dirty_inode(), fsync_inode_buffers(), * inode_has_buffers() and invalidate_inode_buffers() are provided for the * management of a list of dependent buffers at ->i_mapping->i_private_list. * * Locking is a little subtle: try_to_free_buffers() will remove buffers * from their controlling inode's queue when they are being freed. But * try_to_free_buffers() will be operating against the *blockdev* mapping * at the time, not against the S_ISREG file which depends on those buffers. * So the locking for i_private_list is via the i_private_lock in the address_space * which backs the buffers. Which is different from the address_space * against which the buffers are listed. So for a particular address_space, * mapping->i_private_lock does *not* protect mapping->i_private_list! In fact, * mapping->i_private_list will always be protected by the backing blockdev's * ->i_private_lock. * * Which introduces a requirement: all buffers on an address_space's * ->i_private_list must be from the same address_space: the blockdev's. * * address_spaces which do not place buffers at ->i_private_list via these * utility functions are free to use i_private_lock and i_private_list for * whatever they want. The only requirement is that list_empty(i_private_list) * be true at clear_inode() time. * * FIXME: clear_inode should not call invalidate_inode_buffers(). The * filesystems should do that. invalidate_inode_buffers() should just go * BUG_ON(!list_empty). * * FIXME: mark_buffer_dirty_inode() is a data-plane operation. It should * take an address_space, not an inode. And it should be called * mark_buffer_dirty_fsync() to clearly define why those buffers are being * queued up. * * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the * list if it is already on a list. Because if the buffer is on a list, * it *must* already be on the right one. If not, the filesystem is being * silly. This will save a ton of locking. But first we have to ensure * that buffers are taken *off* the old inode's list when they are freed * (presumably in truncate). That requires careful auditing of all * filesystems (do it inside bforget()). It could also be done by bringing * b_inode back. */ /* * The buffer's backing address_space's i_private_lock must be held */ static void __remove_assoc_queue(struct buffer_head *bh) { list_del_init(&bh->b_assoc_buffers); WARN_ON(!bh->b_assoc_map); bh->b_assoc_map = NULL; } int inode_has_buffers(struct inode *inode) { return !list_empty(&inode->i_data.i_private_list); } /* * osync is designed to support O_SYNC io. It waits synchronously for * all already-submitted IO to complete, but does not queue any new * writes to the disk. * * To do O_SYNC writes, just queue the buffer writes with write_dirty_buffer * as you dirty the buffers, and then use osync_inode_buffers to wait for * completion. Any other dirty buffers which are not yet queued for * write will not be flushed to disk by the osync. */ static int osync_buffers_list(spinlock_t *lock, struct list_head *list) { struct buffer_head *bh; struct list_head *p; int err = 0; spin_lock(lock); repeat: list_for_each_prev(p, list) { bh = BH_ENTRY(p); if (buffer_locked(bh)) { get_bh(bh); spin_unlock(lock); wait_on_buffer(bh); if (!buffer_uptodate(bh)) err = -EIO; brelse(bh); spin_lock(lock); goto repeat; } } spin_unlock(lock); return err; } /** * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers * @mapping: the mapping which wants those buffers written * * Starts I/O against the buffers at mapping->i_private_list, and waits upon * that I/O. * * Basically, this is a convenience function for fsync(). * @mapping is a file or directory which needs those buffers to be written for * a successful fsync(). */ int sync_mapping_buffers(struct address_space *mapping) { struct address_space *buffer_mapping = mapping->i_private_data; if (buffer_mapping == NULL || list_empty(&mapping->i_private_list)) return 0; return fsync_buffers_list(&buffer_mapping->i_private_lock, &mapping->i_private_list); } EXPORT_SYMBOL(sync_mapping_buffers); /** * generic_buffers_fsync_noflush - generic buffer fsync implementation * for simple filesystems with no inode lock * * @file: file to synchronize * @start: start offset in bytes * @end: end offset in bytes (inclusive) * @datasync: only synchronize essential metadata if true * * This is a generic implementation of the fsync method for simple * filesystems which track all non-inode metadata in the buffers list * hanging off the address_space structure. */ int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, bool datasync) { struct inode *inode = file->f_mapping->host; int err; int ret; err = file_write_and_wait_range(file, start, end); if (err) return err; ret = sync_mapping_buffers(inode->i_mapping); if (!(inode->i_state & I_DIRTY_ALL)) goto out; if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) goto out; err = sync_inode_metadata(inode, 1); if (ret == 0) ret = err; out: /* check and advance again to catch errors after syncing out buffers */ err = file_check_and_advance_wb_err(file); if (ret == 0) ret = err; return ret; } EXPORT_SYMBOL(generic_buffers_fsync_noflush); /** * generic_buffers_fsync - generic buffer fsync implementation * for simple filesystems with no inode lock * * @file: file to synchronize * @start: start offset in bytes * @end: end offset in bytes (inclusive) * @datasync: only synchronize essential metadata if true * * This is a generic implementation of the fsync method for simple * filesystems which track all non-inode metadata in the buffers list * hanging off the address_space structure. This also makes sure that * a device cache flush operation is called at the end. */ int generic_buffers_fsync(struct file *file, loff_t start, loff_t end, bool datasync) { struct inode *inode = file->f_mapping->host; int ret; ret = generic_buffers_fsync_noflush(file, start, end, datasync); if (!ret) ret = blkdev_issue_flush(inode->i_sb->s_bdev); return ret; } EXPORT_SYMBOL(generic_buffers_fsync); /* * Called when we've recently written block `bblock', and it is known that * `bblock' was for a buffer_boundary() buffer. This means that the block at * `bblock + 1' is probably a dirty indirect block. Hunt it down and, if it's * dirty, schedule it for IO. So that indirects merge nicely with their data. */ void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize) { struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize); if (bh) { if (buffer_dirty(bh)) write_dirty_buffer(bh, 0); put_bh(bh); } } void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) { struct address_space *mapping = inode->i_mapping; struct address_space *buffer_mapping = bh->b_folio->mapping; mark_buffer_dirty(bh); if (!mapping->i_private_data) { mapping->i_private_data = buffer_mapping; } else { BUG_ON(mapping->i_private_data != buffer_mapping); } if (!bh->b_assoc_map) { spin_lock(&buffer_mapping->i_private_lock); list_move_tail(&bh->b_assoc_buffers, &mapping->i_private_list); bh->b_assoc_map = mapping; spin_unlock(&buffer_mapping->i_private_lock); } } EXPORT_SYMBOL(mark_buffer_dirty_inode); /** * block_dirty_folio - Mark a folio as dirty. * @mapping: The address space containing this folio. * @folio: The folio to mark dirty. * * Filesystems which use buffer_heads can use this function as their * ->dirty_folio implementation. Some filesystems need to do a little * work before calling this function. Filesystems which do not use * buffer_heads should call filemap_dirty_folio() instead. * * If the folio has buffers, the uptodate buffers are set dirty, to * preserve dirty-state coherency between the folio and the buffers. * Buffers added to a dirty folio are created dirty. * * The buffers are dirtied before the folio is dirtied. There's a small * race window in which writeback may see the folio cleanness but not the * buffer dirtiness. That's fine. If this code were to set the folio * dirty before the buffers, writeback could clear the folio dirty flag, * see a bunch of clean buffers and we'd end up with dirty buffers/clean * folio on the dirty folio list. * * We use i_private_lock to lock against try_to_free_buffers() while * using the folio's buffer list. This also prevents clean buffers * being added to the folio after it was set dirty. * * Context: May only be called from process context. Does not sleep. * Caller must ensure that @folio cannot be truncated during this call, * typically by holding the folio lock or having a page in the folio * mapped and holding the page table lock. * * Return: True if the folio was dirtied; false if it was already dirtied. */ bool block_dirty_folio(struct address_space *mapping, struct folio *folio) { struct buffer_head *head; bool newly_dirty; spin_lock(&mapping->i_private_lock); head = folio_buffers(folio); if (head) { struct buffer_head *bh = head; do { set_buffer_dirty(bh); bh = bh->b_this_page; } while (bh != head); } /* * Lock out page's memcg migration to keep PageDirty * synchronized with per-memcg dirty page counters. */ folio_memcg_lock(folio); newly_dirty = !folio_test_set_dirty(folio); spin_unlock(&mapping->i_private_lock); if (newly_dirty) __folio_mark_dirty(folio, mapping, 1); folio_memcg_unlock(folio); if (newly_dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); return newly_dirty; } EXPORT_SYMBOL(block_dirty_folio); /* * Write out and wait upon a list of buffers. * * We have conflicting pressures: we want to make sure that all * initially dirty buffers get waited on, but that any subsequently * dirtied buffers don't. After all, we don't want fsync to last * forever if somebody is actively writing to the file. * * Do this in two main stages: first we copy dirty buffers to a * temporary inode list, queueing the writes as we go. Then we clean * up, waiting for those writes to complete. * * During this second stage, any subsequent updates to the file may end * up refiling the buffer on the original inode's dirty list again, so * there is a chance we will end up with a buffer queued for write but * not yet completed on that list. So, as a final cleanup we go through * the osync code to catch these locked, dirty buffers without requeuing * any newly dirty buffers for write. */ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) { struct buffer_head *bh; struct address_space *mapping; int err = 0, err2; struct blk_plug plug; LIST_HEAD(tmp); blk_start_plug(&plug); spin_lock(lock); while (!list_empty(list)) { bh = BH_ENTRY(list->next); mapping = bh->b_assoc_map; __remove_assoc_queue(bh); /* Avoid race with mark_buffer_dirty_inode() which does * a lockless check and we rely on seeing the dirty bit */ smp_mb(); if (buffer_dirty(bh) || buffer_locked(bh)) { list_add(&bh->b_assoc_buffers, &tmp); bh->b_assoc_map = mapping; if (buffer_dirty(bh)) { get_bh(bh); spin_unlock(lock); /* * Ensure any pending I/O completes so that * write_dirty_buffer() actually writes the * current contents - it is a noop if I/O is * still in flight on potentially older * contents. */ write_dirty_buffer(bh, REQ_SYNC); /* * Kick off IO for the previous mapping. Note * that we will not run the very last mapping, * wait_on_buffer() will do that for us * through sync_buffer(). */ brelse(bh); spin_lock(lock); } } } spin_unlock(lock); blk_finish_plug(&plug); spin_lock(lock); while (!list_empty(&tmp)) { bh = BH_ENTRY(tmp.prev); get_bh(bh); mapping = bh->b_assoc_map; __remove_assoc_queue(bh); /* Avoid race with mark_buffer_dirty_inode() which does * a lockless check and we rely on seeing the dirty bit */ smp_mb(); if (buffer_dirty(bh)) { list_add(&bh->b_assoc_buffers, &mapping->i_private_list); bh->b_assoc_map = mapping; } spin_unlock(lock); wait_on_buffer(bh); if (!buffer_uptodate(bh)) err = -EIO; brelse(bh); spin_lock(lock); } spin_unlock(lock); err2 = osync_buffers_list(lock, list); if (err) return err; else return err2; } /* * Invalidate any and all dirty buffers on a given inode. We are * probably unmounting the fs, but that doesn't mean we have already * done a sync(). Just drop the buffers from the inode list. * * NOTE: we take the inode's blockdev's mapping's i_private_lock. Which * assumes that all the buffers are against the blockdev. Not true * for reiserfs. */ void invalidate_inode_buffers(struct inode *inode) { if (inode_has_buffers(inode)) { struct address_space *mapping = &inode->i_data; struct list_head *list = &mapping->i_private_list; struct address_space *buffer_mapping = mapping->i_private_data; spin_lock(&buffer_mapping->i_private_lock); while (!list_empty(list)) __remove_assoc_queue(BH_ENTRY(list->next)); spin_unlock(&buffer_mapping->i_private_lock); } } EXPORT_SYMBOL(invalidate_inode_buffers); /* * Remove any clean buffers from the inode's buffer list. This is called * when we're trying to free the inode itself. Those buffers can pin it. * * Returns true if all buffers were removed. */ int remove_inode_buffers(struct inode *inode) { int ret = 1; if (inode_has_buffers(inode)) { struct address_space *mapping = &inode->i_data; struct list_head *list = &mapping->i_private_list; struct address_space *buffer_mapping = mapping->i_private_data; spin_lock(&buffer_mapping->i_private_lock); while (!list_empty(list)) { struct buffer_head *bh = BH_ENTRY(list->next); if (buffer_dirty(bh)) { ret = 0; break; } __remove_assoc_queue(bh); } spin_unlock(&buffer_mapping->i_private_lock); } return ret; } /* * Create the appropriate buffers when given a folio for data area and * the size of each buffer.. Use the bh->b_this_page linked list to * follow the buffers created. Return NULL if unable to create more * buffers. * * The retry flag is used to differentiate async IO (paging, swapping) * which may not fail from ordinary buffer allocations. */ struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, gfp_t gfp) { struct buffer_head *bh, *head; long offset; struct mem_cgroup *memcg, *old_memcg; /* The folio lock pins the memcg */ memcg = folio_memcg(folio); old_memcg = set_active_memcg(memcg); head = NULL; offset = folio_size(folio); while ((offset -= size) >= 0) { bh = alloc_buffer_head(gfp); if (!bh) goto no_grow; bh->b_this_page = head; bh->b_blocknr = -1; head = bh; bh->b_size = size; /* Link the buffer to its folio */ folio_set_bh(bh, folio, offset); } out: set_active_memcg(old_memcg); return head; /* * In case anything failed, we just free everything we got. */ no_grow: if (head) { do { bh = head; head = head->b_this_page; free_buffer_head(bh); } while (head); } goto out; } EXPORT_SYMBOL_GPL(folio_alloc_buffers); struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size) { gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT; return folio_alloc_buffers(page_folio(page), size, gfp); } EXPORT_SYMBOL_GPL(alloc_page_buffers); static inline void link_dev_buffers(struct folio *folio, struct buffer_head *head) { struct buffer_head *bh, *tail; bh = head; do { tail = bh; bh = bh->b_this_page; } while (bh); tail->b_this_page = head; folio_attach_private(folio, head); } static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size) { sector_t retval = ~((sector_t)0); loff_t sz = bdev_nr_bytes(bdev); if (sz) { unsigned int sizebits = blksize_bits(size); retval = (sz >> sizebits); } return retval; } /* * Initialise the state of a blockdev folio's buffers. */ static sector_t folio_init_buffers(struct folio *folio, struct block_device *bdev, unsigned size) { struct buffer_head *head = folio_buffers(folio); struct buffer_head *bh = head; bool uptodate = folio_test_uptodate(folio); sector_t block = div_u64(folio_pos(folio), size); sector_t end_block = blkdev_max_block(bdev, size); do { if (!buffer_mapped(bh)) { bh->b_end_io = NULL; bh->b_private = NULL; bh->b_bdev = bdev; bh->b_blocknr = block; if (uptodate) set_buffer_uptodate(bh); if (block < end_block) set_buffer_mapped(bh); } block++; bh = bh->b_this_page; } while (bh != head); /* * Caller needs to validate requested block against end of device. */ return end_block; } /* * Create the page-cache folio that contains the requested block. * * This is used purely for blockdev mappings. * * Returns false if we have a failure which cannot be cured by retrying * without sleeping. Returns true if we succeeded, or the caller should retry. */ static bool grow_dev_folio(struct block_device *bdev, sector_t block, pgoff_t index, unsigned size, gfp_t gfp) { struct address_space *mapping = bdev->bd_mapping; struct folio *folio; struct buffer_head *bh; sector_t end_block = 0; folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); if (IS_ERR(folio)) return false; bh = folio_buffers(folio); if (bh) { if (bh->b_size == size) { end_block = folio_init_buffers(folio, bdev, size); goto unlock; } /* * Retrying may succeed; for example the folio may finish * writeback, or buffers may be cleaned. This should not * happen very often; maybe we have old buffers attached to * this blockdev's page cache and we're trying to change * the block size? */ if (!try_to_free_buffers(folio)) { end_block = ~0ULL; goto unlock; } } bh = folio_alloc_buffers(folio, size, gfp | __GFP_ACCOUNT); if (!bh) goto unlock; /* * Link the folio to the buffers and initialise them. Take the * lock to be atomic wrt __find_get_block(), which does not * run under the folio lock. */ spin_lock(&mapping->i_private_lock); link_dev_buffers(folio, bh); end_block = folio_init_buffers(folio, bdev, size); spin_unlock(&mapping->i_private_lock); unlock: folio_unlock(folio); folio_put(folio); return block < end_block; } /* * Create buffers for the specified block device block's folio. If * that folio was dirty, the buffers are set dirty also. Returns false * if we've hit a permanent error. */ static bool grow_buffers(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { loff_t pos; /* * Check for a block which lies outside our maximum possible * pagecache index. */ if (check_mul_overflow(block, (sector_t)size, &pos) || pos > MAX_LFS_FILESIZE) { printk(KERN_ERR "%s: requested out-of-range block %llu for device %pg\n", __func__, (unsigned long long)block, bdev); return false; } /* Create a folio with the proper size buffers */ return grow_dev_folio(bdev, block, pos / PAGE_SIZE, size, gfp); } static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { printk(KERN_ERR "getblk(): invalid block size %d requested\n", size); printk(KERN_ERR "logical block size: %d\n", bdev_logical_block_size(bdev)); dump_stack(); return NULL; } for (;;) { struct buffer_head *bh; bh = __find_get_block(bdev, block, size); if (bh) return bh; if (!grow_buffers(bdev, block, size, gfp)) return NULL; } } /* * The relationship between dirty buffers and dirty pages: * * Whenever a page has any dirty buffers, the page's dirty bit is set, and * the page is tagged dirty in the page cache. * * At all times, the dirtiness of the buffers represents the dirtiness of * subsections of the page. If the page has buffers, the page dirty bit is * merely a hint about the true dirty state. * * When a page is set dirty in its entirety, all its buffers are marked dirty * (if the page has buffers). * * When a buffer is marked dirty, its page is dirtied, but the page's other * buffers are not. * * Also. When blockdev buffers are explicitly read with bread(), they * individually become uptodate. But their backing page remains not * uptodate - even if all of its buffers are uptodate. A subsequent * block_read_full_folio() against that folio will discover all the uptodate * buffers, will set the folio uptodate and will perform no I/O. */ /** * mark_buffer_dirty - mark a buffer_head as needing writeout * @bh: the buffer_head to mark dirty * * mark_buffer_dirty() will set the dirty bit against the buffer, then set * its backing page dirty, then tag the page as dirty in the page cache * and then attach the address_space's inode to its superblock's dirty * inode list. * * mark_buffer_dirty() is atomic. It takes bh->b_folio->mapping->i_private_lock, * i_pages lock and mapping->host->i_lock. */ void mark_buffer_dirty(struct buffer_head *bh) { WARN_ON_ONCE(!buffer_uptodate(bh)); trace_block_dirty_buffer(bh); /* * Very *carefully* optimize the it-is-already-dirty case. * * Don't let the final "is it dirty" escape to before we * perhaps modified the buffer. */ if (buffer_dirty(bh)) { smp_mb(); if (buffer_dirty(bh)) return; } if (!test_set_buffer_dirty(bh)) { struct folio *folio = bh->b_folio; struct address_space *mapping = NULL; folio_memcg_lock(folio); if (!folio_test_set_dirty(folio)) { mapping = folio->mapping; if (mapping) __folio_mark_dirty(folio, mapping, 0); } folio_memcg_unlock(folio); if (mapping) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } } EXPORT_SYMBOL(mark_buffer_dirty); void mark_buffer_write_io_error(struct buffer_head *bh) { set_buffer_write_io_error(bh); /* FIXME: do we need to set this in both places? */ if (bh->b_folio && bh->b_folio->mapping) mapping_set_error(bh->b_folio->mapping, -EIO); if (bh->b_assoc_map) { mapping_set_error(bh->b_assoc_map, -EIO); errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO); } } EXPORT_SYMBOL(mark_buffer_write_io_error); /** * __brelse - Release a buffer. * @bh: The buffer to release. * * This variant of brelse() can be called if @bh is guaranteed to not be NULL. */ void __brelse(struct buffer_head *bh) { if (atomic_read(&bh->b_count)) { put_bh(bh); return; } WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n"); } EXPORT_SYMBOL(__brelse); /** * __bforget - Discard any dirty data in a buffer. * @bh: The buffer to forget. * * This variant of bforget() can be called if @bh is guaranteed to not * be NULL. */ void __bforget(struct buffer_head *bh) { clear_buffer_dirty(bh); if (bh->b_assoc_map) { struct address_space *buffer_mapping = bh->b_folio->mapping; spin_lock(&buffer_mapping->i_private_lock); list_del_init(&bh->b_assoc_buffers); bh->b_assoc_map = NULL; spin_unlock(&buffer_mapping->i_private_lock); } __brelse(bh); } EXPORT_SYMBOL(__bforget); static struct buffer_head *__bread_slow(struct buffer_head *bh) { lock_buffer(bh); if (buffer_uptodate(bh)) { unlock_buffer(bh); return bh; } else { get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; } brelse(bh); return NULL; } /* * Per-cpu buffer LRU implementation. To reduce the cost of __find_get_block(). * The bhs[] array is sorted - newest buffer is at bhs[0]. Buffers have their * refcount elevated by one when they're in an LRU. A buffer can only appear * once in a particular CPU's LRU. A single buffer can be present in multiple * CPU's LRUs at the same time. * * This is a transparent caching front-end to sb_bread(), sb_getblk() and * sb_find_get_block(). * * The LRUs themselves only need locking against invalidate_bh_lrus. We use * a local interrupt disable for that. */ #define BH_LRU_SIZE 16 struct bh_lru { struct buffer_head *bhs[BH_LRU_SIZE]; }; static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }}; #ifdef CONFIG_SMP #define bh_lru_lock() local_irq_disable() #define bh_lru_unlock() local_irq_enable() #else #define bh_lru_lock() preempt_disable() #define bh_lru_unlock() preempt_enable() #endif static inline void check_irqs_on(void) { #ifdef irqs_disabled BUG_ON(irqs_disabled()); #endif } /* * Install a buffer_head into this cpu's LRU. If not already in the LRU, it is * inserted at the front, and the buffer_head at the back if any is evicted. * Or, if already in the LRU it is moved to the front. */ static void bh_lru_install(struct buffer_head *bh) { struct buffer_head *evictee = bh; struct bh_lru *b; int i; check_irqs_on(); bh_lru_lock(); /* * the refcount of buffer_head in bh_lru prevents dropping the * attached page(i.e., try_to_free_buffers) so it could cause * failing page migration. * Skip putting upcoming bh into bh_lru until migration is done. */ if (lru_cache_disabled() || cpu_is_isolated(smp_processor_id())) { bh_lru_unlock(); return; } b = this_cpu_ptr(&bh_lrus); for (i = 0; i < BH_LRU_SIZE; i++) { swap(evictee, b->bhs[i]); if (evictee == bh) { bh_lru_unlock(); return; } } get_bh(bh); bh_lru_unlock(); brelse(evictee); } /* * Look up the bh in this cpu's LRU. If it's there, move it to the head. */ static struct buffer_head * lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *ret = NULL; unsigned int i; check_irqs_on(); bh_lru_lock(); if (cpu_is_isolated(smp_processor_id())) { bh_lru_unlock(); return NULL; } for (i = 0; i < BH_LRU_SIZE; i++) { struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]); if (bh && bh->b_blocknr == block && bh->b_bdev == bdev && bh->b_size == size) { if (i) { while (i) { __this_cpu_write(bh_lrus.bhs[i], __this_cpu_read(bh_lrus.bhs[i - 1])); i--; } __this_cpu_write(bh_lrus.bhs[0], bh); } get_bh(bh); ret = bh; break; } } bh_lru_unlock(); return ret; } /* * Perform a pagecache lookup for the matching buffer. If it's there, refresh * it in the LRU and mark it as accessed. If it is not present then return * NULL */ struct buffer_head * __find_get_block(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *bh = lookup_bh_lru(bdev, block, size); if (bh == NULL) { /* __find_get_block_slow will mark the page accessed */ bh = __find_get_block_slow(bdev, block); if (bh) bh_lru_install(bh); } else touch_buffer(bh); return bh; } EXPORT_SYMBOL(__find_get_block); /** * bdev_getblk - Get a buffer_head in a block device's buffer cache. * @bdev: The block device. * @block: The block number. * @size: The size of buffer_heads for this @bdev. * @gfp: The memory allocation flags to use. * * The returned buffer head has its reference count incremented, but is * not locked. The caller should call brelse() when it has finished * with the buffer. The buffer may not be uptodate. If needed, the * caller can bring it uptodate either by reading it or overwriting it. * * Return: The buffer head, or NULL if memory could not be allocated. */ struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { struct buffer_head *bh = __find_get_block(bdev, block, size); might_alloc(gfp); if (bh) return bh; return __getblk_slow(bdev, block, size, gfp); } EXPORT_SYMBOL(bdev_getblk); /* * Do async read-ahead on a buffer.. */ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *bh = bdev_getblk(bdev, block, size, GFP_NOWAIT | __GFP_MOVABLE); if (likely(bh)) { bh_readahead(bh, REQ_RAHEAD); brelse(bh); } } EXPORT_SYMBOL(__breadahead); /** * __bread_gfp() - Read a block. * @bdev: The block device to read from. * @block: Block number in units of block size. * @size: The block size of this device in bytes. * @gfp: Not page allocation flags; see below. * * You are not expected to call this function. You should use one of * sb_bread(), sb_bread_unmovable() or __bread(). * * Read a specified block, and return the buffer head that refers to it. * If @gfp is 0, the memory will be allocated using the block device's * default GFP flags. If @gfp is __GFP_MOVABLE, the memory may be * allocated from a movable area. Do not pass in a complete set of * GFP flags. * * The returned buffer head has its refcount increased. The caller should * call brelse() when it has finished with the buffer. * * Context: May sleep waiting for I/O. * Return: NULL if the block was unreadable. */ struct buffer_head *__bread_gfp(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { struct buffer_head *bh; gfp |= mapping_gfp_constraint(bdev->bd_mapping, ~__GFP_FS); /* * Prefer looping in the allocator rather than here, at least that * code knows what it's doing. */ gfp |= __GFP_NOFAIL; bh = bdev_getblk(bdev, block, size, gfp); if (likely(bh) && !buffer_uptodate(bh)) bh = __bread_slow(bh); return bh; } EXPORT_SYMBOL(__bread_gfp); static void __invalidate_bh_lrus(struct bh_lru *b) { int i; for (i = 0; i < BH_LRU_SIZE; i++) { brelse(b->bhs[i]); b->bhs[i] = NULL; } } /* * invalidate_bh_lrus() is called rarely - but not only at unmount. * This doesn't race because it runs in each cpu either in irq * or with preempt disabled. */ static void invalidate_bh_lru(void *arg) { struct bh_lru *b = &get_cpu_var(bh_lrus); __invalidate_bh_lrus(b); put_cpu_var(bh_lrus); } bool has_bh_in_lru(int cpu, void *dummy) { struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu); int i; for (i = 0; i < BH_LRU_SIZE; i++) { if (b->bhs[i]) return true; } return false; } void invalidate_bh_lrus(void) { on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1); } EXPORT_SYMBOL_GPL(invalidate_bh_lrus); /* * It's called from workqueue context so we need a bh_lru_lock to close * the race with preemption/irq. */ void invalidate_bh_lrus_cpu(void) { struct bh_lru *b; bh_lru_lock(); b = this_cpu_ptr(&bh_lrus); __invalidate_bh_lrus(b); bh_lru_unlock(); } void folio_set_bh(struct buffer_head *bh, struct folio *folio, unsigned long offset) { bh->b_folio = folio; BUG_ON(offset >= folio_size(folio)); if (folio_test_highmem(folio)) /* * This catches illegal uses and preserves the offset: */ bh->b_data = (char *)(0 + offset); else bh->b_data = folio_address(folio) + offset; } EXPORT_SYMBOL(folio_set_bh); /* * Called when truncating a buffer on a page completely. */ /* Bits that are cleared during an invalidate */ #define BUFFER_FLAGS_DISCARD \ (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \ 1 << BH_Delay | 1 << BH_Unwritten) static void discard_buffer(struct buffer_head * bh) { unsigned long b_state; lock_buffer(bh); clear_buffer_dirty(bh); bh->b_bdev = NULL; b_state = READ_ONCE(bh->b_state); do { } while (!try_cmpxchg(&bh->b_state, &b_state, b_state & ~BUFFER_FLAGS_DISCARD)); unlock_buffer(bh); } /** * block_invalidate_folio - Invalidate part or all of a buffer-backed folio. * @folio: The folio which is affected. * @offset: start of the range to invalidate * @length: length of the range to invalidate * * block_invalidate_folio() is called when all or part of the folio has been * invalidated by a truncate operation. * * block_invalidate_folio() does not have to release all buffers, but it must * ensure that no dirty buffer is left outside @offset and that no I/O * is underway against any of the blocks which are outside the truncation * point. Because the caller is about to free (and possibly reuse) those * blocks on-disk. */ void block_invalidate_folio(struct folio *folio, size_t offset, size_t length) { struct buffer_head *head, *bh, *next; size_t curr_off = 0; size_t stop = length + offset; BUG_ON(!folio_test_locked(folio)); /* * Check for overflow */ BUG_ON(stop > folio_size(folio) || stop < length); head = folio_buffers(folio); if (!head) return; bh = head; do { size_t next_off = curr_off + bh->b_size; next = bh->b_this_page; /* * Are we still fully in range ? */ if (next_off > stop) goto out; /* * is this block fully invalidated? */ if (offset <= curr_off) discard_buffer(bh); curr_off = next_off; bh = next; } while (bh != head); /* * We release buffers only if the entire folio is being invalidated. * The get_block cached value has been unconditionally invalidated, * so real IO is not possible anymore. */ if (length == folio_size(folio)) filemap_release_folio(folio, 0); out: return; } EXPORT_SYMBOL(block_invalidate_folio); /* * We attach and possibly dirty the buffers atomically wrt * block_dirty_folio() via i_private_lock. try_to_free_buffers * is already excluded via the folio lock. */ struct buffer_head *create_empty_buffers(struct folio *folio, unsigned long blocksize, unsigned long b_state) { struct buffer_head *bh, *head, *tail; gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT | __GFP_NOFAIL; head = folio_alloc_buffers(folio, blocksize, gfp); bh = head; do { bh->b_state |= b_state; tail = bh; bh = bh->b_this_page; } while (bh); tail->b_this_page = head; spin_lock(&folio->mapping->i_private_lock); if (folio_test_uptodate(folio) || folio_test_dirty(folio)) { bh = head; do { if (folio_test_dirty(folio)) set_buffer_dirty(bh); if (folio_test_uptodate(folio)) set_buffer_uptodate(bh); bh = bh->b_this_page; } while (bh != head); } folio_attach_private(folio, head); spin_unlock(&folio->mapping->i_private_lock); return head; } EXPORT_SYMBOL(create_empty_buffers); /** * clean_bdev_aliases: clean a range of buffers in block device * @bdev: Block device to clean buffers in * @block: Start of a range of blocks to clean * @len: Number of blocks to clean * * We are taking a range of blocks for data and we don't want writeback of any * buffer-cache aliases starting from return from this function and until the * moment when something will explicitly mark the buffer dirty (hopefully that * will not happen until we will free that block ;-) We don't even need to mark * it not-uptodate - nobody can expect anything from a newly allocated buffer * anyway. We used to use unmap_buffer() for such invalidation, but that was * wrong. We definitely don't want to mark the alias unmapped, for example - it * would confuse anyone who might pick it with bread() afterwards... * * Also.. Note that bforget() doesn't lock the buffer. So there can be * writeout I/O going on against recently-freed buffers. We don't wait on that * I/O in bforget() - it's more efficient to wait on the I/O only if we really * need to. That happens here. */ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len) { struct address_space *bd_mapping = bdev->bd_mapping; const int blkbits = bd_mapping->host->i_blkbits; struct folio_batch fbatch; pgoff_t index = ((loff_t)block << blkbits) / PAGE_SIZE; pgoff_t end; int i, count; struct buffer_head *bh; struct buffer_head *head; end = ((loff_t)(block + len - 1) << blkbits) / PAGE_SIZE; folio_batch_init(&fbatch); while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) { count = folio_batch_count(&fbatch); for (i = 0; i < count; i++) { struct folio *folio = fbatch.folios[i]; if (!folio_buffers(folio)) continue; /* * We use folio lock instead of bd_mapping->i_private_lock * to pin buffers here since we can afford to sleep and * it scales better than a global spinlock lock. */ folio_lock(folio); /* Recheck when the folio is locked which pins bhs */ head = folio_buffers(folio); if (!head) goto unlock_page; bh = head; do { if (!buffer_mapped(bh) || (bh->b_blocknr < block)) goto next; if (bh->b_blocknr >= block + len) break; clear_buffer_dirty(bh); wait_on_buffer(bh); clear_buffer_req(bh); next: bh = bh->b_this_page; } while (bh != head); unlock_page: folio_unlock(folio); } folio_batch_release(&fbatch); cond_resched(); /* End of range already reached? */ if (index > end || !index) break; } } EXPORT_SYMBOL(clean_bdev_aliases); static struct buffer_head *folio_create_buffers(struct folio *folio, struct inode *inode, unsigned int b_state) { struct buffer_head *bh; BUG_ON(!folio_test_locked(folio)); bh = folio_buffers(folio); if (!bh) bh = create_empty_buffers(folio, 1 << READ_ONCE(inode->i_blkbits), b_state); return bh; } /* * NOTE! All mapped/uptodate combinations are valid: * * Mapped Uptodate Meaning * * No No "unknown" - must do get_block() * No Yes "hole" - zero-filled * Yes No "allocated" - allocated on disk, not read in * Yes Yes "valid" - allocated and up-to-date in memory. * * "Dirty" is valid only with the last case (mapped+uptodate). */ /* * While block_write_full_folio is writing back the dirty buffers under * the page lock, whoever dirtied the buffers may decide to clean them * again at any time. We handle that by only looking at the buffer * state inside lock_buffer(). * * If block_write_full_folio() is called for regular writeback * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a * locked buffer. This only can happen if someone has written the buffer * directly, with submit_bh(). At the address_space level PageWriteback * prevents this contention from occurring. * * If block_write_full_folio() is called with wbc->sync_mode == * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this * causes the writes to be flagged as synchronous writes. */ int __block_write_full_folio(struct inode *inode, struct folio *folio, get_block_t *get_block, struct writeback_control *wbc) { int err; sector_t block; sector_t last_block; struct buffer_head *bh, *head; size_t blocksize; int nr_underway = 0; blk_opf_t write_flags = wbc_to_write_flags(wbc); head = folio_create_buffers(folio, inode, (1 << BH_Dirty) | (1 << BH_Uptodate)); /* * Be very careful. We have no exclusion from block_dirty_folio * here, and the (potentially unmapped) buffers may become dirty at * any time. If a buffer becomes dirty here after we've inspected it * then we just miss that fact, and the folio stays dirty. * * Buffers outside i_size may be dirtied by block_dirty_folio; * handle that here by just cleaning them. */ bh = head; blocksize = bh->b_size; block = div_u64(folio_pos(folio), blocksize); last_block = div_u64(i_size_read(inode) - 1, blocksize); /* * Get all the dirty buffers mapped to disk addresses and * handle any aliases from the underlying blockdev's mapping. */ do { if (block > last_block) { /* * mapped buffers outside i_size will occur, because * this folio can be outside i_size when there is a * truncate in progress. */ /* * The buffer was zeroed by block_write_full_folio() */ clear_buffer_dirty(bh); set_buffer_uptodate(bh); } else if ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)) { WARN_ON(bh->b_size != blocksize); err = get_block(inode, block, bh, 1); if (err) goto recover; clear_buffer_delay(bh); if (buffer_new(bh)) { /* blockdev mappings never come here */ clear_buffer_new(bh); clean_bdev_bh_alias(bh); } } bh = bh->b_this_page; block++; } while (bh != head); do { if (!buffer_mapped(bh)) continue; /* * If it's a fully non-blocking write attempt and we cannot * lock the buffer then redirty the folio. Note that this can * potentially cause a busy-wait loop from writeback threads * and kswapd activity, but those code paths have their own * higher-level throttling. */ if (wbc->sync_mode != WB_SYNC_NONE) { lock_buffer(bh); } else if (!trylock_buffer(bh)) { folio_redirty_for_writepage(wbc, folio); continue; } if (test_clear_buffer_dirty(bh)) { mark_buffer_async_write_endio(bh, end_buffer_async_write); } else { unlock_buffer(bh); } } while ((bh = bh->b_this_page) != head); /* * The folio and its buffers are protected by the writeback flag, * so we can drop the bh refcounts early. */ BUG_ON(folio_test_writeback(folio)); folio_start_writeback(folio); do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, inode->i_write_hint, wbc); nr_underway++; } bh = next; } while (bh != head); folio_unlock(folio); err = 0; done: if (nr_underway == 0) { /* * The folio was marked dirty, but the buffers were * clean. Someone wrote them back by hand with * write_dirty_buffer/submit_bh. A rare case. */ folio_end_writeback(folio); /* * The folio and buffer_heads can be released at any time from * here on. */ } return err; recover: /* * ENOSPC, or some other error. We may already have added some * blocks to the file, so we need to write these out to avoid * exposing stale data. * The folio is currently locked and not marked for writeback */ bh = head; /* Recovery: lock and submit the mapped buffers */ do { if (buffer_mapped(bh) && buffer_dirty(bh) && !buffer_delay(bh)) { lock_buffer(bh); mark_buffer_async_write_endio(bh, end_buffer_async_write); } else { /* * The buffer may have been set dirty during * attachment to a dirty folio. */ clear_buffer_dirty(bh); } } while ((bh = bh->b_this_page) != head); BUG_ON(folio_test_writeback(folio)); mapping_set_error(folio->mapping, err); folio_start_writeback(folio); do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, inode->i_write_hint, wbc); nr_underway++; } bh = next; } while (bh != head); folio_unlock(folio); goto done; } EXPORT_SYMBOL(__block_write_full_folio); /* * If a folio has any new buffers, zero them out here, and mark them uptodate * and dirty so they'll be written out (in order to prevent uninitialised * block data from leaking). And clear the new bit. */ void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to) { size_t block_start, block_end; struct buffer_head *head, *bh; BUG_ON(!folio_test_locked(folio)); head = folio_buffers(folio); if (!head) return; bh = head; block_start = 0; do { block_end = block_start + bh->b_size; if (buffer_new(bh)) { if (block_end > from && block_start < to) { if (!folio_test_uptodate(folio)) { size_t start, xend; start = max(from, block_start); xend = min(to, block_end); folio_zero_segment(folio, start, xend); set_buffer_uptodate(bh); } clear_buffer_new(bh); mark_buffer_dirty(bh); } } block_start = block_end; bh = bh->b_this_page; } while (bh != head); } EXPORT_SYMBOL(folio_zero_new_buffers); static int iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, const struct iomap *iomap) { loff_t offset = (loff_t)block << inode->i_blkbits; bh->b_bdev = iomap->bdev; /* * Block points to offset in file we need to map, iomap contains * the offset at which the map starts. If the map ends before the * current block, then do not map the buffer and let the caller * handle it. */ if (offset >= iomap->offset + iomap->length) return -EIO; switch (iomap->type) { case IOMAP_HOLE: /* * If the buffer is not up to date or beyond the current EOF, * we need to mark it as new to ensure sub-block zeroing is * executed if necessary. */ if (!buffer_uptodate(bh) || (offset >= i_size_read(inode))) set_buffer_new(bh); return 0; case IOMAP_DELALLOC: if (!buffer_uptodate(bh) || (offset >= i_size_read(inode))) set_buffer_new(bh); set_buffer_uptodate(bh); set_buffer_mapped(bh); set_buffer_delay(bh); return 0; case IOMAP_UNWRITTEN: /* * For unwritten regions, we always need to ensure that regions * in the block we are not writing to are zeroed. Mark the * buffer as new to ensure this. */ set_buffer_new(bh); set_buffer_unwritten(bh); fallthrough; case IOMAP_MAPPED: if ((iomap->flags & IOMAP_F_NEW) || offset >= i_size_read(inode)) { /* * This can happen if truncating the block device races * with the check in the caller as i_size updates on * block devices aren't synchronized by i_rwsem for * block devices. */ if (S_ISBLK(inode->i_mode)) return -EIO; set_buffer_new(bh); } bh->b_blocknr = (iomap->addr + offset - iomap->offset) >> inode->i_blkbits; set_buffer_mapped(bh); return 0; default: WARN_ON_ONCE(1); return -EIO; } } int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block, const struct iomap *iomap) { size_t from = offset_in_folio(folio, pos); size_t to = from + len; struct inode *inode = folio->mapping->host; size_t block_start, block_end; sector_t block; int err = 0; size_t blocksize; struct buffer_head *bh, *head, *wait[2], **wait_bh=wait; BUG_ON(!folio_test_locked(folio)); BUG_ON(to > folio_size(folio)); BUG_ON(from > to); head = folio_create_buffers(folio, inode, 0); blocksize = head->b_size; block = div_u64(folio_pos(folio), blocksize); for (bh = head, block_start = 0; bh != head || !block_start; block++, block_start=block_end, bh = bh->b_this_page) { block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { if (folio_test_uptodate(folio)) { if (!buffer_uptodate(bh)) set_buffer_uptodate(bh); } continue; } if (buffer_new(bh)) clear_buffer_new(bh); if (!buffer_mapped(bh)) { WARN_ON(bh->b_size != blocksize); if (get_block) err = get_block(inode, block, bh, 1); else err = iomap_to_bh(inode, block, bh, iomap); if (err) break; if (buffer_new(bh)) { clean_bdev_bh_alias(bh); if (folio_test_uptodate(folio)) { clear_buffer_new(bh); set_buffer_uptodate(bh); mark_buffer_dirty(bh); continue; } if (block_end > to || block_start < from) folio_zero_segments(folio, to, block_end, block_start, from); continue; } } if (folio_test_uptodate(folio)) { if (!buffer_uptodate(bh)) set_buffer_uptodate(bh); continue; } if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh) && (block_start < from || block_end > to)) { bh_read_nowait(bh, 0); *wait_bh++=bh; } } /* * If we issued read requests - let them complete. */ while(wait_bh > wait) { wait_on_buffer(*--wait_bh); if (!buffer_uptodate(*wait_bh)) err = -EIO; } if (unlikely(err)) folio_zero_new_buffers(folio, from, to); return err; } int __block_write_begin(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block) { return __block_write_begin_int(folio, pos, len, get_block, NULL); } EXPORT_SYMBOL(__block_write_begin); static void __block_commit_write(struct folio *folio, size_t from, size_t to) { size_t block_start, block_end; bool partial = false; unsigned blocksize; struct buffer_head *bh, *head; bh = head = folio_buffers(folio); if (!bh) return; blocksize = bh->b_size; block_start = 0; do { block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { if (!buffer_uptodate(bh)) partial = true; } else { set_buffer_uptodate(bh); mark_buffer_dirty(bh); } if (buffer_new(bh)) clear_buffer_new(bh); block_start = block_end; bh = bh->b_this_page; } while (bh != head); /* * If this is a partial write which happened to make all buffers * uptodate then we can optimize away a bogus read_folio() for * the next read(). Here we 'discover' whether the folio went * uptodate as a result of this (potentially partial) write. */ if (!partial) folio_mark_uptodate(folio); } /* * block_write_begin takes care of the basic task of block allocation and * bringing partial write blocks uptodate first. * * The filesystem needs to handle block truncation upon failure. */ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, get_block_t *get_block) { pgoff_t index = pos >> PAGE_SHIFT; struct folio *folio; int status; folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) return PTR_ERR(folio); status = __block_write_begin_int(folio, pos, len, get_block, NULL); if (unlikely(status)) { folio_unlock(folio); folio_put(folio); folio = NULL; } *foliop = folio; return status; } EXPORT_SYMBOL(block_write_begin); int block_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct folio *folio, void *fsdata) { size_t start = pos - folio_pos(folio); if (unlikely(copied < len)) { /* * The buffers that were written will now be uptodate, so * we don't have to worry about a read_folio reading them * and overwriting a partial write. However if we have * encountered a short write and only partially written * into a buffer, it will not be marked uptodate, so a * read_folio might come in and destroy our partial write. * * Do the simplest thing, and just treat any short write to a * non uptodate folio as a zero-length write, and force the * caller to redo the whole thing. */ if (!folio_test_uptodate(folio)) copied = 0; folio_zero_new_buffers(folio, start+copied, start+len); } flush_dcache_folio(folio); /* This could be a short (even 0-length) commit */ __block_commit_write(folio, start, start + copied); return copied; } EXPORT_SYMBOL(block_write_end); int generic_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct folio *folio, void *fsdata) { struct inode *inode = mapping->host; loff_t old_size = inode->i_size; bool i_size_changed = false; copied = block_write_end(file, mapping, pos, len, copied, folio, fsdata); /* * No need to use i_size_read() here, the i_size cannot change under us * because we hold i_rwsem. * * But it's important to update i_size while still holding folio lock: * page writeout could otherwise come in and zero beyond i_size. */ if (pos + copied > inode->i_size) { i_size_write(inode, pos + copied); i_size_changed = true; } folio_unlock(folio); folio_put(folio); if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); /* * Don't mark the inode dirty under page lock. First, it unnecessarily * makes the holding time of page lock longer. Second, it forces lock * ordering of page lock and transaction start for journaling * filesystems. */ if (i_size_changed) mark_inode_dirty(inode); return copied; } EXPORT_SYMBOL(generic_write_end); /* * block_is_partially_uptodate checks whether buffers within a folio are * uptodate or not. * * Returns true if all buffers which correspond to the specified part * of the folio are uptodate. */ bool block_is_partially_uptodate(struct folio *folio, size_t from, size_t count) { unsigned block_start, block_end, blocksize; unsigned to; struct buffer_head *bh, *head; bool ret = true; head = folio_buffers(folio); if (!head) return false; blocksize = head->b_size; to = min_t(unsigned, folio_size(folio) - from, count); to = from + to; if (from < blocksize && to > folio_size(folio) - blocksize) return false; bh = head; block_start = 0; do { block_end = block_start + blocksize; if (block_end > from && block_start < to) { if (!buffer_uptodate(bh)) { ret = false; break; } if (block_end >= to) break; } block_start = block_end; bh = bh->b_this_page; } while (bh != head); return ret; } EXPORT_SYMBOL(block_is_partially_uptodate); /* * Generic "read_folio" function for block devices that have the normal * get_block functionality. This is most of the block device filesystems. * Reads the folio asynchronously --- the unlock_buffer() and * set/clear_buffer_uptodate() functions propagate buffer state into the * folio once IO has completed. */ int block_read_full_folio(struct folio *folio, get_block_t *get_block) { struct inode *inode = folio->mapping->host; sector_t iblock, lblock; struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; size_t blocksize; int nr, i; int fully_mapped = 1; bool page_error = false; loff_t limit = i_size_read(inode); /* This is needed for ext4. */ if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) limit = inode->i_sb->s_maxbytes; VM_BUG_ON_FOLIO(folio_test_large(folio), folio); head = folio_create_buffers(folio, inode, 0); blocksize = head->b_size; iblock = div_u64(folio_pos(folio), blocksize); lblock = div_u64(limit + blocksize - 1, blocksize); bh = head; nr = 0; i = 0; do { if (buffer_uptodate(bh)) continue; if (!buffer_mapped(bh)) { int err = 0; fully_mapped = 0; if (iblock < lblock) { WARN_ON(bh->b_size != blocksize); err = get_block(inode, iblock, bh, 0); if (err) page_error = true; } if (!buffer_mapped(bh)) { folio_zero_range(folio, i * blocksize, blocksize); if (!err) set_buffer_uptodate(bh); continue; } /* * get_block() might have updated the buffer * synchronously */ if (buffer_uptodate(bh)) continue; } arr[nr++] = bh; } while (i++, iblock++, (bh = bh->b_this_page) != head); if (fully_mapped) folio_set_mappedtodisk(folio); if (!nr) { /* * All buffers are uptodate or get_block() returned an * error when trying to map them - we can finish the read. */ folio_end_read(folio, !page_error); return 0; } /* Stage two: lock the buffers */ for (i = 0; i < nr; i++) { bh = arr[i]; lock_buffer(bh); mark_buffer_async_read(bh); } /* * Stage 3: start the IO. Check for uptodateness * inside the buffer lock in case another process reading * the underlying blockdev brought it uptodate (the sct fix). */ for (i = 0; i < nr; i++) { bh = arr[i]; if (buffer_uptodate(bh)) end_buffer_async_read(bh, 1); else submit_bh(REQ_OP_READ, bh); } return 0; } EXPORT_SYMBOL(block_read_full_folio); /* utility function for filesystems that need to do work on expanding * truncates. Uses filesystem pagecache writes to allow the filesystem to * deal with the hole. */ int generic_cont_expand_simple(struct inode *inode, loff_t size) { struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; struct folio *folio; void *fsdata = NULL; int err; err = inode_newsize_ok(inode, size); if (err) goto out; err = aops->write_begin(NULL, mapping, size, 0, &folio, &fsdata); if (err) goto out; err = aops->write_end(NULL, mapping, size, 0, 0, folio, fsdata); BUG_ON(err > 0); out: return err; } EXPORT_SYMBOL(generic_cont_expand_simple); static int cont_expand_zero(struct file *file, struct address_space *mapping, loff_t pos, loff_t *bytes) { struct inode *inode = mapping->host; const struct address_space_operations *aops = mapping->a_ops; unsigned int blocksize = i_blocksize(inode); struct folio *folio; void *fsdata = NULL; pgoff_t index, curidx; loff_t curpos; unsigned zerofrom, offset, len; int err = 0; index = pos >> PAGE_SHIFT; offset = pos & ~PAGE_MASK; while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) { zerofrom = curpos & ~PAGE_MASK; if (zerofrom & (blocksize-1)) { *bytes |= (blocksize-1); (*bytes)++; } len = PAGE_SIZE - zerofrom; err = aops->write_begin(file, mapping, curpos, len, &folio, &fsdata); if (err) goto out; folio_zero_range(folio, offset_in_folio(folio, curpos), len); err = aops->write_end(file, mapping, curpos, len, len, folio, fsdata); if (err < 0) goto out; BUG_ON(err != len); err = 0; balance_dirty_pages_ratelimited(mapping); if (fatal_signal_pending(current)) { err = -EINTR; goto out; } } /* page covers the boundary, find the boundary offset */ if (index == curidx) { zerofrom = curpos & ~PAGE_MASK; /* if we will expand the thing last block will be filled */ if (offset <= zerofrom) { goto out; } if (zerofrom & (blocksize-1)) { *bytes |= (blocksize-1); (*bytes)++; } len = offset - zerofrom; err = aops->write_begin(file, mapping, curpos, len, &folio, &fsdata); if (err) goto out; folio_zero_range(folio, offset_in_folio(folio, curpos), len); err = aops->write_end(file, mapping, curpos, len, len, folio, fsdata); if (err < 0) goto out; BUG_ON(err != len); err = 0; } out: return err; } /* * For moronic filesystems that do not allow holes in file. * We may have to extend the file. */ int cont_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, void **fsdata, get_block_t *get_block, loff_t *bytes) { struct inode *inode = mapping->host; unsigned int blocksize = i_blocksize(inode); unsigned int zerofrom; int err; err = cont_expand_zero(file, mapping, pos, bytes); if (err) return err; zerofrom = *bytes & ~PAGE_MASK; if (pos+len > *bytes && zerofrom & (blocksize-1)) { *bytes |= (blocksize-1); (*bytes)++; } return block_write_begin(mapping, pos, len, foliop, get_block); } EXPORT_SYMBOL(cont_write_begin); void block_commit_write(struct page *page, unsigned from, unsigned to) { struct folio *folio = page_folio(page); __block_commit_write(folio, from, to); } EXPORT_SYMBOL(block_commit_write); /* * block_page_mkwrite() is not allowed to change the file size as it gets * called from a page fault handler when a page is first dirtied. Hence we must * be careful to check for EOF conditions here. We set the page up correctly * for a written page which means we get ENOSPC checking when writing into * holes and correct delalloc and unwritten extent mapping on filesystems that * support these features. * * We are not allowed to take the i_mutex here so we have to play games to * protect against truncate races as the page could now be beyond EOF. Because * truncate writes the inode size before removing pages, once we have the * page lock we can determine safely if the page is beyond EOF. If it is not * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. * * Direct callers of this function should protect against filesystem freezing * using sb_start_pagefault() - sb_end_pagefault() functions. */ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vma->vm_file); unsigned long end; loff_t size; int ret; folio_lock(folio); size = i_size_read(inode); if ((folio->mapping != inode->i_mapping) || (folio_pos(folio) >= size)) { /* We overload EFAULT to mean page got truncated */ ret = -EFAULT; goto out_unlock; } end = folio_size(folio); /* folio is wholly or partially inside EOF */ if (folio_pos(folio) + end > size) end = size - folio_pos(folio); ret = __block_write_begin_int(folio, 0, end, get_block, NULL); if (unlikely(ret)) goto out_unlock; __block_commit_write(folio, 0, end); folio_mark_dirty(folio); folio_wait_stable(folio); return 0; out_unlock: folio_unlock(folio); return ret; } EXPORT_SYMBOL(block_page_mkwrite); int block_truncate_page(struct address_space *mapping, loff_t from, get_block_t *get_block) { pgoff_t index = from >> PAGE_SHIFT; unsigned blocksize; sector_t iblock; size_t offset, length, pos; struct inode *inode = mapping->host; struct folio *folio; struct buffer_head *bh; int err = 0; blocksize = i_blocksize(inode); length = from & (blocksize - 1); /* Block boundary? Nothing to do */ if (!length) return 0; length = blocksize - length; iblock = ((loff_t)index * PAGE_SIZE) >> inode->i_blkbits; folio = filemap_grab_folio(mapping, index); if (IS_ERR(folio)) return PTR_ERR(folio); bh = folio_buffers(folio); if (!bh) bh = create_empty_buffers(folio, blocksize, 0); /* Find the buffer that contains "offset" */ offset = offset_in_folio(folio, from); pos = blocksize; while (offset >= pos) { bh = bh->b_this_page; iblock++; pos += blocksize; } if (!buffer_mapped(bh)) { WARN_ON(bh->b_size != blocksize); err = get_block(inode, iblock, bh, 0); if (err) goto unlock; /* unmapped? It's a hole - nothing to do */ if (!buffer_mapped(bh)) goto unlock; } /* Ok, it's mapped. Make sure it's up-to-date */ if (folio_test_uptodate(folio)) set_buffer_uptodate(bh); if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) { err = bh_read(bh, 0); /* Uhhuh. Read error. Complain and punt. */ if (err < 0) goto unlock; } folio_zero_range(folio, offset, length); mark_buffer_dirty(bh); unlock: folio_unlock(folio); folio_put(folio); return err; } EXPORT_SYMBOL(block_truncate_page); /* * The generic ->writepage function for buffer-backed address_spaces */ int block_write_full_folio(struct folio *folio, struct writeback_control *wbc, void *get_block) { struct inode * const inode = folio->mapping->host; loff_t i_size = i_size_read(inode); /* Is the folio fully inside i_size? */ if (folio_pos(folio) + folio_size(folio) <= i_size) return __block_write_full_folio(inode, folio, get_block, wbc); /* Is the folio fully outside i_size? (truncate in progress) */ if (folio_pos(folio) >= i_size) { folio_unlock(folio); return 0; /* don't care */ } /* * The folio straddles i_size. It must be zeroed out on each and every * writepage invocation because it may be mmapped. "A file is mapped * in multiples of the page size. For a file that is not a multiple of * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ folio_zero_segment(folio, offset_in_folio(folio, i_size), folio_size(folio)); return __block_write_full_folio(inode, folio, get_block, wbc); } sector_t generic_block_bmap(struct address_space *mapping, sector_t block, get_block_t *get_block) { struct inode *inode = mapping->host; struct buffer_head tmp = { .b_size = i_blocksize(inode), }; get_block(inode, block, &tmp, 0); return tmp.b_blocknr; } EXPORT_SYMBOL(generic_block_bmap); static void end_bio_bh_io_sync(struct bio *bio) { struct buffer_head *bh = bio->bi_private; if (unlikely(bio_flagged(bio, BIO_QUIET))) set_bit(BH_Quiet, &bh->b_state); bh->b_end_io(bh, !bio->bi_status); bio_put(bio); } static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, enum rw_hint write_hint, struct writeback_control *wbc) { const enum req_op op = opf & REQ_OP_MASK; struct bio *bio; BUG_ON(!buffer_locked(bh)); BUG_ON(!buffer_mapped(bh)); BUG_ON(!bh->b_end_io); BUG_ON(buffer_delay(bh)); BUG_ON(buffer_unwritten(bh)); /* * Only clear out a write error when rewriting */ if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE)) clear_buffer_write_io_error(bh); if (buffer_meta(bh)) opf |= REQ_META; if (buffer_prio(bh)) opf |= REQ_PRIO; bio = bio_alloc(bh->b_bdev, 1, opf, GFP_NOIO); fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_write_hint = write_hint; __bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); bio->bi_end_io = end_bio_bh_io_sync; bio->bi_private = bh; /* Take care of bh's that straddle the end of the device */ guard_bio_eod(bio); if (wbc) { wbc_init_bio(wbc, bio); wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size); } submit_bio(bio); } void submit_bh(blk_opf_t opf, struct buffer_head *bh) { submit_bh_wbc(opf, bh, WRITE_LIFE_NOT_SET, NULL); } EXPORT_SYMBOL(submit_bh); void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) { lock_buffer(bh); if (!test_clear_buffer_dirty(bh)) { unlock_buffer(bh); return; } bh->b_end_io = end_buffer_write_sync; get_bh(bh); submit_bh(REQ_OP_WRITE | op_flags, bh); } EXPORT_SYMBOL(write_dirty_buffer); /* * For a data-integrity writeout, we need to wait upon any in-progress I/O * and then start new I/O and then wait upon it. The caller must have a ref on * the buffer_head. */ int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) { WARN_ON(atomic_read(&bh->b_count) < 1); lock_buffer(bh); if (test_clear_buffer_dirty(bh)) { /* * The bh should be mapped, but it might not be if the * device was hot-removed. Not much we can do but fail the I/O. */ if (!buffer_mapped(bh)) { unlock_buffer(bh); return -EIO; } get_bh(bh); bh->b_end_io = end_buffer_write_sync; submit_bh(REQ_OP_WRITE | op_flags, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) return -EIO; } else { unlock_buffer(bh); } return 0; } EXPORT_SYMBOL(__sync_dirty_buffer); int sync_dirty_buffer(struct buffer_head *bh) { return __sync_dirty_buffer(bh, REQ_SYNC); } EXPORT_SYMBOL(sync_dirty_buffer); static inline int buffer_busy(struct buffer_head *bh) { return atomic_read(&bh->b_count) | (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock))); } static bool drop_buffers(struct folio *folio, struct buffer_head **buffers_to_free) { struct buffer_head *head = folio_buffers(folio); struct buffer_head *bh; bh = head; do { if (buffer_busy(bh)) goto failed; bh = bh->b_this_page; } while (bh != head); do { struct buffer_head *next = bh->b_this_page; if (bh->b_assoc_map) __remove_assoc_queue(bh); bh = next; } while (bh != head); *buffers_to_free = head; folio_detach_private(folio); return true; failed: return false; } /** * try_to_free_buffers - Release buffers attached to this folio. * @folio: The folio. * * If any buffers are in use (dirty, under writeback, elevated refcount), * no buffers will be freed. * * If the folio is dirty but all the buffers are clean then we need to * be sure to mark the folio clean as well. This is because the folio * may be against a block device, and a later reattachment of buffers * to a dirty folio will set *all* buffers dirty. Which would corrupt * filesystem data on the same device. * * The same applies to regular filesystem folios: if all the buffers are * clean then we set the folio clean and proceed. To do that, we require * total exclusion from block_dirty_folio(). That is obtained with * i_private_lock. * * Exclusion against try_to_free_buffers may be obtained by either * locking the folio or by holding its mapping's i_private_lock. * * Context: Process context. @folio must be locked. Will not sleep. * Return: true if all buffers attached to this folio were freed. */ bool try_to_free_buffers(struct folio *folio) { struct address_space * const mapping = folio->mapping; struct buffer_head *buffers_to_free = NULL; bool ret = 0; BUG_ON(!folio_test_locked(folio)); if (folio_test_writeback(folio)) return false; if (mapping == NULL) { /* can this still happen? */ ret = drop_buffers(folio, &buffers_to_free); goto out; } spin_lock(&mapping->i_private_lock); ret = drop_buffers(folio, &buffers_to_free); /* * If the filesystem writes its buffers by hand (eg ext3) * then we can have clean buffers against a dirty folio. We * clean the folio here; otherwise the VM will never notice * that the filesystem did any IO at all. * * Also, during truncate, discard_buffer will have marked all * the folio's buffers clean. We discover that here and clean * the folio also. * * i_private_lock must be held over this entire operation in order * to synchronise against block_dirty_folio and prevent the * dirty bit from being lost. */ if (ret) folio_cancel_dirty(folio); spin_unlock(&mapping->i_private_lock); out: if (buffers_to_free) { struct buffer_head *bh = buffers_to_free; do { struct buffer_head *next = bh->b_this_page; free_buffer_head(bh); bh = next; } while (bh != buffers_to_free); } return ret; } EXPORT_SYMBOL(try_to_free_buffers); /* * Buffer-head allocation */ static struct kmem_cache *bh_cachep __ro_after_init; /* * Once the number of bh's in the machine exceeds this level, we start * stripping them in writeback. */ static unsigned long max_buffer_heads __ro_after_init; int buffer_heads_over_limit; struct bh_accounting { int nr; /* Number of live bh's */ int ratelimit; /* Limit cacheline bouncing */ }; static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0}; static void recalc_bh_state(void) { int i; int tot = 0; if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096) return; __this_cpu_write(bh_accounting.ratelimit, 0); for_each_online_cpu(i) tot += per_cpu(bh_accounting, i).nr; buffer_heads_over_limit = (tot > max_buffer_heads); } struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); spin_lock_init(&ret->b_uptodate_lock); preempt_disable(); __this_cpu_inc(bh_accounting.nr); recalc_bh_state(); preempt_enable(); } return ret; } EXPORT_SYMBOL(alloc_buffer_head); void free_buffer_head(struct buffer_head *bh) { BUG_ON(!list_empty(&bh->b_assoc_buffers)); kmem_cache_free(bh_cachep, bh); preempt_disable(); __this_cpu_dec(bh_accounting.nr); recalc_bh_state(); preempt_enable(); } EXPORT_SYMBOL(free_buffer_head); static int buffer_exit_cpu_dead(unsigned int cpu) { int i; struct bh_lru *b = &per_cpu(bh_lrus, cpu); for (i = 0; i < BH_LRU_SIZE; i++) { brelse(b->bhs[i]); b->bhs[i] = NULL; } this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr); per_cpu(bh_accounting, cpu).nr = 0; return 0; } /** * bh_uptodate_or_lock - Test whether the buffer is uptodate * @bh: struct buffer_head * * Return true if the buffer is up-to-date and false, * with the buffer locked, if not. */ int bh_uptodate_or_lock(struct buffer_head *bh) { if (!buffer_uptodate(bh)) { lock_buffer(bh); if (!buffer_uptodate(bh)) return 0; unlock_buffer(bh); } return 1; } EXPORT_SYMBOL(bh_uptodate_or_lock); /** * __bh_read - Submit read for a locked buffer * @bh: struct buffer_head * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ * @wait: wait until reading finish * * Returns zero on success or don't wait, and -EIO on error. */ int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait) { int ret = 0; BUG_ON(!buffer_locked(bh)); get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ | op_flags, bh); if (wait) { wait_on_buffer(bh); if (!buffer_uptodate(bh)) ret = -EIO; } return ret; } EXPORT_SYMBOL(__bh_read); /** * __bh_read_batch - Submit read for a batch of unlocked buffers * @nr: entry number of the buffer batch * @bhs: a batch of struct buffer_head * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ * @force_lock: force to get a lock on the buffer if set, otherwise drops any * buffer that cannot lock. * * Returns zero on success or don't wait, and -EIO on error. */ void __bh_read_batch(int nr, struct buffer_head *bhs[], blk_opf_t op_flags, bool force_lock) { int i; for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; if (buffer_uptodate(bh)) continue; if (force_lock) lock_buffer(bh); else if (!trylock_buffer(bh)) continue; if (buffer_uptodate(bh)) { unlock_buffer(bh); continue; } bh->b_end_io = end_buffer_read_sync; get_bh(bh); submit_bh(REQ_OP_READ | op_flags, bh); } } EXPORT_SYMBOL(__bh_read_batch); void __init buffer_init(void) { unsigned long nrpages; int ret; bh_cachep = KMEM_CACHE(buffer_head, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC); /* * Limit the bh occupancy to 10% of ZONE_NORMAL */ nrpages = (nr_free_buffer_pages() * 10) / 100; max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head)); ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead", NULL, buffer_exit_cpu_dead); WARN_ON(ret < 0); } |
10 10 10 10 10 10 10 1 6 1 6 6 6 5 6 6 5 1 6 6 6 1 1 5 5 1 5 5 5 1 5 1 4 4 4 4 1 1 1 1 1 13 13 13 12 9 1 4 1 1 1 1 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/dccp/input.c * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #include <linux/dccp.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <net/sock.h> #include "ackvec.h" #include "ccid.h" #include "dccp.h" /* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */ int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8; static void dccp_enqueue_skb(struct sock *sk, struct sk_buff *skb) { __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); __skb_queue_tail(&sk->sk_receive_queue, skb); skb_set_owner_r(skb, sk); sk->sk_data_ready(sk); } static void dccp_fin(struct sock *sk, struct sk_buff *skb) { /* * On receiving Close/CloseReq, both RD/WR shutdown are performed. * RFC 4340, 8.3 says that we MAY send further Data/DataAcks after * receiving the closing segment, but there is no guarantee that such * data will be processed at all. */ sk->sk_shutdown = SHUTDOWN_MASK; sock_set_flag(sk, SOCK_DONE); dccp_enqueue_skb(sk, skb); } static int dccp_rcv_close(struct sock *sk, struct sk_buff *skb) { int queued = 0; switch (sk->sk_state) { /* * We ignore Close when received in one of the following states: * - CLOSED (may be a late or duplicate packet) * - PASSIVE_CLOSEREQ (the peer has sent a CloseReq earlier) * - RESPOND (already handled by dccp_check_req) */ case DCCP_CLOSING: /* * Simultaneous-close: receiving a Close after sending one. This * can happen if both client and server perform active-close and * will result in an endless ping-pong of crossing and retrans- * mitted Close packets, which only terminates when one of the * nodes times out (min. 64 seconds). Quicker convergence can be * achieved when one of the nodes acts as tie-breaker. * This is ok as both ends are done with data transfer and each * end is just waiting for the other to acknowledge termination. */ if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) break; fallthrough; case DCCP_REQUESTING: case DCCP_ACTIVE_CLOSEREQ: dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); dccp_done(sk); break; case DCCP_OPEN: case DCCP_PARTOPEN: /* Give waiting application a chance to read pending data */ queued = 1; dccp_fin(sk, skb); dccp_set_state(sk, DCCP_PASSIVE_CLOSE); fallthrough; case DCCP_PASSIVE_CLOSE: /* * Retransmitted Close: we have already enqueued the first one. */ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); } return queued; } static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) { int queued = 0; /* * Step 7: Check for unexpected packet types * If (S.is_server and P.type == CloseReq) * Send Sync packet acknowledging P.seqno * Drop packet and return */ if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); return queued; } /* Step 13: process relevant Client states < CLOSEREQ */ switch (sk->sk_state) { case DCCP_REQUESTING: dccp_send_close(sk, 0); dccp_set_state(sk, DCCP_CLOSING); break; case DCCP_OPEN: case DCCP_PARTOPEN: /* Give waiting application a chance to read pending data */ queued = 1; dccp_fin(sk, skb); dccp_set_state(sk, DCCP_PASSIVE_CLOSEREQ); fallthrough; case DCCP_PASSIVE_CLOSEREQ: sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); } return queued; } static u16 dccp_reset_code_convert(const u8 code) { static const u16 error_code[] = { [DCCP_RESET_CODE_CLOSED] = 0, /* normal termination */ [DCCP_RESET_CODE_UNSPECIFIED] = 0, /* nothing known */ [DCCP_RESET_CODE_ABORTED] = ECONNRESET, [DCCP_RESET_CODE_NO_CONNECTION] = ECONNREFUSED, [DCCP_RESET_CODE_CONNECTION_REFUSED] = ECONNREFUSED, [DCCP_RESET_CODE_TOO_BUSY] = EUSERS, [DCCP_RESET_CODE_AGGRESSION_PENALTY] = EDQUOT, [DCCP_RESET_CODE_PACKET_ERROR] = ENOMSG, [DCCP_RESET_CODE_BAD_INIT_COOKIE] = EBADR, [DCCP_RESET_CODE_BAD_SERVICE_CODE] = EBADRQC, [DCCP_RESET_CODE_OPTION_ERROR] = EILSEQ, [DCCP_RESET_CODE_MANDATORY_ERROR] = EOPNOTSUPP, }; return code >= DCCP_MAX_RESET_CODES ? 0 : error_code[code]; } static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) { u16 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code); sk->sk_err = err; /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */ dccp_fin(sk, skb); if (err && !sock_flag(sk, SOCK_DEAD)) sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); dccp_time_wait(sk, DCCP_TIME_WAIT, 0); } static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb) { struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec; if (av == NULL) return; if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq); dccp_ackvec_input(av, skb); } static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb) { const struct dccp_sock *dp = dccp_sk(sk); /* Don't deliver to RX CCID when node has shut down read end. */ if (!(sk->sk_shutdown & RCV_SHUTDOWN)) ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); /* * Until the TX queue has been drained, we can not honour SHUT_WR, since * we need received feedback as input to adjust congestion control. */ if (sk->sk_write_queue.qlen > 0 || !(sk->sk_shutdown & SEND_SHUTDOWN)) ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); } static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); struct dccp_sock *dp = dccp_sk(sk); u64 lswl, lawl, seqno = DCCP_SKB_CB(skb)->dccpd_seq, ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; /* * Step 5: Prepare sequence numbers for Sync * If P.type == Sync or P.type == SyncAck, * If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL, * / * P is valid, so update sequence number variables * accordingly. After this update, P will pass the tests * in Step 6. A SyncAck is generated if necessary in * Step 15 * / * Update S.GSR, S.SWL, S.SWH * Otherwise, * Drop packet and return */ if (dh->dccph_type == DCCP_PKT_SYNC || dh->dccph_type == DCCP_PKT_SYNCACK) { if (between48(ackno, dp->dccps_awl, dp->dccps_awh) && dccp_delta_seqno(dp->dccps_swl, seqno) >= 0) dccp_update_gsr(sk, seqno); else return -1; } /* * Step 6: Check sequence numbers * Let LSWL = S.SWL and LAWL = S.AWL * If P.type == CloseReq or P.type == Close or P.type == Reset, * LSWL := S.GSR + 1, LAWL := S.GAR * If LSWL <= P.seqno <= S.SWH * and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH), * Update S.GSR, S.SWL, S.SWH * If P.type != Sync, * Update S.GAR */ lswl = dp->dccps_swl; lawl = dp->dccps_awl; if (dh->dccph_type == DCCP_PKT_CLOSEREQ || dh->dccph_type == DCCP_PKT_CLOSE || dh->dccph_type == DCCP_PKT_RESET) { lswl = ADD48(dp->dccps_gsr, 1); lawl = dp->dccps_gar; } if (between48(seqno, lswl, dp->dccps_swh) && (ackno == DCCP_PKT_WITHOUT_ACK_SEQ || between48(ackno, lawl, dp->dccps_awh))) { dccp_update_gsr(sk, seqno); if (dh->dccph_type != DCCP_PKT_SYNC && ackno != DCCP_PKT_WITHOUT_ACK_SEQ && after48(ackno, dp->dccps_gar)) dp->dccps_gar = ackno; } else { unsigned long now = jiffies; /* * Step 6: Check sequence numbers * Otherwise, * If P.type == Reset, * Send Sync packet acknowledging S.GSR * Otherwise, * Send Sync packet acknowledging P.seqno * Drop packet and return * * These Syncs are rate-limited as per RFC 4340, 7.5.4: * at most 1 / (dccp_sync_rate_limit * HZ) Syncs per second. */ if (time_before(now, (dp->dccps_rate_last + sysctl_dccp_sync_ratelimit))) return -1; DCCP_WARN("Step 6 failed for %s packet, " "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " "sending SYNC...\n", dccp_packet_name(dh->dccph_type), (unsigned long long) lswl, (unsigned long long) seqno, (unsigned long long) dp->dccps_swh, (ackno == DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists", (unsigned long long) lawl, (unsigned long long) ackno, (unsigned long long) dp->dccps_awh); dp->dccps_rate_last = now; if (dh->dccph_type == DCCP_PKT_RESET) seqno = dp->dccps_gsr; dccp_send_sync(sk, seqno, DCCP_PKT_SYNC); return -1; } return 0; } static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); switch (dccp_hdr(skb)->dccph_type) { case DCCP_PKT_DATAACK: case DCCP_PKT_DATA: /* * FIXME: schedule DATA_DROPPED (RFC 4340, 11.7.2) if and when * - sk_shutdown == RCV_SHUTDOWN, use Code 1, "Not Listening" * - sk_receive_queue is full, use Code 2, "Receive Buffer" */ dccp_enqueue_skb(sk, skb); return 0; case DCCP_PKT_ACK: goto discard; case DCCP_PKT_RESET: /* * Step 9: Process Reset * If P.type == Reset, * Tear down connection * S.state := TIMEWAIT * Set TIMEWAIT timer * Drop packet and return */ dccp_rcv_reset(sk, skb); return 0; case DCCP_PKT_CLOSEREQ: if (dccp_rcv_closereq(sk, skb)) return 0; goto discard; case DCCP_PKT_CLOSE: if (dccp_rcv_close(sk, skb)) return 0; goto discard; case DCCP_PKT_REQUEST: /* Step 7 * or (S.is_server and P.type == Response) * or (S.is_client and P.type == Request) * or (S.state >= OPEN and P.type == Request * and P.seqno >= S.OSR) * or (S.state >= OPEN and P.type == Response * and P.seqno >= S.OSR) * or (S.state == RESPOND and P.type == Data), * Send Sync packet acknowledging P.seqno * Drop packet and return */ if (dp->dccps_role != DCCP_ROLE_LISTEN) goto send_sync; goto check_seq; case DCCP_PKT_RESPONSE: if (dp->dccps_role != DCCP_ROLE_CLIENT) goto send_sync; check_seq: if (dccp_delta_seqno(dp->dccps_osr, DCCP_SKB_CB(skb)->dccpd_seq) >= 0) { send_sync: dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); } break; case DCCP_PKT_SYNC: dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNCACK); /* * From RFC 4340, sec. 5.7 * * As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets * MAY have non-zero-length application data areas, whose * contents receivers MUST ignore. */ goto discard; } DCCP_INC_STATS(DCCP_MIB_INERRS); discard: __kfree_skb(skb); return 0; } int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned int len) { if (dccp_check_seqno(sk, skb)) goto discard; if (dccp_parse_options(sk, NULL, skb)) return 1; dccp_handle_ackvec_processing(sk, skb); dccp_deliver_input_to_ccids(sk, skb); return __dccp_rcv_established(sk, skb, dh, len); discard: __kfree_skb(skb); return 0; } EXPORT_SYMBOL_GPL(dccp_rcv_established); static int dccp_rcv_request_sent_state_process(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned int len) { /* * Step 4: Prepare sequence numbers in REQUEST * If S.state == REQUEST, * If (P.type == Response or P.type == Reset) * and S.AWL <= P.ackno <= S.AWH, * / * Set sequence number variables corresponding to the * other endpoint, so P will pass the tests in Step 6 * / * Set S.GSR, S.ISR, S.SWL, S.SWH * / * Response processing continues in Step 10; Reset * processing continues in Step 9 * / */ if (dh->dccph_type == DCCP_PKT_RESPONSE) { const struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); long tstamp = dccp_timestamp(); if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { dccp_pr_debug("invalid ackno: S.AWL=%llu, " "P.ackno=%llu, S.AWH=%llu\n", (unsigned long long)dp->dccps_awl, (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, (unsigned long long)dp->dccps_awh); goto out_invalid_packet; } /* * If option processing (Step 8) failed, return 1 here so that * dccp_v4_do_rcv() sends a Reset. The Reset code depends on * the option type and is set in dccp_parse_options(). */ if (dccp_parse_options(sk, NULL, skb)) return 1; /* Obtain usec RTT sample from SYN exchange (used by TFRC). */ if (likely(dp->dccps_options_received.dccpor_timestamp_echo)) dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp - dp->dccps_options_received.dccpor_timestamp_echo)); /* Stop the REQUEST timer */ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); WARN_ON(sk->sk_send_head == NULL); kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; /* * Set ISR, GSR from packet. ISS was set in dccp_v{4,6}_connect * and GSS in dccp_transmit_skb(). Setting AWL/AWH and SWL/SWH * is done as part of activating the feature values below, since * these settings depend on the local/remote Sequence Window * features, which were undefined or not confirmed until now. */ dp->dccps_gsr = dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); /* * Step 10: Process REQUEST state (second part) * If S.state == REQUEST, * / * If we get here, P is a valid Response from the * server (see Step 4), and we should move to * PARTOPEN state. PARTOPEN means send an Ack, * don't send Data packets, retransmit Acks * periodically, and always include any Init Cookie * from the Response * / * S.state := PARTOPEN * Set PARTOPEN timer * Continue with S.state == PARTOPEN * / * Step 12 will send the Ack completing the * three-way handshake * / */ dccp_set_state(sk, DCCP_PARTOPEN); /* * If feature negotiation was successful, activate features now; * an activation failure means that this host could not activate * one ore more features (e.g. insufficient memory), which would * leave at least one feature in an undefined state. */ if (dccp_feat_activate_values(sk, &dp->dccps_featneg)) goto unable_to_proceed; /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); } if (sk->sk_write_pending || inet_csk_in_pingpong_mode(sk) || icsk->icsk_accept_queue.rskq_defer_accept) { /* Save one ACK. Data will be ready after * several ticks, if write_pending is set. * * It may be deleted, but with this feature tcpdumps * look so _wonderfully_ clever, that I was not able * to stand against the temptation 8) --ANK */ /* * OK, in DCCP we can as well do a similar trick, its * even in the draft, but there is no need for us to * schedule an ack here, as dccp_sendmsg does this for * us, also stated in the draft. -acme */ __kfree_skb(skb); return 0; } dccp_send_ack(sk); return -1; } out_invalid_packet: /* dccp_v4_do_rcv will send a reset */ DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; return 1; unable_to_proceed: DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_ABORTED; /* * We mark this socket as no longer usable, so that the loop in * dccp_sendmsg() terminates and the application gets notified. */ dccp_set_state(sk, DCCP_CLOSED); sk->sk_err = ECOMM; return 1; } static int dccp_rcv_respond_partopen_state_process(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); u32 sample = dp->dccps_options_received.dccpor_timestamp_echo; int queued = 0; switch (dh->dccph_type) { case DCCP_PKT_RESET: inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); break; case DCCP_PKT_DATA: if (sk->sk_state == DCCP_RESPOND) break; fallthrough; case DCCP_PKT_DATAACK: case DCCP_PKT_ACK: /* * FIXME: we should be resetting the PARTOPEN (DELACK) timer * here but only if we haven't used the DELACK timer for * something else, like sending a delayed ack for a TIMESTAMP * echo, etc, for now were not clearing it, sending an extra * ACK when there is nothing else to do in DELACK is not a big * deal after all. */ /* Stop the PARTOPEN timer */ if (sk->sk_state == DCCP_PARTOPEN) inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); /* Obtain usec RTT sample from SYN exchange (used by TFRC). */ if (likely(sample)) { long delta = dccp_timestamp() - sample; dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * delta); } dp->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; dccp_set_state(sk, DCCP_OPEN); if (dh->dccph_type == DCCP_PKT_DATAACK || dh->dccph_type == DCCP_PKT_DATA) { __dccp_rcv_established(sk, skb, dh, len); queued = 1; /* packet was queued (by __dccp_rcv_established) */ } break; } return queued; } int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct dccp_hdr *dh, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int old_state = sk->sk_state; bool acceptable; int queued = 0; /* * Step 3: Process LISTEN state * * If S.state == LISTEN, * If P.type == Request or P contains a valid Init Cookie option, * (* Must scan the packet's options to check for Init * Cookies. Only Init Cookies are processed here, * however; other options are processed in Step 8. This * scan need only be performed if the endpoint uses Init * Cookies *) * (* Generate a new socket and switch to that socket *) * Set S := new socket for this port pair * S.state = RESPOND * Choose S.ISS (initial seqno) or set from Init Cookies * Initialize S.GAR := S.ISS * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init * Cookies Continue with S.state == RESPOND * (* A Response packet will be generated in Step 11 *) * Otherwise, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { /* It is possible that we process SYN packets from backlog, * so we need to make sure to disable BH and RCU right there. */ rcu_read_lock(); local_bh_disable(); acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; local_bh_enable(); rcu_read_unlock(); if (!acceptable) return 1; consume_skb(skb); return 0; } if (dh->dccph_type == DCCP_PKT_RESET) goto discard; /* Caller (dccp_v4_do_rcv) will send Reset */ dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; } else if (sk->sk_state == DCCP_CLOSED) { dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; } /* Step 6: Check sequence numbers (omitted in LISTEN/REQUEST state) */ if (sk->sk_state != DCCP_REQUESTING && dccp_check_seqno(sk, skb)) goto discard; /* * Step 7: Check for unexpected packet types * If (S.is_server and P.type == Response) * or (S.is_client and P.type == Request) * or (S.state == RESPOND and P.type == Data), * Send Sync packet acknowledging P.seqno * Drop packet and return */ if ((dp->dccps_role != DCCP_ROLE_CLIENT && dh->dccph_type == DCCP_PKT_RESPONSE) || (dp->dccps_role == DCCP_ROLE_CLIENT && dh->dccph_type == DCCP_PKT_REQUEST) || (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); goto discard; } /* Step 8: Process options */ if (dccp_parse_options(sk, NULL, skb)) return 1; /* * Step 9: Process Reset * If P.type == Reset, * Tear down connection * S.state := TIMEWAIT * Set TIMEWAIT timer * Drop packet and return */ if (dh->dccph_type == DCCP_PKT_RESET) { dccp_rcv_reset(sk, skb); return 0; } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { /* Step 13 */ if (dccp_rcv_closereq(sk, skb)) return 0; goto discard; } else if (dh->dccph_type == DCCP_PKT_CLOSE) { /* Step 14 */ if (dccp_rcv_close(sk, skb)) return 0; goto discard; } switch (sk->sk_state) { case DCCP_REQUESTING: queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); if (queued >= 0) return queued; __kfree_skb(skb); return 0; case DCCP_PARTOPEN: /* Step 8: if using Ack Vectors, mark packet acknowledgeable */ dccp_handle_ackvec_processing(sk, skb); dccp_deliver_input_to_ccids(sk, skb); fallthrough; case DCCP_RESPOND: queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len); break; } if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) { switch (old_state) { case DCCP_PARTOPEN: sk->sk_state_change(sk); sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); break; } } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); goto discard; } if (!queued) { discard: __kfree_skb(skb); } return 0; } EXPORT_SYMBOL_GPL(dccp_rcv_state_process); /** * dccp_sample_rtt - Validate and finalise computation of RTT sample * @sk: socket structure * @delta: number of microseconds between packet and acknowledgment * * The routine is kept generic to work in different contexts. It should be * called immediately when the ACK used for the RTT sample arrives. */ u32 dccp_sample_rtt(struct sock *sk, long delta) { /* dccpor_elapsed_time is either zeroed out or set and > 0 */ delta -= dccp_sk(sk)->dccps_options_received.dccpor_elapsed_time * 10; if (unlikely(delta <= 0)) { DCCP_WARN("unusable RTT sample %ld, using min\n", delta); return DCCP_SANE_RTT_MIN; } if (unlikely(delta > DCCP_SANE_RTT_MAX)) { DCCP_WARN("RTT sample %ld too large, using max\n", delta); return DCCP_SANE_RTT_MAX; } return delta; } |
11 11 11 11 10 6 2 4 1 1 1 1 7 14 14 12 1 1 10 9 8 7 7 4 4 2 10 12 12 2 2 2 2 9 9 8 1 2 5 6 2 2 4 8 875 873 8 1 22 14 22 3 1 1 1 1 1 11 2 1 3 5 5 4 8 3 3 17 2 9 7 6 10 15 2 7 2 2 4 3 3 5 8 5 4 3 6 8 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 2 1 1 2 1 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 | // SPDX-License-Identifier: GPL-2.0-or-later /* * NetLabel Unlabeled Support * * This file defines functions for dealing with unlabeled packets for the * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO. * * Author: Paul Moore <paul@paul-moore.com> */ /* * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2008 */ #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/socket.h> #include <linux/string.h> #include <linux/skbuff.h> #include <linux/audit.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/notifier.h> #include <linux/netdevice.h> #include <linux/security.h> #include <linux/slab.h> #include <net/sock.h> #include <net/netlink.h> #include <net/genetlink.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/net_namespace.h> #include <net/netlabel.h> #include <asm/bug.h> #include <linux/atomic.h> #include "netlabel_user.h" #include "netlabel_addrlist.h" #include "netlabel_domainhash.h" #include "netlabel_unlabeled.h" #include "netlabel_mgmt.h" /* NOTE: at present we always use init's network namespace since we don't * presently support different namespaces even though the majority of * the functions in this file are "namespace safe" */ /* The unlabeled connection hash table which we use to map network interfaces * and addresses of unlabeled packets to a user specified secid value for the * LSM. The hash table is used to lookup the network interface entry * (struct netlbl_unlhsh_iface) and then the interface entry is used to * lookup an IP address match from an ordered list. If a network interface * match can not be found in the hash table then the default entry * (netlbl_unlhsh_def) is used. The IP address entry list * (struct netlbl_unlhsh_addr) is ordered such that the entries with a * larger netmask come first. */ struct netlbl_unlhsh_tbl { struct list_head *tbl; u32 size; }; #define netlbl_unlhsh_addr4_entry(iter) \ container_of(iter, struct netlbl_unlhsh_addr4, list) struct netlbl_unlhsh_addr4 { u32 secid; struct netlbl_af4list list; struct rcu_head rcu; }; #define netlbl_unlhsh_addr6_entry(iter) \ container_of(iter, struct netlbl_unlhsh_addr6, list) struct netlbl_unlhsh_addr6 { u32 secid; struct netlbl_af6list list; struct rcu_head rcu; }; struct netlbl_unlhsh_iface { int ifindex; struct list_head addr4_list; struct list_head addr6_list; u32 valid; struct list_head list; struct rcu_head rcu; }; /* Argument struct for netlbl_unlhsh_walk() */ struct netlbl_unlhsh_walk_arg { struct netlink_callback *nl_cb; struct sk_buff *skb; u32 seq; }; /* Unlabeled connection hash table */ /* updates should be so rare that having one spinlock for the entire * hash table should be okay */ static DEFINE_SPINLOCK(netlbl_unlhsh_lock); #define netlbl_unlhsh_rcu_deref(p) \ rcu_dereference_check(p, lockdep_is_held(&netlbl_unlhsh_lock)) static struct netlbl_unlhsh_tbl __rcu *netlbl_unlhsh; static struct netlbl_unlhsh_iface __rcu *netlbl_unlhsh_def; /* Accept unlabeled packets flag */ static u8 netlabel_unlabel_acceptflg; /* NetLabel Generic NETLINK unlabeled family */ static struct genl_family netlbl_unlabel_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 }, [NLBL_UNLABEL_A_IPV6ADDR] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr) }, [NLBL_UNLABEL_A_IPV6MASK] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr) }, [NLBL_UNLABEL_A_IPV4ADDR] = { .type = NLA_BINARY, .len = sizeof(struct in_addr) }, [NLBL_UNLABEL_A_IPV4MASK] = { .type = NLA_BINARY, .len = sizeof(struct in_addr) }, [NLBL_UNLABEL_A_IFACE] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [NLBL_UNLABEL_A_SECCTX] = { .type = NLA_BINARY } }; /* * Unlabeled Connection Hash Table Functions */ /** * netlbl_unlhsh_free_iface - Frees an interface entry from the hash table * @entry: the entry's RCU field * * Description: * This function is designed to be used as a callback to the call_rcu() * function so that memory allocated to a hash table interface entry can be * released safely. It is important to note that this function does not free * the IPv4 and IPv6 address lists contained as part of an interface entry. It * is up to the rest of the code to make sure an interface entry is only freed * once it's address lists are empty. * */ static void netlbl_unlhsh_free_iface(struct rcu_head *entry) { struct netlbl_unlhsh_iface *iface; struct netlbl_af4list *iter4; struct netlbl_af4list *tmp4; #if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; struct netlbl_af6list *tmp6; #endif /* IPv6 */ iface = container_of(entry, struct netlbl_unlhsh_iface, rcu); /* no need for locks here since we are the only one with access to this * structure */ netlbl_af4list_foreach_safe(iter4, tmp4, &iface->addr4_list) { netlbl_af4list_remove_entry(iter4); kfree(netlbl_unlhsh_addr4_entry(iter4)); } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) { netlbl_af6list_remove_entry(iter6); kfree(netlbl_unlhsh_addr6_entry(iter6)); } #endif /* IPv6 */ kfree(iface); } /** * netlbl_unlhsh_hash - Hashing function for the hash table * @ifindex: the network interface/device to hash * * Description: * This is the hashing function for the unlabeled hash table, it returns the * bucket number for the given device/interface. The caller is responsible for * ensuring that the hash table is protected with either a RCU read lock or * the hash table lock. * */ static u32 netlbl_unlhsh_hash(int ifindex) { return ifindex & (netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->size - 1); } /** * netlbl_unlhsh_search_iface - Search for a matching interface entry * @ifindex: the network interface * * Description: * Searches the unlabeled connection hash table and returns a pointer to the * interface entry which matches @ifindex, otherwise NULL is returned. The * caller is responsible for ensuring that the hash table is protected with * either a RCU read lock or the hash table lock. * */ static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) { u32 bkt; struct list_head *bkt_list; struct netlbl_unlhsh_iface *iter; bkt = netlbl_unlhsh_hash(ifindex); bkt_list = &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt]; list_for_each_entry_rcu(iter, bkt_list, list, lockdep_is_held(&netlbl_unlhsh_lock)) if (iter->valid && iter->ifindex == ifindex) return iter; return NULL; } /** * netlbl_unlhsh_add_addr4 - Add a new IPv4 address entry to the hash table * @iface: the associated interface entry * @addr: IPv4 address in network byte order * @mask: IPv4 address mask in network byte order * @secid: LSM secid value for entry * * Description: * Add a new address entry into the unlabeled connection hash table using the * interface entry specified by @iface. On success zero is returned, otherwise * a negative value is returned. * */ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface, const struct in_addr *addr, const struct in_addr *mask, u32 secid) { int ret_val; struct netlbl_unlhsh_addr4 *entry; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) return -ENOMEM; entry->list.addr = addr->s_addr & mask->s_addr; entry->list.mask = mask->s_addr; entry->list.valid = 1; entry->secid = secid; spin_lock(&netlbl_unlhsh_lock); ret_val = netlbl_af4list_add(&entry->list, &iface->addr4_list); spin_unlock(&netlbl_unlhsh_lock); if (ret_val != 0) kfree(entry); return ret_val; } #if IS_ENABLED(CONFIG_IPV6) /** * netlbl_unlhsh_add_addr6 - Add a new IPv6 address entry to the hash table * @iface: the associated interface entry * @addr: IPv6 address in network byte order * @mask: IPv6 address mask in network byte order * @secid: LSM secid value for entry * * Description: * Add a new address entry into the unlabeled connection hash table using the * interface entry specified by @iface. On success zero is returned, otherwise * a negative value is returned. * */ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface, const struct in6_addr *addr, const struct in6_addr *mask, u32 secid) { int ret_val; struct netlbl_unlhsh_addr6 *entry; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) return -ENOMEM; entry->list.addr = *addr; entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1]; entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2]; entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; entry->list.mask = *mask; entry->list.valid = 1; entry->secid = secid; spin_lock(&netlbl_unlhsh_lock); ret_val = netlbl_af6list_add(&entry->list, &iface->addr6_list); spin_unlock(&netlbl_unlhsh_lock); if (ret_val != 0) kfree(entry); return 0; } #endif /* IPv6 */ /** * netlbl_unlhsh_add_iface - Adds a new interface entry to the hash table * @ifindex: network interface * * Description: * Add a new, empty, interface entry into the unlabeled connection hash table. * On success a pointer to the new interface entry is returned, on failure NULL * is returned. * */ static struct netlbl_unlhsh_iface *netlbl_unlhsh_add_iface(int ifindex) { u32 bkt; struct netlbl_unlhsh_iface *iface; iface = kzalloc(sizeof(*iface), GFP_ATOMIC); if (iface == NULL) return NULL; iface->ifindex = ifindex; INIT_LIST_HEAD(&iface->addr4_list); INIT_LIST_HEAD(&iface->addr6_list); iface->valid = 1; spin_lock(&netlbl_unlhsh_lock); if (ifindex > 0) { bkt = netlbl_unlhsh_hash(ifindex); if (netlbl_unlhsh_search_iface(ifindex) != NULL) goto add_iface_failure; list_add_tail_rcu(&iface->list, &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt]); } else { INIT_LIST_HEAD(&iface->list); if (netlbl_unlhsh_rcu_deref(netlbl_unlhsh_def) != NULL) goto add_iface_failure; rcu_assign_pointer(netlbl_unlhsh_def, iface); } spin_unlock(&netlbl_unlhsh_lock); return iface; add_iface_failure: spin_unlock(&netlbl_unlhsh_lock); kfree(iface); return NULL; } /** * netlbl_unlhsh_add - Adds a new entry to the unlabeled connection hash table * @net: network namespace * @dev_name: interface name * @addr: IP address in network byte order * @mask: address mask in network byte order * @addr_len: length of address/mask (4 for IPv4, 16 for IPv6) * @secid: LSM secid value for the entry * @audit_info: NetLabel audit information * * Description: * Adds a new entry to the unlabeled connection hash table. Returns zero on * success, negative values on failure. * */ int netlbl_unlhsh_add(struct net *net, const char *dev_name, const void *addr, const void *mask, u32 addr_len, u32 secid, struct netlbl_audit *audit_info) { int ret_val; int ifindex; struct net_device *dev; struct netlbl_unlhsh_iface *iface; struct audit_buffer *audit_buf = NULL; char *secctx = NULL; u32 secctx_len; if (addr_len != sizeof(struct in_addr) && addr_len != sizeof(struct in6_addr)) return -EINVAL; rcu_read_lock(); if (dev_name != NULL) { dev = dev_get_by_name_rcu(net, dev_name); if (dev == NULL) { ret_val = -ENODEV; goto unlhsh_add_return; } ifindex = dev->ifindex; iface = netlbl_unlhsh_search_iface(ifindex); } else { ifindex = 0; iface = rcu_dereference(netlbl_unlhsh_def); } if (iface == NULL) { iface = netlbl_unlhsh_add_iface(ifindex); if (iface == NULL) { ret_val = -ENOMEM; goto unlhsh_add_return; } } audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCADD, audit_info); switch (addr_len) { case sizeof(struct in_addr): { const struct in_addr *addr4 = addr; const struct in_addr *mask4 = mask; ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); if (audit_buf != NULL) netlbl_af4list_audit_addr(audit_buf, 1, dev_name, addr4->s_addr, mask4->s_addr); break; } #if IS_ENABLED(CONFIG_IPV6) case sizeof(struct in6_addr): { const struct in6_addr *addr6 = addr; const struct in6_addr *mask6 = mask; ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); if (audit_buf != NULL) netlbl_af6list_audit_addr(audit_buf, 1, dev_name, addr6, mask6); break; } #endif /* IPv6 */ default: ret_val = -EINVAL; } if (ret_val == 0) atomic_inc(&netlabel_mgmt_protocount); unlhsh_add_return: rcu_read_unlock(); if (audit_buf != NULL) { if (security_secid_to_secctx(secid, &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); audit_log_end(audit_buf); } return ret_val; } /** * netlbl_unlhsh_remove_addr4 - Remove an IPv4 address entry * @net: network namespace * @iface: interface entry * @addr: IP address * @mask: IP address mask * @audit_info: NetLabel audit information * * Description: * Remove an IP address entry from the unlabeled connection hash table. * Returns zero on success, negative values on failure. * */ static int netlbl_unlhsh_remove_addr4(struct net *net, struct netlbl_unlhsh_iface *iface, const struct in_addr *addr, const struct in_addr *mask, struct netlbl_audit *audit_info) { struct netlbl_af4list *list_entry; struct netlbl_unlhsh_addr4 *entry; struct audit_buffer *audit_buf; struct net_device *dev; char *secctx; u32 secctx_len; spin_lock(&netlbl_unlhsh_lock); list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr, &iface->addr4_list); spin_unlock(&netlbl_unlhsh_lock); if (list_entry != NULL) entry = netlbl_unlhsh_addr4_entry(list_entry); else entry = NULL; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); if (audit_buf != NULL) { dev = dev_get_by_index(net, iface->ifindex); netlbl_af4list_audit_addr(audit_buf, 1, (dev != NULL ? dev->name : NULL), addr->s_addr, mask->s_addr); dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); audit_log_end(audit_buf); } if (entry == NULL) return -ENOENT; kfree_rcu(entry, rcu); return 0; } #if IS_ENABLED(CONFIG_IPV6) /** * netlbl_unlhsh_remove_addr6 - Remove an IPv6 address entry * @net: network namespace * @iface: interface entry * @addr: IP address * @mask: IP address mask * @audit_info: NetLabel audit information * * Description: * Remove an IP address entry from the unlabeled connection hash table. * Returns zero on success, negative values on failure. * */ static int netlbl_unlhsh_remove_addr6(struct net *net, struct netlbl_unlhsh_iface *iface, const struct in6_addr *addr, const struct in6_addr *mask, struct netlbl_audit *audit_info) { struct netlbl_af6list *list_entry; struct netlbl_unlhsh_addr6 *entry; struct audit_buffer *audit_buf; struct net_device *dev; char *secctx; u32 secctx_len; spin_lock(&netlbl_unlhsh_lock); list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list); spin_unlock(&netlbl_unlhsh_lock); if (list_entry != NULL) entry = netlbl_unlhsh_addr6_entry(list_entry); else entry = NULL; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); if (audit_buf != NULL) { dev = dev_get_by_index(net, iface->ifindex); netlbl_af6list_audit_addr(audit_buf, 1, (dev != NULL ? dev->name : NULL), addr, mask); dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); audit_log_end(audit_buf); } if (entry == NULL) return -ENOENT; kfree_rcu(entry, rcu); return 0; } #endif /* IPv6 */ /** * netlbl_unlhsh_condremove_iface - Remove an interface entry * @iface: the interface entry * * Description: * Remove an interface entry from the unlabeled connection hash table if it is * empty. An interface entry is considered to be empty if there are no * address entries assigned to it. * */ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) { struct netlbl_af4list *iter4; #if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; #endif /* IPv6 */ spin_lock(&netlbl_unlhsh_lock); netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list) goto unlhsh_condremove_failure; #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list) goto unlhsh_condremove_failure; #endif /* IPv6 */ iface->valid = 0; if (iface->ifindex > 0) list_del_rcu(&iface->list); else RCU_INIT_POINTER(netlbl_unlhsh_def, NULL); spin_unlock(&netlbl_unlhsh_lock); call_rcu(&iface->rcu, netlbl_unlhsh_free_iface); return; unlhsh_condremove_failure: spin_unlock(&netlbl_unlhsh_lock); } /** * netlbl_unlhsh_remove - Remove an entry from the unlabeled hash table * @net: network namespace * @dev_name: interface name * @addr: IP address in network byte order * @mask: address mask in network byte order * @addr_len: length of address/mask (4 for IPv4, 16 for IPv6) * @audit_info: NetLabel audit information * * Description: * Removes and existing entry from the unlabeled connection hash table. * Returns zero on success, negative values on failure. * */ int netlbl_unlhsh_remove(struct net *net, const char *dev_name, const void *addr, const void *mask, u32 addr_len, struct netlbl_audit *audit_info) { int ret_val; struct net_device *dev; struct netlbl_unlhsh_iface *iface; if (addr_len != sizeof(struct in_addr) && addr_len != sizeof(struct in6_addr)) return -EINVAL; rcu_read_lock(); if (dev_name != NULL) { dev = dev_get_by_name_rcu(net, dev_name); if (dev == NULL) { ret_val = -ENODEV; goto unlhsh_remove_return; } iface = netlbl_unlhsh_search_iface(dev->ifindex); } else iface = rcu_dereference(netlbl_unlhsh_def); if (iface == NULL) { ret_val = -ENOENT; goto unlhsh_remove_return; } switch (addr_len) { case sizeof(struct in_addr): ret_val = netlbl_unlhsh_remove_addr4(net, iface, addr, mask, audit_info); break; #if IS_ENABLED(CONFIG_IPV6) case sizeof(struct in6_addr): ret_val = netlbl_unlhsh_remove_addr6(net, iface, addr, mask, audit_info); break; #endif /* IPv6 */ default: ret_val = -EINVAL; } if (ret_val == 0) { netlbl_unlhsh_condremove_iface(iface); atomic_dec(&netlabel_mgmt_protocount); } unlhsh_remove_return: rcu_read_unlock(); return ret_val; } /* * General Helper Functions */ /** * netlbl_unlhsh_netdev_handler - Network device notification handler * @this: notifier block * @event: the event * @ptr: the netdevice notifier info (cast to void) * * Description: * Handle network device events, although at present all we care about is a * network device going away. In the case of a device going away we clear any * related entries from the unlabeled connection hash table. * */ static int netlbl_unlhsh_netdev_handler(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netlbl_unlhsh_iface *iface = NULL; if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* XXX - should this be a check for NETDEV_DOWN or _UNREGISTER? */ if (event == NETDEV_DOWN) { spin_lock(&netlbl_unlhsh_lock); iface = netlbl_unlhsh_search_iface(dev->ifindex); if (iface != NULL && iface->valid) { iface->valid = 0; list_del_rcu(&iface->list); } else iface = NULL; spin_unlock(&netlbl_unlhsh_lock); } if (iface != NULL) call_rcu(&iface->rcu, netlbl_unlhsh_free_iface); return NOTIFY_DONE; } /** * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag * @value: desired value * @audit_info: NetLabel audit information * * Description: * Set the value of the unlabeled accept flag to @value. * */ static void netlbl_unlabel_acceptflg_set(u8 value, struct netlbl_audit *audit_info) { struct audit_buffer *audit_buf; u8 old_val; old_val = netlabel_unlabel_acceptflg; netlabel_unlabel_acceptflg = value; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW, audit_info); if (audit_buf != NULL) { audit_log_format(audit_buf, " unlbl_accept=%u old=%u", value, old_val); audit_log_end(audit_buf); } } /** * netlbl_unlabel_addrinfo_get - Get the IPv4/6 address information * @info: the Generic NETLINK info block * @addr: the IP address * @mask: the IP address mask * @len: the address length * * Description: * Examine the Generic NETLINK message and extract the IP address information. * Returns zero on success, negative values on failure. * */ static int netlbl_unlabel_addrinfo_get(struct genl_info *info, void **addr, void **mask, u32 *len) { u32 addr_len; if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] && info->attrs[NLBL_UNLABEL_A_IPV4MASK]) { addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); if (addr_len != sizeof(struct in_addr) && addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK])) return -EINVAL; *len = addr_len; *addr = nla_data(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); *mask = nla_data(info->attrs[NLBL_UNLABEL_A_IPV4MASK]); return 0; } else if (info->attrs[NLBL_UNLABEL_A_IPV6ADDR]) { addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV6ADDR]); if (addr_len != sizeof(struct in6_addr) && addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV6MASK])) return -EINVAL; *len = addr_len; *addr = nla_data(info->attrs[NLBL_UNLABEL_A_IPV6ADDR]); *mask = nla_data(info->attrs[NLBL_UNLABEL_A_IPV6MASK]); return 0; } return -EINVAL; } /* * NetLabel Command Handlers */ /** * netlbl_unlabel_accept - Handle an ACCEPT message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated ACCEPT message and set the accept flag accordingly. * Returns zero on success, negative values on failure. * */ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) { u8 value; struct netlbl_audit audit_info; if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) { value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]); if (value == 1 || value == 0) { netlbl_netlink_auditinfo(&audit_info); netlbl_unlabel_acceptflg_set(value, &audit_info); return 0; } } return -EINVAL; } /** * netlbl_unlabel_list - Handle a LIST message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated LIST message and respond with the current status. * Returns zero on success, negative values on failure. * */ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; struct sk_buff *ans_skb; void *data; ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (ans_skb == NULL) goto list_failure; data = genlmsg_put_reply(ans_skb, info, &netlbl_unlabel_gnl_family, 0, NLBL_UNLABEL_C_LIST); if (data == NULL) { ret_val = -ENOMEM; goto list_failure; } ret_val = nla_put_u8(ans_skb, NLBL_UNLABEL_A_ACPTFLG, netlabel_unlabel_acceptflg); if (ret_val != 0) goto list_failure; genlmsg_end(ans_skb, data); return genlmsg_reply(ans_skb, info); list_failure: kfree_skb(ans_skb); return ret_val; } /** * netlbl_unlabel_staticadd - Handle a STATICADD message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated STATICADD message and add a new unlabeled * connection entry to the hash table. Returns zero on success, negative * values on failure. * */ static int netlbl_unlabel_staticadd(struct sk_buff *skb, struct genl_info *info) { int ret_val; char *dev_name; void *addr; void *mask; u32 addr_len; u32 secid; struct netlbl_audit audit_info; /* Don't allow users to add both IPv4 and IPv6 addresses for a * single entry. However, allow users to create two entries, one each * for IPv4 and IPv6, with the same LSM security context which should * achieve the same result. */ if (!info->attrs[NLBL_UNLABEL_A_SECCTX] || !info->attrs[NLBL_UNLABEL_A_IFACE] || !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) return ret_val; dev_name = nla_data(info->attrs[NLBL_UNLABEL_A_IFACE]); ret_val = security_secctx_to_secid( nla_data(info->attrs[NLBL_UNLABEL_A_SECCTX]), nla_len(info->attrs[NLBL_UNLABEL_A_SECCTX]), &secid); if (ret_val != 0) return ret_val; return netlbl_unlhsh_add(&init_net, dev_name, addr, mask, addr_len, secid, &audit_info); } /** * netlbl_unlabel_staticadddef - Handle a STATICADDDEF message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated STATICADDDEF message and add a new default * unlabeled connection entry. Returns zero on success, negative values on * failure. * */ static int netlbl_unlabel_staticadddef(struct sk_buff *skb, struct genl_info *info) { int ret_val; void *addr; void *mask; u32 addr_len; u32 secid; struct netlbl_audit audit_info; /* Don't allow users to add both IPv4 and IPv6 addresses for a * single entry. However, allow users to create two entries, one each * for IPv4 and IPv6, with the same LSM security context which should * achieve the same result. */ if (!info->attrs[NLBL_UNLABEL_A_SECCTX] || !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) return ret_val; ret_val = security_secctx_to_secid( nla_data(info->attrs[NLBL_UNLABEL_A_SECCTX]), nla_len(info->attrs[NLBL_UNLABEL_A_SECCTX]), &secid); if (ret_val != 0) return ret_val; return netlbl_unlhsh_add(&init_net, NULL, addr, mask, addr_len, secid, &audit_info); } /** * netlbl_unlabel_staticremove - Handle a STATICREMOVE message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated STATICREMOVE message and remove the specified * unlabeled connection entry. Returns zero on success, negative values on * failure. * */ static int netlbl_unlabel_staticremove(struct sk_buff *skb, struct genl_info *info) { int ret_val; char *dev_name; void *addr; void *mask; u32 addr_len; struct netlbl_audit audit_info; /* See the note in netlbl_unlabel_staticadd() about not allowing both * IPv4 and IPv6 in the same entry. */ if (!info->attrs[NLBL_UNLABEL_A_IFACE] || !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) return ret_val; dev_name = nla_data(info->attrs[NLBL_UNLABEL_A_IFACE]); return netlbl_unlhsh_remove(&init_net, dev_name, addr, mask, addr_len, &audit_info); } /** * netlbl_unlabel_staticremovedef - Handle a STATICREMOVEDEF message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated STATICREMOVEDEF message and remove the default * unlabeled connection entry. Returns zero on success, negative values on * failure. * */ static int netlbl_unlabel_staticremovedef(struct sk_buff *skb, struct genl_info *info) { int ret_val; void *addr; void *mask; u32 addr_len; struct netlbl_audit audit_info; /* See the note in netlbl_unlabel_staticadd() about not allowing both * IPv4 and IPv6 in the same entry. */ if (!((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) return ret_val; return netlbl_unlhsh_remove(&init_net, NULL, addr, mask, addr_len, &audit_info); } /** * netlbl_unlabel_staticlist_gen - Generate messages for STATICLIST[DEF] * @cmd: command/message * @iface: the interface entry * @addr4: the IPv4 address entry * @addr6: the IPv6 address entry * @arg: the netlbl_unlhsh_walk_arg structure * * Description: * This function is designed to be used to generate a response for a * STATICLIST or STATICLISTDEF message. When called either @addr4 or @addr6 * can be specified, not both, the other unspecified entry should be set to * NULL by the caller. Returns the size of the message on success, negative * values on failure. * */ static int netlbl_unlabel_staticlist_gen(u32 cmd, const struct netlbl_unlhsh_iface *iface, const struct netlbl_unlhsh_addr4 *addr4, const struct netlbl_unlhsh_addr6 *addr6, void *arg) { int ret_val = -ENOMEM; struct netlbl_unlhsh_walk_arg *cb_arg = arg; struct net_device *dev; void *data; u32 secid; char *secctx; u32 secctx_len; data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_unlabel_gnl_family, NLM_F_MULTI, cmd); if (data == NULL) goto list_cb_failure; if (iface->ifindex > 0) { dev = dev_get_by_index(&init_net, iface->ifindex); if (!dev) { ret_val = -ENODEV; goto list_cb_failure; } ret_val = nla_put_string(cb_arg->skb, NLBL_UNLABEL_A_IFACE, dev->name); dev_put(dev); if (ret_val != 0) goto list_cb_failure; } if (addr4) { struct in_addr addr_struct; addr_struct.s_addr = addr4->list.addr; ret_val = nla_put_in_addr(cb_arg->skb, NLBL_UNLABEL_A_IPV4ADDR, addr_struct.s_addr); if (ret_val != 0) goto list_cb_failure; addr_struct.s_addr = addr4->list.mask; ret_val = nla_put_in_addr(cb_arg->skb, NLBL_UNLABEL_A_IPV4MASK, addr_struct.s_addr); if (ret_val != 0) goto list_cb_failure; secid = addr4->secid; } else { ret_val = nla_put_in6_addr(cb_arg->skb, NLBL_UNLABEL_A_IPV6ADDR, &addr6->list.addr); if (ret_val != 0) goto list_cb_failure; ret_val = nla_put_in6_addr(cb_arg->skb, NLBL_UNLABEL_A_IPV6MASK, &addr6->list.mask); if (ret_val != 0) goto list_cb_failure; secid = addr6->secid; } ret_val = security_secid_to_secctx(secid, &secctx, &secctx_len); if (ret_val != 0) goto list_cb_failure; ret_val = nla_put(cb_arg->skb, NLBL_UNLABEL_A_SECCTX, secctx_len, secctx); security_release_secctx(secctx, secctx_len); if (ret_val != 0) goto list_cb_failure; cb_arg->seq++; genlmsg_end(cb_arg->skb, data); return 0; list_cb_failure: genlmsg_cancel(cb_arg->skb, data); return ret_val; } /** * netlbl_unlabel_staticlist - Handle a STATICLIST message * @skb: the NETLINK buffer * @cb: the NETLINK callback * * Description: * Process a user generated STATICLIST message and dump the unlabeled * connection hash table in a form suitable for use in a kernel generated * STATICLIST message. Returns the length of @skb. * */ static int netlbl_unlabel_staticlist(struct sk_buff *skb, struct netlink_callback *cb) { struct netlbl_unlhsh_walk_arg cb_arg; u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; u32 skip_addr4 = cb->args[2]; u32 iter_bkt, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; struct list_head *iter_list; struct netlbl_af4list *addr4; #if IS_ENABLED(CONFIG_IPV6) u32 skip_addr6 = cb->args[3]; struct netlbl_af6list *addr6; #endif cb_arg.nl_cb = cb; cb_arg.skb = skb; cb_arg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); for (iter_bkt = skip_bkt; iter_bkt < rcu_dereference(netlbl_unlhsh)->size; iter_bkt++) { iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt]; list_for_each_entry_rcu(iface, iter_list, list) { if (!iface->valid || iter_chain++ < skip_chain) continue; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { if (iter_addr4++ < skip_addr4) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, iface, netlbl_unlhsh_addr4_entry(addr4), NULL, &cb_arg) < 0) { iter_addr4--; iter_chain--; goto unlabel_staticlist_return; } } iter_addr4 = 0; skip_addr4 = 0; #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { if (iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, iface, NULL, netlbl_unlhsh_addr6_entry(addr6), &cb_arg) < 0) { iter_addr6--; iter_chain--; goto unlabel_staticlist_return; } } iter_addr6 = 0; skip_addr6 = 0; #endif /* IPv6 */ } iter_chain = 0; skip_chain = 0; } unlabel_staticlist_return: rcu_read_unlock(); cb->args[0] = iter_bkt; cb->args[1] = iter_chain; cb->args[2] = iter_addr4; cb->args[3] = iter_addr6; return skb->len; } /** * netlbl_unlabel_staticlistdef - Handle a STATICLISTDEF message * @skb: the NETLINK buffer * @cb: the NETLINK callback * * Description: * Process a user generated STATICLISTDEF message and dump the default * unlabeled connection entry in a form suitable for use in a kernel generated * STATICLISTDEF message. Returns the length of @skb. * */ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, struct netlink_callback *cb) { struct netlbl_unlhsh_walk_arg cb_arg; struct netlbl_unlhsh_iface *iface; u32 iter_addr4 = 0, iter_addr6 = 0; struct netlbl_af4list *addr4; #if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *addr6; #endif cb_arg.nl_cb = cb; cb_arg.skb = skb; cb_arg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); iface = rcu_dereference(netlbl_unlhsh_def); if (iface == NULL || !iface->valid) goto unlabel_staticlistdef_return; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { if (iter_addr4++ < cb->args[0]) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, netlbl_unlhsh_addr4_entry(addr4), NULL, &cb_arg) < 0) { iter_addr4--; goto unlabel_staticlistdef_return; } } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { if (iter_addr6++ < cb->args[1]) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, NULL, netlbl_unlhsh_addr6_entry(addr6), &cb_arg) < 0) { iter_addr6--; goto unlabel_staticlistdef_return; } } #endif /* IPv6 */ unlabel_staticlistdef_return: rcu_read_unlock(); cb->args[0] = iter_addr4; cb->args[1] = iter_addr6; return skb->len; } /* * NetLabel Generic NETLINK Command Definitions */ static const struct genl_small_ops netlbl_unlabel_genl_ops[] = { { .cmd = NLBL_UNLABEL_C_STATICADD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticadd, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticremove, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLIST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_unlabel_staticlist, }, { .cmd = NLBL_UNLABEL_C_STATICADDDEF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticadddef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVEDEF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticremovedef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLISTDEF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_unlabel_staticlistdef, }, { .cmd = NLBL_UNLABEL_C_ACCEPT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_accept, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_LIST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = netlbl_unlabel_list, .dumpit = NULL, }, }; static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = { .hdrsize = 0, .name = NETLBL_NLTYPE_UNLABELED_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_UNLABEL_A_MAX, .policy = netlbl_unlabel_genl_policy, .module = THIS_MODULE, .small_ops = netlbl_unlabel_genl_ops, .n_small_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops), .resv_start_op = NLBL_UNLABEL_C_STATICLISTDEF + 1, }; /* * NetLabel Generic NETLINK Protocol Functions */ /** * netlbl_unlabel_genl_init - Register the Unlabeled NetLabel component * * Description: * Register the unlabeled packet NetLabel component with the Generic NETLINK * mechanism. Returns zero on success, negative values on failure. * */ int __init netlbl_unlabel_genl_init(void) { return genl_register_family(&netlbl_unlabel_gnl_family); } /* * NetLabel KAPI Hooks */ static struct notifier_block netlbl_unlhsh_netdev_notifier = { .notifier_call = netlbl_unlhsh_netdev_handler, }; /** * netlbl_unlabel_init - Initialize the unlabeled connection hash table * @size: the number of bits to use for the hash buckets * * Description: * Initializes the unlabeled connection hash table and registers a network * device notification handler. This function should only be called by the * NetLabel subsystem itself during initialization. Returns zero on success, * non-zero values on error. * */ int __init netlbl_unlabel_init(u32 size) { u32 iter; struct netlbl_unlhsh_tbl *hsh_tbl; if (size == 0) return -EINVAL; hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL); if (hsh_tbl == NULL) return -ENOMEM; hsh_tbl->size = 1 << size; hsh_tbl->tbl = kcalloc(hsh_tbl->size, sizeof(struct list_head), GFP_KERNEL); if (hsh_tbl->tbl == NULL) { kfree(hsh_tbl); return -ENOMEM; } for (iter = 0; iter < hsh_tbl->size; iter++) INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); spin_lock(&netlbl_unlhsh_lock); rcu_assign_pointer(netlbl_unlhsh, hsh_tbl); spin_unlock(&netlbl_unlhsh_lock); register_netdevice_notifier(&netlbl_unlhsh_netdev_notifier); return 0; } /** * netlbl_unlabel_getattr - Get the security attributes for an unlabled packet * @skb: the packet * @family: protocol family * @secattr: the security attributes * * Description: * Determine the security attributes, if any, for an unlabled packet and return * them in @secattr. Returns zero on success and negative values on failure. * */ int netlbl_unlabel_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr) { struct netlbl_unlhsh_iface *iface; rcu_read_lock(); iface = netlbl_unlhsh_search_iface(skb->skb_iif); if (iface == NULL) iface = rcu_dereference(netlbl_unlhsh_def); if (iface == NULL || !iface->valid) goto unlabel_getattr_nolabel; #if IS_ENABLED(CONFIG_IPV6) /* When resolving a fallback label, check the sk_buff version as * it is possible (e.g. SCTP) to have family = PF_INET6 while * receiving ip_hdr(skb)->version = 4. */ if (family == PF_INET6 && ip_hdr(skb)->version == 4) family = PF_INET; #endif /* IPv6 */ switch (family) { case PF_INET: { struct iphdr *hdr4; struct netlbl_af4list *addr4; hdr4 = ip_hdr(skb); addr4 = netlbl_af4list_search(hdr4->saddr, &iface->addr4_list); if (addr4 == NULL) goto unlabel_getattr_nolabel; secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid; break; } #if IS_ENABLED(CONFIG_IPV6) case PF_INET6: { struct ipv6hdr *hdr6; struct netlbl_af6list *addr6; hdr6 = ipv6_hdr(skb); addr6 = netlbl_af6list_search(&hdr6->saddr, &iface->addr6_list); if (addr6 == NULL) goto unlabel_getattr_nolabel; secattr->attr.secid = netlbl_unlhsh_addr6_entry(addr6)->secid; break; } #endif /* IPv6 */ default: goto unlabel_getattr_nolabel; } rcu_read_unlock(); secattr->flags |= NETLBL_SECATTR_SECID; secattr->type = NETLBL_NLTYPE_UNLABELED; return 0; unlabel_getattr_nolabel: rcu_read_unlock(); if (netlabel_unlabel_acceptflg == 0) return -ENOMSG; secattr->type = NETLBL_NLTYPE_UNLABELED; return 0; } /** * netlbl_unlabel_defconf - Set the default config to allow unlabeled packets * * Description: * Set the default NetLabel configuration to allow incoming unlabeled packets * and to send unlabeled network traffic by default. * */ int __init netlbl_unlabel_defconf(void) { int ret_val; struct netlbl_dom_map *entry; struct netlbl_audit audit_info; /* Only the kernel is allowed to call this function and the only time * it is called is at bootup before the audit subsystem is reporting * messages so don't worry to much about these values. */ security_current_getsecid_subj(&audit_info.secid); audit_info.loginuid = GLOBAL_ROOT_UID; audit_info.sessionid = 0; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) return -ENOMEM; entry->family = AF_UNSPEC; entry->def.type = NETLBL_NLTYPE_UNLABELED; ret_val = netlbl_domhsh_add_default(entry, &audit_info); if (ret_val != 0) return ret_val; netlbl_unlabel_acceptflg_set(1, &audit_info); return 0; } |
5 4 1 3 1 1 1 3 1 1 1 1 3 1 1 1 1 2 1 1 1 4 1 1 1 1 2 1 1 4 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/etherdevice.h> #include <net/genetlink.h> #include <net/ncsi.h> #include <linux/skbuff.h> #include <net/sock.h> #include <uapi/linux/ncsi.h> #include "internal.h" #include "ncsi-pkt.h" #include "ncsi-netlink.h" static struct genl_family ncsi_genl_family; static const struct nla_policy ncsi_genl_policy[NCSI_ATTR_MAX + 1] = { [NCSI_ATTR_IFINDEX] = { .type = NLA_U32 }, [NCSI_ATTR_PACKAGE_LIST] = { .type = NLA_NESTED }, [NCSI_ATTR_PACKAGE_ID] = { .type = NLA_U32 }, [NCSI_ATTR_CHANNEL_ID] = { .type = NLA_U32 }, [NCSI_ATTR_DATA] = { .type = NLA_BINARY, .len = 2048 }, [NCSI_ATTR_MULTI_FLAG] = { .type = NLA_FLAG }, [NCSI_ATTR_PACKAGE_MASK] = { .type = NLA_U32 }, [NCSI_ATTR_CHANNEL_MASK] = { .type = NLA_U32 }, }; static struct ncsi_dev_priv *ndp_from_ifindex(struct net *net, u32 ifindex) { struct ncsi_dev_priv *ndp; struct net_device *dev; struct ncsi_dev *nd; struct ncsi_dev; if (!net) return NULL; dev = dev_get_by_index(net, ifindex); if (!dev) { pr_err("NCSI netlink: No device for ifindex %u\n", ifindex); return NULL; } nd = ncsi_find_dev(dev); ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL; dev_put(dev); return ndp; } static int ncsi_write_channel_info(struct sk_buff *skb, struct ncsi_dev_priv *ndp, struct ncsi_channel *nc) { struct ncsi_channel_vlan_filter *ncf; struct ncsi_channel_mode *m; struct nlattr *vid_nest; int i; nla_put_u32(skb, NCSI_CHANNEL_ATTR_ID, nc->id); m = &nc->modes[NCSI_MODE_LINK]; nla_put_u32(skb, NCSI_CHANNEL_ATTR_LINK_STATE, m->data[2]); if (nc->state == NCSI_CHANNEL_ACTIVE) nla_put_flag(skb, NCSI_CHANNEL_ATTR_ACTIVE); if (nc == nc->package->preferred_channel) nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED); nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.major); nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.minor); nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name); vid_nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR_VLAN_LIST); if (!vid_nest) return -ENOMEM; ncf = &nc->vlan_filter; i = -1; while ((i = find_next_bit((void *)&ncf->bitmap, ncf->n_vids, i + 1)) < ncf->n_vids) { if (ncf->vids[i]) nla_put_u16(skb, NCSI_CHANNEL_ATTR_VLAN_ID, ncf->vids[i]); } nla_nest_end(skb, vid_nest); return 0; } static int ncsi_write_package_info(struct sk_buff *skb, struct ncsi_dev_priv *ndp, unsigned int id) { struct nlattr *pnest, *cnest, *nest; struct ncsi_package *np; struct ncsi_channel *nc; bool found; int rc; if (id > ndp->package_num - 1) { netdev_info(ndp->ndev.dev, "NCSI: No package with id %u\n", id); return -ENODEV; } found = false; NCSI_FOR_EACH_PACKAGE(ndp, np) { if (np->id != id) continue; pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR); if (!pnest) return -ENOMEM; rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); if (rc) { nla_nest_cancel(skb, pnest); return rc; } if ((0x1 << np->id) == ndp->package_whitelist) nla_put_flag(skb, NCSI_PKG_ATTR_FORCED); cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST); if (!cnest) { nla_nest_cancel(skb, pnest); return -ENOMEM; } NCSI_FOR_EACH_CHANNEL(np, nc) { nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR); if (!nest) { nla_nest_cancel(skb, cnest); nla_nest_cancel(skb, pnest); return -ENOMEM; } rc = ncsi_write_channel_info(skb, ndp, nc); if (rc) { nla_nest_cancel(skb, nest); nla_nest_cancel(skb, cnest); nla_nest_cancel(skb, pnest); return rc; } nla_nest_end(skb, nest); } nla_nest_end(skb, cnest); nla_nest_end(skb, pnest); found = true; } if (!found) return -ENODEV; return 0; } static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info) { struct ncsi_dev_priv *ndp; unsigned int package_id; struct sk_buff *skb; struct nlattr *attr; void *hdr; int rc; if (!info || !info->attrs) return -EINVAL; if (!info->attrs[NCSI_ATTR_IFINDEX]) return -EINVAL; if (!info->attrs[NCSI_ATTR_PACKAGE_ID]) return -EINVAL; ndp = ndp_from_ifindex(genl_info_net(info), nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX])); if (!ndp) return -ENODEV; skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO); if (!hdr) { kfree_skb(skb); return -EMSGSIZE; } package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]); attr = nla_nest_start_noflag(skb, NCSI_ATTR_PACKAGE_LIST); if (!attr) { kfree_skb(skb); return -EMSGSIZE; } rc = ncsi_write_package_info(skb, ndp, package_id); if (rc) { nla_nest_cancel(skb, attr); goto err; } nla_nest_end(skb, attr); genlmsg_end(skb, hdr); return genlmsg_reply(skb, info); err: kfree_skb(skb); return rc; } static int ncsi_pkg_info_all_nl(struct sk_buff *skb, struct netlink_callback *cb) { struct nlattr *attrs[NCSI_ATTR_MAX + 1]; struct ncsi_package *np, *package; struct ncsi_dev_priv *ndp; unsigned int package_id; struct nlattr *attr; void *hdr; int rc; rc = genlmsg_parse_deprecated(cb->nlh, &ncsi_genl_family, attrs, NCSI_ATTR_MAX, ncsi_genl_policy, NULL); if (rc) return rc; if (!attrs[NCSI_ATTR_IFINDEX]) return -EINVAL; ndp = ndp_from_ifindex(get_net(sock_net(skb->sk)), nla_get_u32(attrs[NCSI_ATTR_IFINDEX])); if (!ndp) return -ENODEV; package_id = cb->args[0]; package = NULL; NCSI_FOR_EACH_PACKAGE(ndp, np) if (np->id == package_id) package = np; if (!package) return 0; /* done */ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ncsi_genl_family, NLM_F_MULTI, NCSI_CMD_PKG_INFO); if (!hdr) { rc = -EMSGSIZE; goto err; } attr = nla_nest_start_noflag(skb, NCSI_ATTR_PACKAGE_LIST); if (!attr) { rc = -EMSGSIZE; goto err; } rc = ncsi_write_package_info(skb, ndp, package->id); if (rc) { nla_nest_cancel(skb, attr); goto err; } nla_nest_end(skb, attr); genlmsg_end(skb, hdr); cb->args[0] = package_id + 1; return skb->len; err: genlmsg_cancel(skb, hdr); return rc; } static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info) { struct ncsi_package *np, *package; struct ncsi_channel *nc, *channel; u32 package_id, channel_id; struct ncsi_dev_priv *ndp; unsigned long flags; if (!info || !info->attrs) return -EINVAL; if (!info->attrs[NCSI_ATTR_IFINDEX]) return -EINVAL; if (!info->attrs[NCSI_ATTR_PACKAGE_ID]) return -EINVAL; ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)), nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX])); if (!ndp) return -ENODEV; package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]); package = NULL; NCSI_FOR_EACH_PACKAGE(ndp, np) if (np->id == package_id) package = np; if (!package) { /* The user has set a package that does not exist */ return -ERANGE; } channel = NULL; if (info->attrs[NCSI_ATTR_CHANNEL_ID]) { channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]); NCSI_FOR_EACH_CHANNEL(package, nc) if (nc->id == channel_id) { channel = nc; break; } if (!channel) { netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n", channel_id); return -ERANGE; } } spin_lock_irqsave(&ndp->lock, flags); ndp->package_whitelist = 0x1 << package->id; ndp->multi_package = false; spin_unlock_irqrestore(&ndp->lock, flags); spin_lock_irqsave(&package->lock, flags); package->multi_channel = false; if (channel) { package->channel_whitelist = 0x1 << channel->id; package->preferred_channel = channel; } else { /* Allow any channel */ package->channel_whitelist = UINT_MAX; package->preferred_channel = NULL; } spin_unlock_irqrestore(&package->lock, flags); if (channel) netdev_info(ndp->ndev.dev, "Set package 0x%x, channel 0x%x as preferred\n", package_id, channel_id); else netdev_info(ndp->ndev.dev, "Set package 0x%x as preferred\n", package_id); /* Update channel configuration */ if (!(ndp->flags & NCSI_DEV_RESET)) ncsi_reset_dev(&ndp->ndev); return 0; } static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info) { struct ncsi_dev_priv *ndp; struct ncsi_package *np; unsigned long flags; if (!info || !info->attrs) return -EINVAL; if (!info->attrs[NCSI_ATTR_IFINDEX]) return -EINVAL; ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)), nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX])); if (!ndp) return -ENODEV; /* Reset any whitelists and disable multi mode */ spin_lock_irqsave(&ndp->lock, flags); ndp->package_whitelist = UINT_MAX; ndp->multi_package = false; spin_unlock_irqrestore(&ndp->lock, flags); NCSI_FOR_EACH_PACKAGE(ndp, np) { spin_lock_irqsave(&np->lock, flags); np->multi_channel = false; np->channel_whitelist = UINT_MAX; np->preferred_channel = NULL; spin_unlock_irqrestore(&np->lock, flags); } netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n"); /* Update channel configuration */ if (!(ndp->flags & NCSI_DEV_RESET)) ncsi_reset_dev(&ndp->ndev); return 0; } static int ncsi_send_cmd_nl(struct sk_buff *msg, struct genl_info *info) { struct ncsi_dev_priv *ndp; struct ncsi_pkt_hdr *hdr; struct ncsi_cmd_arg nca; unsigned char *data; u32 package_id; u32 channel_id; int len, ret; if (!info || !info->attrs) { ret = -EINVAL; goto out; } if (!info->attrs[NCSI_ATTR_IFINDEX]) { ret = -EINVAL; goto out; } if (!info->attrs[NCSI_ATTR_PACKAGE_ID]) { ret = -EINVAL; goto out; } if (!info->attrs[NCSI_ATTR_CHANNEL_ID]) { ret = -EINVAL; goto out; } if (!info->attrs[NCSI_ATTR_DATA]) { ret = -EINVAL; goto out; } ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)), nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX])); if (!ndp) { ret = -ENODEV; goto out; } package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]); channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]); if (package_id >= NCSI_MAX_PACKAGE || channel_id >= NCSI_MAX_CHANNEL) { ret = -ERANGE; goto out_netlink; } len = nla_len(info->attrs[NCSI_ATTR_DATA]); if (len < sizeof(struct ncsi_pkt_hdr)) { netdev_info(ndp->ndev.dev, "NCSI: no command to send %u\n", package_id); ret = -EINVAL; goto out_netlink; } else { data = (unsigned char *)nla_data(info->attrs[NCSI_ATTR_DATA]); } hdr = (struct ncsi_pkt_hdr *)data; nca.ndp = ndp; nca.package = (unsigned char)package_id; nca.channel = (unsigned char)channel_id; nca.type = hdr->type; nca.req_flags = NCSI_REQ_FLAG_NETLINK_DRIVEN; nca.info = info; nca.payload = ntohs(hdr->length); nca.data = data + sizeof(*hdr); ret = ncsi_xmit_cmd(&nca); out_netlink: if (ret != 0) { netdev_err(ndp->ndev.dev, "NCSI: Error %d sending command\n", ret); ncsi_send_netlink_err(ndp->ndev.dev, info->snd_seq, info->snd_portid, info->nlhdr, ret); } out: return ret; } int ncsi_send_netlink_rsp(struct ncsi_request *nr, struct ncsi_package *np, struct ncsi_channel *nc) { struct sk_buff *skb; struct net *net; void *hdr; int rc; net = dev_net(nr->rsp->dev); skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; hdr = genlmsg_put(skb, nr->snd_portid, nr->snd_seq, &ncsi_genl_family, 0, NCSI_CMD_SEND_CMD); if (!hdr) { kfree_skb(skb); return -EMSGSIZE; } nla_put_u32(skb, NCSI_ATTR_IFINDEX, nr->rsp->dev->ifindex); if (np) nla_put_u32(skb, NCSI_ATTR_PACKAGE_ID, np->id); if (nc) nla_put_u32(skb, NCSI_ATTR_CHANNEL_ID, nc->id); else nla_put_u32(skb, NCSI_ATTR_CHANNEL_ID, NCSI_RESERVED_CHANNEL); rc = nla_put(skb, NCSI_ATTR_DATA, nr->rsp->len, (void *)nr->rsp->data); if (rc) goto err; genlmsg_end(skb, hdr); return genlmsg_unicast(net, skb, nr->snd_portid); err: kfree_skb(skb); return rc; } int ncsi_send_netlink_timeout(struct ncsi_request *nr, struct ncsi_package *np, struct ncsi_channel *nc) { struct sk_buff *skb; struct net *net; void *hdr; skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; hdr = genlmsg_put(skb, nr->snd_portid, nr->snd_seq, &ncsi_genl_family, 0, NCSI_CMD_SEND_CMD); if (!hdr) { kfree_skb(skb); return -EMSGSIZE; } net = dev_net(nr->cmd->dev); nla_put_u32(skb, NCSI_ATTR_IFINDEX, nr->cmd->dev->ifindex); if (np) nla_put_u32(skb, NCSI_ATTR_PACKAGE_ID, np->id); else nla_put_u32(skb, NCSI_ATTR_PACKAGE_ID, NCSI_PACKAGE_INDEX((((struct ncsi_pkt_hdr *) nr->cmd->data)->channel))); if (nc) nla_put_u32(skb, NCSI_ATTR_CHANNEL_ID, nc->id); else nla_put_u32(skb, NCSI_ATTR_CHANNEL_ID, NCSI_RESERVED_CHANNEL); genlmsg_end(skb, hdr); return genlmsg_unicast(net, skb, nr->snd_portid); } int ncsi_send_netlink_err(struct net_device *dev, u32 snd_seq, u32 snd_portid, const struct nlmsghdr *nlhdr, int err) { struct nlmsghdr *nlh; struct nlmsgerr *nle; struct sk_buff *skb; struct net *net; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; net = dev_net(dev); nlh = nlmsg_put(skb, snd_portid, snd_seq, NLMSG_ERROR, sizeof(*nle), 0); nle = (struct nlmsgerr *)nlmsg_data(nlh); nle->error = err; memcpy(&nle->msg, nlhdr, sizeof(*nlh)); nlmsg_end(skb, nlh); return nlmsg_unicast(net->genl_sock, skb, snd_portid); } static int ncsi_set_package_mask_nl(struct sk_buff *msg, struct genl_info *info) { struct ncsi_dev_priv *ndp; unsigned long flags; int rc; if (!info || !info->attrs) return -EINVAL; if (!info->attrs[NCSI_ATTR_IFINDEX]) return -EINVAL; if (!info->attrs[NCSI_ATTR_PACKAGE_MASK]) return -EINVAL; ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)), nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX])); if (!ndp) return -ENODEV; spin_lock_irqsave(&ndp->lock, flags); if (nla_get_flag(info->attrs[NCSI_ATTR_MULTI_FLAG])) { if (ndp->flags & NCSI_DEV_HWA) { ndp->multi_package = true; rc = 0; } else { netdev_err(ndp->ndev.dev, "NCSI: Can't use multiple packages without HWA\n"); rc = -EPERM; } } else { ndp->multi_package = false; rc = 0; } if (!rc) ndp->package_whitelist = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_MASK]); spin_unlock_irqrestore(&ndp->lock, flags); if (!rc) { /* Update channel configuration */ if (!(ndp->flags & NCSI_DEV_RESET)) ncsi_reset_dev(&ndp->ndev); } return rc; } static int ncsi_set_channel_mask_nl(struct sk_buff *msg, struct genl_info *info) { struct ncsi_package *np, *package; struct ncsi_channel *nc, *channel; u32 package_id, channel_id; struct ncsi_dev_priv *ndp; unsigned long flags; if (!info || !info->attrs) return -EINVAL; if (!info->attrs[NCSI_ATTR_IFINDEX]) return -EINVAL; if (!info->attrs[NCSI_ATTR_PACKAGE_ID]) return -EINVAL; if (!info->attrs[NCSI_ATTR_CHANNEL_MASK]) return -EINVAL; ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)), nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX])); if (!ndp) return -ENODEV; package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]); package = NULL; NCSI_FOR_EACH_PACKAGE(ndp, np) if (np->id == package_id) { package = np; break; } if (!package) return -ERANGE; spin_lock_irqsave(&package->lock, flags); channel = NULL; if (info->attrs[NCSI_ATTR_CHANNEL_ID]) { channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]); NCSI_FOR_EACH_CHANNEL(np, nc) if (nc->id == channel_id) { channel = nc; break; } if (!channel) { spin_unlock_irqrestore(&package->lock, flags); return -ERANGE; } netdev_dbg(ndp->ndev.dev, "NCSI: Channel %u set as preferred channel\n", channel->id); } package->channel_whitelist = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_MASK]); if (package->channel_whitelist == 0) netdev_dbg(ndp->ndev.dev, "NCSI: Package %u set to all channels disabled\n", package->id); package->preferred_channel = channel; if (nla_get_flag(info->attrs[NCSI_ATTR_MULTI_FLAG])) { package->multi_channel = true; netdev_info(ndp->ndev.dev, "NCSI: Multi-channel enabled on package %u\n", package_id); } else { package->multi_channel = false; } spin_unlock_irqrestore(&package->lock, flags); /* Update channel configuration */ if (!(ndp->flags & NCSI_DEV_RESET)) ncsi_reset_dev(&ndp->ndev); return 0; } static const struct genl_small_ops ncsi_ops[] = { { .cmd = NCSI_CMD_PKG_INFO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_pkg_info_nl, .dumpit = ncsi_pkg_info_all_nl, .flags = 0, }, { .cmd = NCSI_CMD_SET_INTERFACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_set_interface_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_CLEAR_INTERFACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_clear_interface_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SEND_CMD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_send_cmd_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SET_PACKAGE_MASK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_set_package_mask_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SET_CHANNEL_MASK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_set_channel_mask_nl, .flags = GENL_ADMIN_PERM, }, }; static struct genl_family ncsi_genl_family __ro_after_init = { .name = "NCSI", .version = 0, .maxattr = NCSI_ATTR_MAX, .policy = ncsi_genl_policy, .module = THIS_MODULE, .small_ops = ncsi_ops, .n_small_ops = ARRAY_SIZE(ncsi_ops), .resv_start_op = NCSI_CMD_SET_CHANNEL_MASK + 1, }; static int __init ncsi_init_netlink(void) { return genl_register_family(&ncsi_genl_family); } subsys_initcall(ncsi_init_netlink); |
10 9 1 1 9 9 8 1 13 11 1 3 2 1 9 7 1 8 3 5 5 6 8 8 8 8 8 8 8 8 8 8 8 1 7 8 8 8 8 8 8 8 8 8 8 8 4 4 8 16 1 9 5 6 1 1 9 10 10 10 1 3 5 4 3 4 4 3 1 1 2 3 16 1 16 6 6 6 1 4 1 1 1 3 3 6 6 6 7 7 1 1 7 7 4 4 7 1 6 1 6 6 5 1 4 5 2 2 3 3 1 1 1 1 1 1 1 1 1 5 1 3 1 2 1 1 6 6 6 5 2 6 1 6 6 2 2 2 2 2 1 2 1 1 11 11 4 11 6 15 4 11 15 15 15 4 11 6 5 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 | /* * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. * Copyright (c) 2016-2017, Lance Chao <lancerchao@fb.com>. All rights reserved. * Copyright (c) 2016, Fridolin Pokorny <fridolin.pokorny@gmail.com>. All rights reserved. * Copyright (c) 2016, Nikos Mavrogiannopoulos <nmav@gnutls.org>. All rights reserved. * Copyright (c) 2018, Covalent IO, Inc. http://covalent.io * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <linux/bug.h> #include <linux/sched/signal.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/splice.h> #include <crypto/aead.h> #include <net/strparser.h> #include <net/tls.h> #include <trace/events/sock.h> #include "tls.h" struct tls_decrypt_arg { struct_group(inargs, bool zc; bool async; bool async_done; u8 tail; ); struct sk_buff *skb; }; struct tls_decrypt_ctx { struct sock *sk; u8 iv[TLS_MAX_IV_SIZE]; u8 aad[TLS_MAX_AAD_SIZE]; u8 tail; bool free_sgout; struct scatterlist sg[]; }; noinline void tls_err_abort(struct sock *sk, int err) { WARN_ON_ONCE(err >= 0); /* sk->sk_err should contain a positive error code. */ WRITE_ONCE(sk->sk_err, -err); /* Paired with smp_rmb() in tcp_poll() */ smp_wmb(); sk_error_report(sk); } static int __skb_nsg(struct sk_buff *skb, int offset, int len, unsigned int recursion_level) { int start = skb_headlen(skb); int i, chunk = start - offset; struct sk_buff *frag_iter; int elt = 0; if (unlikely(recursion_level >= 24)) return -EMSGSIZE; if (chunk > 0) { if (chunk > len) chunk = len; elt++; len -= chunk; if (len == 0) return elt; offset += chunk; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; WARN_ON(start > offset + len); end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); chunk = end - offset; if (chunk > 0) { if (chunk > len) chunk = len; elt++; len -= chunk; if (len == 0) return elt; offset += chunk; } start = end; } if (unlikely(skb_has_frag_list(skb))) { skb_walk_frags(skb, frag_iter) { int end, ret; WARN_ON(start > offset + len); end = start + frag_iter->len; chunk = end - offset; if (chunk > 0) { if (chunk > len) chunk = len; ret = __skb_nsg(frag_iter, offset - start, chunk, recursion_level + 1); if (unlikely(ret < 0)) return ret; elt += ret; len -= chunk; if (len == 0) return elt; offset += chunk; } start = end; } } BUG_ON(len); return elt; } /* Return the number of scatterlist elements required to completely map the * skb, or -EMSGSIZE if the recursion depth is exceeded. */ static int skb_nsg(struct sk_buff *skb, int offset, int len) { return __skb_nsg(skb, offset, len, 0); } static int tls_padding_length(struct tls_prot_info *prot, struct sk_buff *skb, struct tls_decrypt_arg *darg) { struct strp_msg *rxm = strp_msg(skb); struct tls_msg *tlm = tls_msg(skb); int sub = 0; /* Determine zero-padding length */ if (prot->version == TLS_1_3_VERSION) { int offset = rxm->full_len - TLS_TAG_SIZE - 1; char content_type = darg->zc ? darg->tail : 0; int err; while (content_type == 0) { if (offset < prot->prepend_size) return -EBADMSG; err = skb_copy_bits(skb, rxm->offset + offset, &content_type, 1); if (err) return err; if (content_type) break; sub++; offset--; } tlm->control = content_type; } return sub; } static void tls_decrypt_done(void *data, int err) { struct aead_request *aead_req = data; struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); struct scatterlist *sgout = aead_req->dst; struct tls_sw_context_rx *ctx; struct tls_decrypt_ctx *dctx; struct tls_context *tls_ctx; struct scatterlist *sg; unsigned int pages; struct sock *sk; int aead_size; /* If requests get too backlogged crypto API returns -EBUSY and calls * ->complete(-EINPROGRESS) immediately followed by ->complete(0) * to make waiting for backlog to flush with crypto_wait_req() easier. * First wait converts -EBUSY -> -EINPROGRESS, and the second one * -EINPROGRESS -> 0. * We have a single struct crypto_async_request per direction, this * scheme doesn't help us, so just ignore the first ->complete(). */ if (err == -EINPROGRESS) return; aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead); aead_size = ALIGN(aead_size, __alignof__(*dctx)); dctx = (void *)((u8 *)aead_req + aead_size); sk = dctx->sk; tls_ctx = tls_get_ctx(sk); ctx = tls_sw_ctx_rx(tls_ctx); /* Propagate if there was an err */ if (err) { if (err == -EBADMSG) TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); ctx->async_wait.err = err; tls_err_abort(sk, err); } /* Free the destination pages if skb was not decrypted inplace */ if (dctx->free_sgout) { /* Skip the first S/G entry as it points to AAD */ for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { if (!sg) break; put_page(sg_page(sg)); } } kfree(aead_req); if (atomic_dec_and_test(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); } static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) { if (!atomic_dec_and_test(&ctx->decrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); atomic_inc(&ctx->decrypt_pending); return ctx->async_wait.err; } static int tls_do_decryption(struct sock *sk, struct scatterlist *sgin, struct scatterlist *sgout, char *iv_recv, size_t data_len, struct aead_request *aead_req, struct tls_decrypt_arg *darg) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); int ret; aead_request_set_tfm(aead_req, ctx->aead_recv); aead_request_set_ad(aead_req, prot->aad_size); aead_request_set_crypt(aead_req, sgin, sgout, data_len + prot->tag_size, (u8 *)iv_recv); if (darg->async) { aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_decrypt_done, aead_req); DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { DECLARE_CRYPTO_WAIT(wait); aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &wait); ret = crypto_aead_decrypt(aead_req); if (ret == -EINPROGRESS || ret == -EBUSY) ret = crypto_wait_req(ret, &wait); return ret; } ret = crypto_aead_decrypt(aead_req); if (ret == -EINPROGRESS) return 0; if (ret == -EBUSY) { ret = tls_decrypt_async_wait(ctx); darg->async_done = true; /* all completions have run, we're not doing async anymore */ darg->async = false; return ret; } atomic_dec(&ctx->decrypt_pending); darg->async = false; return ret; } static void tls_trim_both_msgs(struct sock *sk, int target_size) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec; sk_msg_trim(sk, &rec->msg_plaintext, target_size); if (target_size > 0) target_size += prot->overhead_size; sk_msg_trim(sk, &rec->msg_encrypted, target_size); } static int tls_alloc_encrypted_msg(struct sock *sk, int len) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec; struct sk_msg *msg_en = &rec->msg_encrypted; return sk_msg_alloc(sk, msg_en, len, 0); } static int tls_clone_plaintext_msg(struct sock *sk, int required) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec; struct sk_msg *msg_pl = &rec->msg_plaintext; struct sk_msg *msg_en = &rec->msg_encrypted; int skip, len; /* We add page references worth len bytes from encrypted sg * at the end of plaintext sg. It is guaranteed that msg_en * has enough required room (ensured by caller). */ len = required - msg_pl->sg.size; /* Skip initial bytes in msg_en's data to be able to use * same offset of both plain and encrypted data. */ skip = prot->prepend_size + msg_pl->sg.size; return sk_msg_clone(sk, msg_pl, msg_en, skip, len); } static struct tls_rec *tls_get_rec(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct sk_msg *msg_pl, *msg_en; struct tls_rec *rec; int mem_size; mem_size = sizeof(struct tls_rec) + crypto_aead_reqsize(ctx->aead_send); rec = kzalloc(mem_size, sk->sk_allocation); if (!rec) return NULL; msg_pl = &rec->msg_plaintext; msg_en = &rec->msg_encrypted; sk_msg_init(msg_pl); sk_msg_init(msg_en); sg_init_table(rec->sg_aead_in, 2); sg_set_buf(&rec->sg_aead_in[0], rec->aad_space, prot->aad_size); sg_unmark_end(&rec->sg_aead_in[1]); sg_init_table(rec->sg_aead_out, 2); sg_set_buf(&rec->sg_aead_out[0], rec->aad_space, prot->aad_size); sg_unmark_end(&rec->sg_aead_out[1]); rec->sk = sk; return rec; } static void tls_free_rec(struct sock *sk, struct tls_rec *rec) { sk_msg_free(sk, &rec->msg_encrypted); sk_msg_free(sk, &rec->msg_plaintext); kfree(rec); } static void tls_free_open_rec(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec; if (rec) { tls_free_rec(sk, rec); ctx->open_rec = NULL; } } int tls_tx_records(struct sock *sk, int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; struct sk_msg *msg_en; int tx_flags, rc = 0; if (tls_is_partially_sent_record(tls_ctx)) { rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); if (flags == -1) tx_flags = rec->tx_flags; else tx_flags = flags; rc = tls_push_partial_record(sk, tls_ctx, tx_flags); if (rc) goto tx_err; /* Full record has been transmitted. * Remove the head of tx_list */ list_del(&rec->list); sk_msg_free(sk, &rec->msg_plaintext); kfree(rec); } /* Tx all ready records */ list_for_each_entry_safe(rec, tmp, &ctx->tx_list, list) { if (READ_ONCE(rec->tx_ready)) { if (flags == -1) tx_flags = rec->tx_flags; else tx_flags = flags; msg_en = &rec->msg_encrypted; rc = tls_push_sg(sk, tls_ctx, &msg_en->sg.data[msg_en->sg.curr], 0, tx_flags); if (rc) goto tx_err; list_del(&rec->list); sk_msg_free(sk, &rec->msg_plaintext); kfree(rec); } else { break; } } tx_err: if (rc < 0 && rc != -EAGAIN) tls_err_abort(sk, -EBADMSG); return rc; } static void tls_encrypt_done(void *data, int err) { struct tls_sw_context_tx *ctx; struct tls_context *tls_ctx; struct tls_prot_info *prot; struct tls_rec *rec = data; struct scatterlist *sge; struct sk_msg *msg_en; struct sock *sk; if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */ return; msg_en = &rec->msg_encrypted; sk = rec->sk; tls_ctx = tls_get_ctx(sk); prot = &tls_ctx->prot_info; ctx = tls_sw_ctx_tx(tls_ctx); sge = sk_msg_elem(msg_en, msg_en->sg.curr); sge->offset -= prot->prepend_size; sge->length += prot->prepend_size; /* Check if error is previously set on socket */ if (err || sk->sk_err) { rec = NULL; /* If err is already set on socket, return the same code */ if (sk->sk_err) { ctx->async_wait.err = -sk->sk_err; } else { ctx->async_wait.err = err; tls_err_abort(sk, err); } } if (rec) { struct tls_rec *first_rec; /* Mark the record as ready for transmission */ smp_store_mb(rec->tx_ready, true); /* If received record is at head of tx_list, schedule tx */ first_rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); if (rec == first_rec) { /* Schedule the transmission */ if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) schedule_delayed_work(&ctx->tx_work.work, 1); } } if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); } static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) { if (!atomic_dec_and_test(&ctx->encrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); atomic_inc(&ctx->encrypt_pending); return ctx->async_wait.err; } static int tls_do_encryption(struct sock *sk, struct tls_context *tls_ctx, struct tls_sw_context_tx *ctx, struct aead_request *aead_req, size_t data_len, u32 start) { struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_rec *rec = ctx->open_rec; struct sk_msg *msg_en = &rec->msg_encrypted; struct scatterlist *sge = sk_msg_elem(msg_en, start); int rc, iv_offset = 0; /* For CCM based ciphers, first byte of IV is a constant */ switch (prot->cipher_type) { case TLS_CIPHER_AES_CCM_128: rec->iv_data[0] = TLS_AES_CCM_IV_B0_BYTE; iv_offset = 1; break; case TLS_CIPHER_SM4_CCM: rec->iv_data[0] = TLS_SM4_CCM_IV_B0_BYTE; iv_offset = 1; break; } memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv, prot->iv_size + prot->salt_size); tls_xor_iv_with_seq(prot, rec->iv_data + iv_offset, tls_ctx->tx.rec_seq); sge->offset += prot->prepend_size; sge->length -= prot->prepend_size; msg_en->sg.curr = start; aead_request_set_tfm(aead_req, ctx->aead_send); aead_request_set_ad(aead_req, prot->aad_size); aead_request_set_crypt(aead_req, rec->sg_aead_in, rec->sg_aead_out, data_len, rec->iv_data); aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_encrypt_done, rec); /* Add the record in tx_list */ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1); atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); if (rc == -EBUSY) { rc = tls_encrypt_async_wait(ctx); rc = rc ?: -EINPROGRESS; } if (!rc || rc != -EINPROGRESS) { atomic_dec(&ctx->encrypt_pending); sge->offset -= prot->prepend_size; sge->length += prot->prepend_size; } if (!rc) { WRITE_ONCE(rec->tx_ready, true); } else if (rc != -EINPROGRESS) { list_del(&rec->list); return rc; } /* Unhook the record from context if encryption is not failure */ ctx->open_rec = NULL; tls_advance_record_sn(sk, prot, &tls_ctx->tx); return rc; } static int tls_split_open_record(struct sock *sk, struct tls_rec *from, struct tls_rec **to, struct sk_msg *msg_opl, struct sk_msg *msg_oen, u32 split_point, u32 tx_overhead_size, u32 *orig_end) { u32 i, j, bytes = 0, apply = msg_opl->apply_bytes; struct scatterlist *sge, *osge, *nsge; u32 orig_size = msg_opl->sg.size; struct scatterlist tmp = { }; struct sk_msg *msg_npl; struct tls_rec *new; int ret; new = tls_get_rec(sk); if (!new) return -ENOMEM; ret = sk_msg_alloc(sk, &new->msg_encrypted, msg_opl->sg.size + tx_overhead_size, 0); if (ret < 0) { tls_free_rec(sk, new); return ret; } *orig_end = msg_opl->sg.end; i = msg_opl->sg.start; sge = sk_msg_elem(msg_opl, i); while (apply && sge->length) { if (sge->length > apply) { u32 len = sge->length - apply; get_page(sg_page(sge)); sg_set_page(&tmp, sg_page(sge), len, sge->offset + apply); sge->length = apply; bytes += apply; apply = 0; } else { apply -= sge->length; bytes += sge->length; } sk_msg_iter_var_next(i); if (i == msg_opl->sg.end) break; sge = sk_msg_elem(msg_opl, i); } msg_opl->sg.end = i; msg_opl->sg.curr = i; msg_opl->sg.copybreak = 0; msg_opl->apply_bytes = 0; msg_opl->sg.size = bytes; msg_npl = &new->msg_plaintext; msg_npl->apply_bytes = apply; msg_npl->sg.size = orig_size - bytes; j = msg_npl->sg.start; nsge = sk_msg_elem(msg_npl, j); if (tmp.length) { memcpy(nsge, &tmp, sizeof(*nsge)); sk_msg_iter_var_next(j); nsge = sk_msg_elem(msg_npl, j); } osge = sk_msg_elem(msg_opl, i); while (osge->length) { memcpy(nsge, osge, sizeof(*nsge)); sg_unmark_end(nsge); sk_msg_iter_var_next(i); sk_msg_iter_var_next(j); if (i == *orig_end) break; osge = sk_msg_elem(msg_opl, i); nsge = sk_msg_elem(msg_npl, j); } msg_npl->sg.end = j; msg_npl->sg.curr = j; msg_npl->sg.copybreak = 0; *to = new; return 0; } static void tls_merge_open_record(struct sock *sk, struct tls_rec *to, struct tls_rec *from, u32 orig_end) { struct sk_msg *msg_npl = &from->msg_plaintext; struct sk_msg *msg_opl = &to->msg_plaintext; struct scatterlist *osge, *nsge; u32 i, j; i = msg_opl->sg.end; sk_msg_iter_var_prev(i); j = msg_npl->sg.start; osge = sk_msg_elem(msg_opl, i); nsge = sk_msg_elem(msg_npl, j); if (sg_page(osge) == sg_page(nsge) && osge->offset + osge->length == nsge->offset) { osge->length += nsge->length; put_page(sg_page(nsge)); } msg_opl->sg.end = orig_end; msg_opl->sg.curr = orig_end; msg_opl->sg.copybreak = 0; msg_opl->apply_bytes = msg_opl->sg.size + msg_npl->sg.size; msg_opl->sg.size += msg_npl->sg.size; sk_msg_free(sk, &to->msg_encrypted); sk_msg_xfer_full(&to->msg_encrypted, &from->msg_encrypted); kfree(from); } static int tls_push_record(struct sock *sk, int flags, unsigned char record_type) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec, *tmp = NULL; u32 i, split_point, orig_end; struct sk_msg *msg_pl, *msg_en; struct aead_request *req; bool split; int rc; if (!rec) return 0; msg_pl = &rec->msg_plaintext; msg_en = &rec->msg_encrypted; split_point = msg_pl->apply_bytes; split = split_point && split_point < msg_pl->sg.size; if (unlikely((!split && msg_pl->sg.size + prot->overhead_size > msg_en->sg.size) || (split && split_point + prot->overhead_size > msg_en->sg.size))) { split = true; split_point = msg_en->sg.size; } if (split) { rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en, split_point, prot->overhead_size, &orig_end); if (rc < 0) return rc; /* This can happen if above tls_split_open_record allocates * a single large encryption buffer instead of two smaller * ones. In this case adjust pointers and continue without * split. */ if (!msg_pl->sg.size) { tls_merge_open_record(sk, rec, tmp, orig_end); msg_pl = &rec->msg_plaintext; msg_en = &rec->msg_encrypted; split = false; } sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size); } rec->tx_flags = flags; req = &rec->aead_req; i = msg_pl->sg.end; sk_msg_iter_var_prev(i); rec->content_type = record_type; if (prot->version == TLS_1_3_VERSION) { /* Add content type to end of message. No padding added */ sg_set_buf(&rec->sg_content_type, &rec->content_type, 1); sg_mark_end(&rec->sg_content_type); sg_chain(msg_pl->sg.data, msg_pl->sg.end + 1, &rec->sg_content_type); } else { sg_mark_end(sk_msg_elem(msg_pl, i)); } if (msg_pl->sg.end < msg_pl->sg.start) { sg_chain(&msg_pl->sg.data[msg_pl->sg.start], MAX_SKB_FRAGS - msg_pl->sg.start + 1, msg_pl->sg.data); } i = msg_pl->sg.start; sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]); i = msg_en->sg.end; sk_msg_iter_var_prev(i); sg_mark_end(sk_msg_elem(msg_en, i)); i = msg_en->sg.start; sg_chain(rec->sg_aead_out, 2, &msg_en->sg.data[i]); tls_make_aad(rec->aad_space, msg_pl->sg.size + prot->tail_size, tls_ctx->tx.rec_seq, record_type, prot); tls_fill_prepend(tls_ctx, page_address(sg_page(&msg_en->sg.data[i])) + msg_en->sg.data[i].offset, msg_pl->sg.size + prot->tail_size, record_type); tls_ctx->pending_open_record_frags = false; rc = tls_do_encryption(sk, tls_ctx, ctx, req, msg_pl->sg.size + prot->tail_size, i); if (rc < 0) { if (rc != -EINPROGRESS) { tls_err_abort(sk, -EBADMSG); if (split) { tls_ctx->pending_open_record_frags = true; tls_merge_open_record(sk, rec, tmp, orig_end); } } ctx->async_capable = 1; return rc; } else if (split) { msg_pl = &tmp->msg_plaintext; msg_en = &tmp->msg_encrypted; sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size); tls_ctx->pending_open_record_frags = true; ctx->open_rec = tmp; } return tls_tx_records(sk, flags); } static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, bool full_record, u8 record_type, ssize_t *copied, int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct sk_msg msg_redir = { }; struct sk_psock *psock; struct sock *sk_redir; struct tls_rec *rec; bool enospc, policy, redir_ingress; int err = 0, send; u32 delta = 0; policy = !(flags & MSG_SENDPAGE_NOPOLICY); psock = sk_psock_get(sk); if (!psock || !policy) { err = tls_push_record(sk, flags, record_type); if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); err = -sk->sk_err; } if (psock) sk_psock_put(sk, psock); return err; } more_data: enospc = sk_msg_full(msg); if (psock->eval == __SK_NONE) { delta = msg->sg.size; psock->eval = sk_psock_msg_verdict(sk, psock, msg); delta -= msg->sg.size; } if (msg->cork_bytes && msg->cork_bytes > msg->sg.size && !enospc && !full_record) { err = -ENOSPC; goto out_err; } msg->cork_bytes = 0; send = msg->sg.size; if (msg->apply_bytes && msg->apply_bytes < send) send = msg->apply_bytes; switch (psock->eval) { case __SK_PASS: err = tls_push_record(sk, flags, record_type); if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); err = -sk->sk_err; goto out_err; } break; case __SK_REDIRECT: redir_ingress = psock->redir_ingress; sk_redir = psock->sk_redir; memcpy(&msg_redir, msg, sizeof(*msg)); if (msg->apply_bytes < send) msg->apply_bytes = 0; else msg->apply_bytes -= send; sk_msg_return_zero(sk, msg, send); msg->sg.size -= send; release_sock(sk); err = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress, &msg_redir, send, flags); lock_sock(sk); if (err < 0) { *copied -= sk_msg_free_nocharge(sk, &msg_redir); msg->sg.size = 0; } if (msg->sg.size == 0) tls_free_open_rec(sk); break; case __SK_DROP: default: sk_msg_free_partial(sk, msg, send); if (msg->apply_bytes < send) msg->apply_bytes = 0; else msg->apply_bytes -= send; if (msg->sg.size == 0) tls_free_open_rec(sk); *copied -= (send + delta); err = -EACCES; } if (likely(!err)) { bool reset_eval = !ctx->open_rec; rec = ctx->open_rec; if (rec) { msg = &rec->msg_plaintext; if (!msg->apply_bytes) reset_eval = true; } if (reset_eval) { psock->eval = __SK_NONE; if (psock->sk_redir) { sock_put(psock->sk_redir); psock->sk_redir = NULL; } } if (rec) goto more_data; } out_err: sk_psock_put(sk, psock); return err; } static int tls_sw_push_pending_record(struct sock *sk, int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec; struct sk_msg *msg_pl; size_t copied; if (!rec) return 0; msg_pl = &rec->msg_plaintext; copied = msg_pl->sg.size; if (!copied) return 0; return bpf_exec_tx_verdict(msg_pl, sk, true, TLS_RECORD_TYPE_DATA, &copied, flags); } static int tls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg, struct sk_msg *msg_pl, size_t try_to_copy, ssize_t *copied) { struct page *page = NULL, **pages = &page; do { ssize_t part; size_t off; part = iov_iter_extract_pages(&msg->msg_iter, &pages, try_to_copy, 1, 0, &off); if (part <= 0) return part ?: -EIO; if (WARN_ON_ONCE(!sendpage_ok(page))) { iov_iter_revert(&msg->msg_iter, part); return -EIO; } sk_msg_page_add(msg_pl, page, part, off); msg_pl->sg.copybreak = 0; msg_pl->sg.curr = msg_pl->sg.end; sk_mem_charge(sk, part); *copied += part; try_to_copy -= part; } while (try_to_copy && !sk_msg_full(msg_pl)); return 0; } static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) { long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); bool async_capable = ctx->async_capable; unsigned char record_type = TLS_RECORD_TYPE_DATA; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool eor = !(msg->msg_flags & MSG_MORE); size_t try_to_copy; ssize_t copied = 0; struct sk_msg *msg_pl, *msg_en; struct tls_rec *rec; int required_size; int num_async = 0; bool full_record; int record_room; int num_zc = 0; int orig_size; int ret = 0; if (!eor && (msg->msg_flags & MSG_EOR)) return -EINVAL; if (unlikely(msg->msg_controllen)) { ret = tls_process_cmsg(sk, msg, &record_type); if (ret) { if (ret == -EINPROGRESS) num_async++; else if (ret != -EAGAIN) goto send_end; } } while (msg_data_left(msg)) { if (sk->sk_err) { ret = -sk->sk_err; goto send_end; } if (ctx->open_rec) rec = ctx->open_rec; else rec = ctx->open_rec = tls_get_rec(sk); if (!rec) { ret = -ENOMEM; goto send_end; } msg_pl = &rec->msg_plaintext; msg_en = &rec->msg_encrypted; orig_size = msg_pl->sg.size; full_record = false; try_to_copy = msg_data_left(msg); record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size; if (try_to_copy >= record_room) { try_to_copy = record_room; full_record = true; } required_size = msg_pl->sg.size + try_to_copy + prot->overhead_size; if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; alloc_encrypted: ret = tls_alloc_encrypted_msg(sk, required_size); if (ret) { if (ret != -ENOSPC) goto wait_for_memory; /* Adjust try_to_copy according to the amount that was * actually allocated. The difference is due * to max sg elements limit */ try_to_copy -= required_size - msg_en->sg.size; full_record = true; } if (try_to_copy && (msg->msg_flags & MSG_SPLICE_PAGES)) { ret = tls_sw_sendmsg_splice(sk, msg, msg_pl, try_to_copy, &copied); if (ret < 0) goto send_end; tls_ctx->pending_open_record_frags = true; if (sk_msg_full(msg_pl)) full_record = true; if (full_record || eor) goto copied; continue; } if (!is_kvec && (full_record || eor) && !async_capable) { u32 first = msg_pl->sg.end; ret = sk_msg_zerocopy_from_iter(sk, &msg->msg_iter, msg_pl, try_to_copy); if (ret) goto fallback_to_reg_send; num_zc++; copied += try_to_copy; sk_msg_sg_copy_set(msg_pl, first); ret = bpf_exec_tx_verdict(msg_pl, sk, full_record, record_type, &copied, msg->msg_flags); if (ret) { if (ret == -EINPROGRESS) num_async++; else if (ret == -ENOMEM) goto wait_for_memory; else if (ctx->open_rec && ret == -ENOSPC) goto rollback_iter; else if (ret != -EAGAIN) goto send_end; } continue; rollback_iter: copied -= try_to_copy; sk_msg_sg_copy_clear(msg_pl, first); iov_iter_revert(&msg->msg_iter, msg_pl->sg.size - orig_size); fallback_to_reg_send: sk_msg_trim(sk, msg_pl, orig_size); } required_size = msg_pl->sg.size + try_to_copy; ret = tls_clone_plaintext_msg(sk, required_size); if (ret) { if (ret != -ENOSPC) goto send_end; /* Adjust try_to_copy according to the amount that was * actually allocated. The difference is due * to max sg elements limit */ try_to_copy -= required_size - msg_pl->sg.size; full_record = true; sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size); } if (try_to_copy) { ret = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_pl, try_to_copy); if (ret < 0) goto trim_sgl; } /* Open records defined only if successfully copied, otherwise * we would trim the sg but not reset the open record frags. */ tls_ctx->pending_open_record_frags = true; copied += try_to_copy; copied: if (full_record || eor) { ret = bpf_exec_tx_verdict(msg_pl, sk, full_record, record_type, &copied, msg->msg_flags); if (ret) { if (ret == -EINPROGRESS) num_async++; else if (ret == -ENOMEM) goto wait_for_memory; else if (ret != -EAGAIN) { if (ret == -ENOSPC) ret = 0; goto send_end; } } } continue; wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: ret = sk_stream_wait_memory(sk, &timeo); if (ret) { trim_sgl: if (ctx->open_rec) tls_trim_both_msgs(sk, orig_size); goto send_end; } if (ctx->open_rec && msg_en->sg.size < required_size) goto alloc_encrypted; } if (!num_async) { goto send_end; } else if (num_zc || eor) { int err; /* Wait for pending encryptions to get completed */ err = tls_encrypt_async_wait(ctx); if (err) { ret = err; copied = 0; } } /* Transmit if any encryptions have completed */ if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { cancel_delayed_work(&ctx->tx_work.work); tls_tx_records(sk, msg->msg_flags); } send_end: ret = sk_stream_error(sk, msg->msg_flags, ret); return copied > 0 ? copied : ret; } int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct tls_context *tls_ctx = tls_get_ctx(sk); int ret; if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_CMSG_COMPAT | MSG_SPLICE_PAGES | MSG_EOR | MSG_SENDPAGE_NOPOLICY)) return -EOPNOTSUPP; ret = mutex_lock_interruptible(&tls_ctx->tx_lock); if (ret) return ret; lock_sock(sk); ret = tls_sw_sendmsg_locked(sk, msg, size); release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); return ret; } /* * Handle unexpected EOF during splice without SPLICE_F_MORE set. */ void tls_sw_splice_eof(struct socket *sock) { struct sock *sk = sock->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec; struct sk_msg *msg_pl; ssize_t copied = 0; bool retrying = false; int ret = 0; if (!ctx->open_rec) return; mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); retry: /* same checks as in tls_sw_push_pending_record() */ rec = ctx->open_rec; if (!rec) goto unlock; msg_pl = &rec->msg_plaintext; if (msg_pl->sg.size == 0) goto unlock; /* Check the BPF advisor and perform transmission. */ ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA, &copied, 0); switch (ret) { case 0: case -EAGAIN: if (retrying) goto unlock; retrying = true; goto retry; case -EINPROGRESS: break; default: goto unlock; } /* Wait for pending encryptions to get completed */ if (tls_encrypt_async_wait(ctx)) goto unlock; /* Transmit if any encryptions have completed */ if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { cancel_delayed_work(&ctx->tx_work.work); tls_tx_records(sk, 0); } unlock: release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); } static int tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, bool released) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); DEFINE_WAIT_FUNC(wait, woken_wake_function); int ret = 0; long timeo; timeo = sock_rcvtimeo(sk, nonblock); while (!tls_strp_msg_ready(ctx)) { if (!sk_psock_queue_empty(psock)) return 0; if (sk->sk_err) return sock_error(sk); if (ret < 0) return ret; if (!skb_queue_empty(&sk->sk_receive_queue)) { tls_strp_check_rcv(&ctx->strp); if (tls_strp_msg_ready(ctx)) break; } if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; if (sock_flag(sk, SOCK_DONE)) return 0; if (!timeo) return -EAGAIN; released = true; add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); ret = sk_wait_event(sk, &timeo, tls_strp_msg_ready(ctx) || !sk_psock_queue_empty(psock), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); remove_wait_queue(sk_sleep(sk), &wait); /* Handle signals */ if (signal_pending(current)) return sock_intr_errno(timeo); } tls_strp_msg_load(&ctx->strp, released); return 1; } static int tls_setup_from_iter(struct iov_iter *from, int length, int *pages_used, struct scatterlist *to, int to_max_pages) { int rc = 0, i = 0, num_elem = *pages_used, maxpages; struct page *pages[MAX_SKB_FRAGS]; unsigned int size = 0; ssize_t copied, use; size_t offset; while (length > 0) { i = 0; maxpages = to_max_pages - num_elem; if (maxpages == 0) { rc = -EFAULT; goto out; } copied = iov_iter_get_pages2(from, pages, length, maxpages, &offset); if (copied <= 0) { rc = -EFAULT; goto out; } length -= copied; size += copied; while (copied) { use = min_t(int, copied, PAGE_SIZE - offset); sg_set_page(&to[num_elem], pages[i], use, offset); sg_unmark_end(&to[num_elem]); /* We do not uncharge memory from this API */ offset = 0; copied -= use; i++; num_elem++; } } /* Mark the end in the last sg entry if newly added */ if (num_elem > *pages_used) sg_mark_end(&to[num_elem - 1]); out: if (rc) iov_iter_revert(from, size); *pages_used = num_elem; return rc; } static struct sk_buff * tls_alloc_clrtxt_skb(struct sock *sk, struct sk_buff *skb, unsigned int full_len) { struct strp_msg *clr_rxm; struct sk_buff *clr_skb; int err; clr_skb = alloc_skb_with_frags(0, full_len, TLS_PAGE_ORDER, &err, sk->sk_allocation); if (!clr_skb) return NULL; skb_copy_header(clr_skb, skb); clr_skb->len = full_len; clr_skb->data_len = full_len; clr_rxm = strp_msg(clr_skb); clr_rxm->offset = 0; return clr_skb; } /* Decrypt handlers * * tls_decrypt_sw() and tls_decrypt_device() are decrypt handlers. * They must transform the darg in/out argument are as follows: * | Input | Output * ------------------------------------------------------------------- * zc | Zero-copy decrypt allowed | Zero-copy performed * async | Async decrypt allowed | Async crypto used / in progress * skb | * | Output skb * * If ZC decryption was performed darg.skb will point to the input skb. */ /* This function decrypts the input skb into either out_iov or in out_sg * or in skb buffers itself. The input parameter 'darg->zc' indicates if * zero-copy mode needs to be tried or not. With zero-copy mode, either * out_iov or out_sg must be non-NULL. In case both out_iov and out_sg are * NULL, then the decryption happens inside skb buffers itself, i.e. * zero-copy gets disabled and 'darg->zc' is updated. */ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, struct scatterlist *out_sg, struct tls_decrypt_arg *darg) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; int n_sgin, n_sgout, aead_size, err, pages = 0; struct sk_buff *skb = tls_strp_msg(ctx); const struct strp_msg *rxm = strp_msg(skb); const struct tls_msg *tlm = tls_msg(skb); struct aead_request *aead_req; struct scatterlist *sgin = NULL; struct scatterlist *sgout = NULL; const int data_len = rxm->full_len - prot->overhead_size; int tail_pages = !!prot->tail_size; struct tls_decrypt_ctx *dctx; struct sk_buff *clear_skb; int iv_offset = 0; u8 *mem; n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size, rxm->full_len - prot->prepend_size); if (n_sgin < 1) return n_sgin ?: -EBADMSG; if (darg->zc && (out_iov || out_sg)) { clear_skb = NULL; if (out_iov) n_sgout = 1 + tail_pages + iov_iter_npages_cap(out_iov, INT_MAX, data_len); else n_sgout = sg_nents(out_sg); } else { darg->zc = false; clear_skb = tls_alloc_clrtxt_skb(sk, skb, rxm->full_len); if (!clear_skb) return -ENOMEM; n_sgout = 1 + skb_shinfo(clear_skb)->nr_frags; } /* Increment to accommodate AAD */ n_sgin = n_sgin + 1; /* Allocate a single block of memory which contains * aead_req || tls_decrypt_ctx. * Both structs are variable length. */ aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv); aead_size = ALIGN(aead_size, __alignof__(*dctx)); mem = kmalloc(aead_size + struct_size(dctx, sg, size_add(n_sgin, n_sgout)), sk->sk_allocation); if (!mem) { err = -ENOMEM; goto exit_free_skb; } /* Segment the allocated memory */ aead_req = (struct aead_request *)mem; dctx = (struct tls_decrypt_ctx *)(mem + aead_size); dctx->sk = sk; sgin = &dctx->sg[0]; sgout = &dctx->sg[n_sgin]; /* For CCM based ciphers, first byte of nonce+iv is a constant */ switch (prot->cipher_type) { case TLS_CIPHER_AES_CCM_128: dctx->iv[0] = TLS_AES_CCM_IV_B0_BYTE; iv_offset = 1; break; case TLS_CIPHER_SM4_CCM: dctx->iv[0] = TLS_SM4_CCM_IV_B0_BYTE; iv_offset = 1; break; } /* Prepare IV */ if (prot->version == TLS_1_3_VERSION || prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) { memcpy(&dctx->iv[iv_offset], tls_ctx->rx.iv, prot->iv_size + prot->salt_size); } else { err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, &dctx->iv[iv_offset] + prot->salt_size, prot->iv_size); if (err < 0) goto exit_free; memcpy(&dctx->iv[iv_offset], tls_ctx->rx.iv, prot->salt_size); } tls_xor_iv_with_seq(prot, &dctx->iv[iv_offset], tls_ctx->rx.rec_seq); /* Prepare AAD */ tls_make_aad(dctx->aad, rxm->full_len - prot->overhead_size + prot->tail_size, tls_ctx->rx.rec_seq, tlm->control, prot); /* Prepare sgin */ sg_init_table(sgin, n_sgin); sg_set_buf(&sgin[0], dctx->aad, prot->aad_size); err = skb_to_sgvec(skb, &sgin[1], rxm->offset + prot->prepend_size, rxm->full_len - prot->prepend_size); if (err < 0) goto exit_free; if (clear_skb) { sg_init_table(sgout, n_sgout); sg_set_buf(&sgout[0], dctx->aad, prot->aad_size); err = skb_to_sgvec(clear_skb, &sgout[1], prot->prepend_size, data_len + prot->tail_size); if (err < 0) goto exit_free; } else if (out_iov) { sg_init_table(sgout, n_sgout); sg_set_buf(&sgout[0], dctx->aad, prot->aad_size); err = tls_setup_from_iter(out_iov, data_len, &pages, &sgout[1], (n_sgout - 1 - tail_pages)); if (err < 0) goto exit_free_pages; if (prot->tail_size) { sg_unmark_end(&sgout[pages]); sg_set_buf(&sgout[pages + 1], &dctx->tail, prot->tail_size); sg_mark_end(&sgout[pages + 1]); } } else if (out_sg) { memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); } dctx->free_sgout = !!pages; /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, data_len + prot->tail_size, aead_req, darg); if (err) { if (darg->async_done) goto exit_free_skb; goto exit_free_pages; } darg->skb = clear_skb ?: tls_strp_msg(ctx); clear_skb = NULL; if (unlikely(darg->async)) { err = tls_strp_msg_hold(&ctx->strp, &ctx->async_hold); if (err) __skb_queue_tail(&ctx->async_hold, darg->skb); return err; } if (unlikely(darg->async_done)) return 0; if (prot->tail_size) darg->tail = dctx->tail; exit_free_pages: /* Release the pages in case iov was mapped to pages */ for (; pages > 0; pages--) put_page(sg_page(&sgout[pages])); exit_free: kfree(mem); exit_free_skb: consume_skb(clear_skb); return err; } static int tls_decrypt_sw(struct sock *sk, struct tls_context *tls_ctx, struct msghdr *msg, struct tls_decrypt_arg *darg) { struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; struct strp_msg *rxm; int pad, err; err = tls_decrypt_sg(sk, &msg->msg_iter, NULL, darg); if (err < 0) { if (err == -EBADMSG) TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); return err; } /* keep going even for ->async, the code below is TLS 1.3 */ /* If opportunistic TLS 1.3 ZC failed retry without ZC */ if (unlikely(darg->zc && prot->version == TLS_1_3_VERSION && darg->tail != TLS_RECORD_TYPE_DATA)) { darg->zc = false; if (!darg->tail) TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXNOPADVIOL); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTRETRY); return tls_decrypt_sw(sk, tls_ctx, msg, darg); } pad = tls_padding_length(prot, darg->skb, darg); if (pad < 0) { if (darg->skb != tls_strp_msg(ctx)) consume_skb(darg->skb); return pad; } rxm = strp_msg(darg->skb); rxm->full_len -= pad; return 0; } static int tls_decrypt_device(struct sock *sk, struct msghdr *msg, struct tls_context *tls_ctx, struct tls_decrypt_arg *darg) { struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; struct strp_msg *rxm; int pad, err; if (tls_ctx->rx_conf != TLS_HW) return 0; err = tls_device_decrypted(sk, tls_ctx); if (err <= 0) return err; pad = tls_padding_length(prot, tls_strp_msg(ctx), darg); if (pad < 0) return pad; darg->async = false; darg->skb = tls_strp_msg(ctx); /* ->zc downgrade check, in case TLS 1.3 gets here */ darg->zc &= !(prot->version == TLS_1_3_VERSION && tls_msg(darg->skb)->control != TLS_RECORD_TYPE_DATA); rxm = strp_msg(darg->skb); rxm->full_len -= pad; if (!darg->zc) { /* Non-ZC case needs a real skb */ darg->skb = tls_strp_msg_detach(ctx); if (!darg->skb) return -ENOMEM; } else { unsigned int off, len; /* In ZC case nobody cares about the output skb. * Just copy the data here. Note the skb is not fully trimmed. */ off = rxm->offset + prot->prepend_size; len = rxm->full_len - prot->overhead_size; err = skb_copy_datagram_msg(darg->skb, off, msg, len); if (err) return err; } return 1; } static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, struct tls_decrypt_arg *darg) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct strp_msg *rxm; int err; err = tls_decrypt_device(sk, msg, tls_ctx, darg); if (!err) err = tls_decrypt_sw(sk, tls_ctx, msg, darg); if (err < 0) return err; rxm = strp_msg(darg->skb); rxm->offset += prot->prepend_size; rxm->full_len -= prot->overhead_size; tls_advance_record_sn(sk, prot, &tls_ctx->rx); return 0; } int decrypt_skb(struct sock *sk, struct scatterlist *sgout) { struct tls_decrypt_arg darg = { .zc = true, }; return tls_decrypt_sg(sk, NULL, sgout, &darg); } static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm, u8 *control) { int err; if (!*control) { *control = tlm->control; if (!*control) return -EBADMSG; err = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, sizeof(*control), control); if (*control != TLS_RECORD_TYPE_DATA) { if (err || msg->msg_flags & MSG_CTRUNC) return -EIO; } } else if (*control != tlm->control) { return 0; } return 1; } static void tls_rx_rec_done(struct tls_sw_context_rx *ctx) { tls_strp_msg_done(&ctx->strp); } /* This function traverses the rx_list in tls receive context to copies the * decrypted records into the buffer provided by caller zero copy is not * true. Further, the records are removed from the rx_list if it is not a peek * case and the record has been consumed completely. */ static int process_rx_list(struct tls_sw_context_rx *ctx, struct msghdr *msg, u8 *control, size_t skip, size_t len, bool is_peek, bool *more) { struct sk_buff *skb = skb_peek(&ctx->rx_list); struct tls_msg *tlm; ssize_t copied = 0; int err; while (skip && skb) { struct strp_msg *rxm = strp_msg(skb); tlm = tls_msg(skb); err = tls_record_content_type(msg, tlm, control); if (err <= 0) goto more; if (skip < rxm->full_len) break; skip = skip - rxm->full_len; skb = skb_peek_next(skb, &ctx->rx_list); } while (len && skb) { struct sk_buff *next_skb; struct strp_msg *rxm = strp_msg(skb); int chunk = min_t(unsigned int, rxm->full_len - skip, len); tlm = tls_msg(skb); err = tls_record_content_type(msg, tlm, control); if (err <= 0) goto more; err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) goto more; len = len - chunk; copied = copied + chunk; /* Consume the data from record if it is non-peek case*/ if (!is_peek) { rxm->offset = rxm->offset + chunk; rxm->full_len = rxm->full_len - chunk; /* Return if there is unconsumed data in the record */ if (rxm->full_len - skip) break; } /* The remaining skip-bytes must lie in 1st record in rx_list. * So from the 2nd record, 'skip' should be 0. */ skip = 0; if (msg) msg->msg_flags |= MSG_EOR; next_skb = skb_peek_next(skb, &ctx->rx_list); if (!is_peek) { __skb_unlink(skb, &ctx->rx_list); consume_skb(skb); } skb = next_skb; } err = 0; out: return copied ? : err; more: if (more) *more = true; goto out; } static bool tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot, size_t len_left, size_t decrypted, ssize_t done, size_t *flushed_at) { size_t max_rec; if (len_left <= decrypted) return false; max_rec = prot->overhead_size - prot->tail_size + TLS_MAX_PAYLOAD_SIZE; if (done - *flushed_at < SZ_128K && tcp_inq(sk) > max_rec) return false; *flushed_at = done; return sk_flush_backlog(sk); } static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx, bool nonblock) { long timeo; int ret; timeo = sock_rcvtimeo(sk, nonblock); while (unlikely(ctx->reader_present)) { DEFINE_WAIT_FUNC(wait, woken_wake_function); ctx->reader_contended = 1; add_wait_queue(&ctx->wq, &wait); ret = sk_wait_event(sk, &timeo, !READ_ONCE(ctx->reader_present), &wait); remove_wait_queue(&ctx->wq, &wait); if (timeo <= 0) return -EAGAIN; if (signal_pending(current)) return sock_intr_errno(timeo); if (ret < 0) return ret; } WRITE_ONCE(ctx->reader_present, 1); return 0; } static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx, bool nonblock) { int err; lock_sock(sk); err = tls_rx_reader_acquire(sk, ctx, nonblock); if (err) release_sock(sk); return err; } static void tls_rx_reader_release(struct sock *sk, struct tls_sw_context_rx *ctx) { if (unlikely(ctx->reader_contended)) { if (wq_has_sleeper(&ctx->wq)) wake_up(&ctx->wq); else ctx->reader_contended = 0; WARN_ON_ONCE(!ctx->reader_present); } WRITE_ONCE(ctx->reader_present, 0); } static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx) { tls_rx_reader_release(sk, ctx); release_sock(sk); } int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; ssize_t decrypted = 0, async_copy_bytes = 0; struct sk_psock *psock; unsigned char control = 0; size_t flushed_at = 0; struct strp_msg *rxm; struct tls_msg *tlm; ssize_t copied = 0; ssize_t peeked = 0; bool async = false; int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; bool rx_more = false; bool released = true; bool bpf_strp_enabled; bool zc_capable; if (unlikely(flags & MSG_ERRQUEUE)) return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); if (err < 0) return err; psock = sk_psock_get(sk); bpf_strp_enabled = sk_psock_strp_enabled(psock); /* If crypto failed the connection is broken */ err = ctx->async_wait.err; if (err) goto end; /* Process pending decrypted records. It must be non-zero-copy */ err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); if (err < 0) goto end; copied = err; if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); len = len - copied; zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek && ctx->zc_capable; decrypted = 0; while (len && (decrypted + copied < target || tls_strp_msg_ready(ctx))) { struct tls_decrypt_arg darg; int to_decrypt, chunk; err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT, released); if (err <= 0) { if (psock) { chunk = sk_msg_recvmsg(sk, psock, msg, len, flags); if (chunk > 0) { decrypted += chunk; len -= chunk; continue; } } goto recv_end; } memset(&darg.inargs, 0, sizeof(darg.inargs)); rxm = strp_msg(tls_strp_msg(ctx)); tlm = tls_msg(tls_strp_msg(ctx)); to_decrypt = rxm->full_len - prot->overhead_size; if (zc_capable && to_decrypt <= len && tlm->control == TLS_RECORD_TYPE_DATA) darg.zc = true; /* Do not use async mode if record is non-data */ if (tlm->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled) darg.async = ctx->async_capable; else darg.async = false; err = tls_rx_one_record(sk, msg, &darg); if (err < 0) { tls_err_abort(sk, -EBADMSG); goto recv_end; } async |= darg.async; /* If the type of records being processed is not known yet, * set it to record type just dequeued. If it is already known, * but does not match the record type just dequeued, go to end. * We always get record type here since for tls1.2, record type * is known just after record is dequeued from stream parser. * For tls1.3, we disable async. */ err = tls_record_content_type(msg, tls_msg(darg.skb), &control); if (err <= 0) { DEBUG_NET_WARN_ON_ONCE(darg.zc); tls_rx_rec_done(ctx); put_on_rx_list_err: __skb_queue_tail(&ctx->rx_list, darg.skb); goto recv_end; } /* periodically flush backlog, and feed strparser */ released = tls_read_flush_backlog(sk, prot, len, to_decrypt, decrypted + copied, &flushed_at); /* TLS 1.3 may have updated the length by more than overhead */ rxm = strp_msg(darg.skb); chunk = rxm->full_len; tls_rx_rec_done(ctx); if (!darg.zc) { bool partially_consumed = chunk > len; struct sk_buff *skb = darg.skb; DEBUG_NET_WARN_ON_ONCE(darg.skb == ctx->strp.anchor); if (async) { /* TLS 1.2-only, to_decrypt must be text len */ chunk = min_t(int, to_decrypt, len); async_copy_bytes += chunk; put_on_rx_list: decrypted += chunk; len -= chunk; __skb_queue_tail(&ctx->rx_list, skb); if (unlikely(control != TLS_RECORD_TYPE_DATA)) break; continue; } if (bpf_strp_enabled) { released = true; err = sk_psock_tls_strp_read(psock, skb); if (err != __SK_PASS) { rxm->offset = rxm->offset + rxm->full_len; rxm->full_len = 0; if (err == __SK_DROP) consume_skb(skb); continue; } } if (partially_consumed) chunk = len; err = skb_copy_datagram_msg(skb, rxm->offset, msg, chunk); if (err < 0) goto put_on_rx_list_err; if (is_peek) { peeked += chunk; goto put_on_rx_list; } if (partially_consumed) { rxm->offset += chunk; rxm->full_len -= chunk; goto put_on_rx_list; } consume_skb(skb); } decrypted += chunk; len -= chunk; /* Return full control message to userspace before trying * to parse another message type */ msg->msg_flags |= MSG_EOR; if (control != TLS_RECORD_TYPE_DATA) break; } recv_end: if (async) { int ret; /* Wait for all previously submitted records to be decrypted */ ret = tls_decrypt_async_wait(ctx); __skb_queue_purge(&ctx->async_hold); if (ret) { if (err >= 0 || err == -EINPROGRESS) err = ret; goto end; } /* Drain records from the rx_list & copy if required */ if (is_peek) err = process_rx_list(ctx, msg, &control, copied + peeked, decrypted - peeked, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek, NULL); /* we could have copied less than we wanted, and possibly nothing */ decrypted += max(err, 0) - async_copy_bytes; } copied += decrypted; end: tls_rx_reader_unlock(sk, ctx); if (psock) sk_psock_put(sk, psock); return copied ? : err; } ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct tls_context *tls_ctx = tls_get_ctx(sock->sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct strp_msg *rxm = NULL; struct sock *sk = sock->sk; struct tls_msg *tlm; struct sk_buff *skb; ssize_t copied = 0; int chunk; int err; err = tls_rx_reader_lock(sk, ctx, flags & SPLICE_F_NONBLOCK); if (err < 0) return err; if (!skb_queue_empty(&ctx->rx_list)) { skb = __skb_dequeue(&ctx->rx_list); } else { struct tls_decrypt_arg darg; err = tls_rx_rec_wait(sk, NULL, flags & SPLICE_F_NONBLOCK, true); if (err <= 0) goto splice_read_end; memset(&darg.inargs, 0, sizeof(darg.inargs)); err = tls_rx_one_record(sk, NULL, &darg); if (err < 0) { tls_err_abort(sk, -EBADMSG); goto splice_read_end; } tls_rx_rec_done(ctx); skb = darg.skb; } rxm = strp_msg(skb); tlm = tls_msg(skb); /* splice does not support reading control messages */ if (tlm->control != TLS_RECORD_TYPE_DATA) { err = -EINVAL; goto splice_requeue; } chunk = min_t(unsigned int, rxm->full_len, len); copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags); if (copied < 0) goto splice_requeue; if (chunk < rxm->full_len) { rxm->offset += len; rxm->full_len -= len; goto splice_requeue; } consume_skb(skb); splice_read_end: tls_rx_reader_unlock(sk, ctx); return copied ? : err; splice_requeue: __skb_queue_head(&ctx->rx_list, skb); goto splice_read_end; } int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t read_actor) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; struct strp_msg *rxm = NULL; struct sk_buff *skb = NULL; struct sk_psock *psock; size_t flushed_at = 0; bool released = true; struct tls_msg *tlm; ssize_t copied = 0; ssize_t decrypted; int err, used; psock = sk_psock_get(sk); if (psock) { sk_psock_put(sk, psock); return -EINVAL; } err = tls_rx_reader_acquire(sk, ctx, true); if (err < 0) return err; /* If crypto failed the connection is broken */ err = ctx->async_wait.err; if (err) goto read_sock_end; decrypted = 0; do { if (!skb_queue_empty(&ctx->rx_list)) { skb = __skb_dequeue(&ctx->rx_list); rxm = strp_msg(skb); tlm = tls_msg(skb); } else { struct tls_decrypt_arg darg; err = tls_rx_rec_wait(sk, NULL, true, released); if (err <= 0) goto read_sock_end; memset(&darg.inargs, 0, sizeof(darg.inargs)); err = tls_rx_one_record(sk, NULL, &darg); if (err < 0) { tls_err_abort(sk, -EBADMSG); goto read_sock_end; } released = tls_read_flush_backlog(sk, prot, INT_MAX, 0, decrypted, &flushed_at); skb = darg.skb; rxm = strp_msg(skb); tlm = tls_msg(skb); decrypted += rxm->full_len; tls_rx_rec_done(ctx); } /* read_sock does not support reading control messages */ if (tlm->control != TLS_RECORD_TYPE_DATA) { err = -EINVAL; goto read_sock_requeue; } used = read_actor(desc, skb, rxm->offset, rxm->full_len); if (used <= 0) { if (!copied) err = used; goto read_sock_requeue; } copied += used; if (used < rxm->full_len) { rxm->offset += used; rxm->full_len -= used; if (!desc->count) goto read_sock_requeue; } else { consume_skb(skb); if (!desc->count) skb = NULL; } } while (skb); read_sock_end: tls_rx_reader_release(sk, ctx); return copied ? : err; read_sock_requeue: __skb_queue_head(&ctx->rx_list, skb); goto read_sock_end; } bool tls_sw_sock_is_readable(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); bool ingress_empty = true; struct sk_psock *psock; rcu_read_lock(); psock = sk_psock(sk); if (psock) ingress_empty = list_empty(&psock->ingress_msg); rcu_read_unlock(); return !ingress_empty || tls_strp_msg_ready(ctx) || !skb_queue_empty(&ctx->rx_list); } int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb) { struct tls_context *tls_ctx = tls_get_ctx(strp->sk); struct tls_prot_info *prot = &tls_ctx->prot_info; char header[TLS_HEADER_SIZE + TLS_MAX_IV_SIZE]; size_t cipher_overhead; size_t data_len = 0; int ret; /* Verify that we have a full TLS header, or wait for more data */ if (strp->stm.offset + prot->prepend_size > skb->len) return 0; /* Sanity-check size of on-stack buffer. */ if (WARN_ON(prot->prepend_size > sizeof(header))) { ret = -EINVAL; goto read_failure; } /* Linearize header to local buffer */ ret = skb_copy_bits(skb, strp->stm.offset, header, prot->prepend_size); if (ret < 0) goto read_failure; strp->mark = header[0]; data_len = ((header[4] & 0xFF) | (header[3] << 8)); cipher_overhead = prot->tag_size; if (prot->version != TLS_1_3_VERSION && prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305) cipher_overhead += prot->iv_size; if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead + prot->tail_size) { ret = -EMSGSIZE; goto read_failure; } if (data_len < cipher_overhead) { ret = -EBADMSG; goto read_failure; } /* Note that both TLS1.3 and TLS1.2 use TLS_1_2 version here */ if (header[1] != TLS_1_2_VERSION_MINOR || header[2] != TLS_1_2_VERSION_MAJOR) { ret = -EINVAL; goto read_failure; } tls_device_rx_resync_new_rec(strp->sk, data_len + TLS_HEADER_SIZE, TCP_SKB_CB(skb)->seq + strp->stm.offset); return data_len + TLS_HEADER_SIZE; read_failure: tls_err_abort(strp->sk, ret); return ret; } void tls_rx_msg_ready(struct tls_strparser *strp) { struct tls_sw_context_rx *ctx; ctx = container_of(strp, struct tls_sw_context_rx, strp); ctx->saved_data_ready(strp->sk); } static void tls_data_ready(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct sk_psock *psock; gfp_t alloc_save; trace_sk_data_ready(sk); alloc_save = sk->sk_allocation; sk->sk_allocation = GFP_ATOMIC; tls_strp_data_ready(&ctx->strp); sk->sk_allocation = alloc_save; psock = sk_psock_get(sk); if (psock) { if (!list_empty(&psock->ingress_msg)) ctx->saved_data_ready(sk); sk_psock_put(sk, psock); } } void tls_sw_cancel_work_tx(struct tls_context *tls_ctx) { struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); set_bit(BIT_TX_CLOSING, &ctx->tx_bitmask); set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask); cancel_delayed_work_sync(&ctx->tx_work.work); } void tls_sw_release_resources_tx(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; /* Wait for any pending async encryptions to complete */ tls_encrypt_async_wait(ctx); tls_tx_records(sk, -1); /* Free up un-sent records in tx_list. First, free * the partially sent record if any at head of tx_list. */ if (tls_ctx->partially_sent_record) { tls_free_partial_record(sk, tls_ctx); rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); list_del(&rec->list); sk_msg_free(sk, &rec->msg_plaintext); kfree(rec); } list_for_each_entry_safe(rec, tmp, &ctx->tx_list, list) { list_del(&rec->list); sk_msg_free(sk, &rec->msg_encrypted); sk_msg_free(sk, &rec->msg_plaintext); kfree(rec); } crypto_free_aead(ctx->aead_send); tls_free_open_rec(sk); } void tls_sw_free_ctx_tx(struct tls_context *tls_ctx) { struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); kfree(ctx); } void tls_sw_release_resources_rx(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); if (ctx->aead_recv) { __skb_queue_purge(&ctx->rx_list); crypto_free_aead(ctx->aead_recv); tls_strp_stop(&ctx->strp); /* If tls_sw_strparser_arm() was not called (cleanup paths) * we still want to tls_strp_stop(), but sk->sk_data_ready was * never swapped. */ if (ctx->saved_data_ready) { write_lock_bh(&sk->sk_callback_lock); sk->sk_data_ready = ctx->saved_data_ready; write_unlock_bh(&sk->sk_callback_lock); } } } void tls_sw_strparser_done(struct tls_context *tls_ctx) { struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); tls_strp_done(&ctx->strp); } void tls_sw_free_ctx_rx(struct tls_context *tls_ctx) { struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); kfree(ctx); } void tls_sw_free_resources_rx(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); tls_sw_release_resources_rx(sk); tls_sw_free_ctx_rx(tls_ctx); } /* The work handler to transmitt the encrypted records in tx_list */ static void tx_work_handler(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); struct tx_work *tx_work = container_of(delayed_work, struct tx_work, work); struct sock *sk = tx_work->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx; if (unlikely(!tls_ctx)) return; ctx = tls_sw_ctx_tx(tls_ctx); if (test_bit(BIT_TX_CLOSING, &ctx->tx_bitmask)) return; if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) return; if (mutex_trylock(&tls_ctx->tx_lock)) { lock_sock(sk); tls_tx_records(sk, -1); release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); } else if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { /* Someone is holding the tx_lock, they will likely run Tx * and cancel the work on their way out of the lock section. * Schedule a long delay just in case. */ schedule_delayed_work(&ctx->tx_work.work, msecs_to_jiffies(10)); } } static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx) { struct tls_rec *rec; rec = list_first_entry_or_null(&ctx->tx_list, struct tls_rec, list); if (!rec) return false; return READ_ONCE(rec->tx_ready); } void tls_sw_write_space(struct sock *sk, struct tls_context *ctx) { struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx); /* Schedule the transmission if tx list is ready */ if (tls_is_tx_ready(tx_ctx) && !test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask)) schedule_delayed_work(&tx_ctx->tx_work.work, 0); } void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx) { struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(tls_ctx); write_lock_bh(&sk->sk_callback_lock); rx_ctx->saved_data_ready = sk->sk_data_ready; sk->sk_data_ready = tls_data_ready; write_unlock_bh(&sk->sk_callback_lock); } void tls_update_rx_zc_capable(struct tls_context *tls_ctx) { struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(tls_ctx); rx_ctx->zc_capable = tls_ctx->rx_no_pad || tls_ctx->prot_info.version != TLS_1_3_VERSION; } static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk) { struct tls_sw_context_tx *sw_ctx_tx; if (!ctx->priv_ctx_tx) { sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); if (!sw_ctx_tx) return NULL; } else { sw_ctx_tx = ctx->priv_ctx_tx; } crypto_init_wait(&sw_ctx_tx->async_wait); atomic_set(&sw_ctx_tx->encrypt_pending, 1); INIT_LIST_HEAD(&sw_ctx_tx->tx_list); INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); sw_ctx_tx->tx_work.sk = sk; return sw_ctx_tx; } static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) { struct tls_sw_context_rx *sw_ctx_rx; if (!ctx->priv_ctx_rx) { sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); if (!sw_ctx_rx) return NULL; } else { sw_ctx_rx = ctx->priv_ctx_rx; } crypto_init_wait(&sw_ctx_rx->async_wait); atomic_set(&sw_ctx_rx->decrypt_pending, 1); init_waitqueue_head(&sw_ctx_rx->wq); skb_queue_head_init(&sw_ctx_rx->rx_list); skb_queue_head_init(&sw_ctx_rx->async_hold); return sw_ctx_rx; } int init_prot_info(struct tls_prot_info *prot, const struct tls_crypto_info *crypto_info, const struct tls_cipher_desc *cipher_desc) { u16 nonce_size = cipher_desc->nonce; if (crypto_info->version == TLS_1_3_VERSION) { nonce_size = 0; prot->aad_size = TLS_HEADER_SIZE; prot->tail_size = 1; } else { prot->aad_size = TLS_AAD_SPACE_SIZE; prot->tail_size = 0; } /* Sanity-check the sizes for stack allocations. */ if (nonce_size > TLS_MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) return -EINVAL; prot->version = crypto_info->version; prot->cipher_type = crypto_info->cipher_type; prot->prepend_size = TLS_HEADER_SIZE + nonce_size; prot->tag_size = cipher_desc->tag; prot->overhead_size = prot->prepend_size + prot->tag_size + prot->tail_size; prot->iv_size = cipher_desc->iv; prot->salt_size = cipher_desc->salt; prot->rec_seq_size = cipher_desc->rec_seq; return 0; } int tls_set_sw_offload(struct sock *sk, int tx) { struct tls_sw_context_tx *sw_ctx_tx = NULL; struct tls_sw_context_rx *sw_ctx_rx = NULL; const struct tls_cipher_desc *cipher_desc; struct tls_crypto_info *crypto_info; char *iv, *rec_seq, *key, *salt; struct cipher_context *cctx; struct tls_prot_info *prot; struct crypto_aead **aead; struct tls_context *ctx; struct crypto_tfm *tfm; int rc = 0; ctx = tls_get_ctx(sk); prot = &ctx->prot_info; if (tx) { ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); if (!ctx->priv_ctx_tx) return -ENOMEM; sw_ctx_tx = ctx->priv_ctx_tx; crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; } else { ctx->priv_ctx_rx = init_ctx_rx(ctx); if (!ctx->priv_ctx_rx) return -ENOMEM; sw_ctx_rx = ctx->priv_ctx_rx; crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; aead = &sw_ctx_rx->aead_recv; } cipher_desc = get_cipher_desc(crypto_info->cipher_type); if (!cipher_desc) { rc = -EINVAL; goto free_priv; } rc = init_prot_info(prot, crypto_info, cipher_desc); if (rc) goto free_priv; iv = crypto_info_iv(crypto_info, cipher_desc); key = crypto_info_key(crypto_info, cipher_desc); salt = crypto_info_salt(crypto_info, cipher_desc); rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc); memcpy(cctx->iv, salt, cipher_desc->salt); memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv); memcpy(cctx->rec_seq, rec_seq, cipher_desc->rec_seq); if (!*aead) { *aead = crypto_alloc_aead(cipher_desc->cipher_name, 0, 0); if (IS_ERR(*aead)) { rc = PTR_ERR(*aead); *aead = NULL; goto free_priv; } } ctx->push_pending_record = tls_sw_push_pending_record; rc = crypto_aead_setkey(*aead, key, cipher_desc->key); if (rc) goto free_aead; rc = crypto_aead_setauthsize(*aead, prot->tag_size); if (rc) goto free_aead; if (sw_ctx_rx) { tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv); tls_update_rx_zc_capable(ctx); sw_ctx_rx->async_capable = crypto_info->version != TLS_1_3_VERSION && !!(tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC); rc = tls_strp_init(&sw_ctx_rx->strp, sk); if (rc) goto free_aead; } goto out; free_aead: crypto_free_aead(*aead); *aead = NULL; free_priv: if (tx) { kfree(ctx->priv_ctx_tx); ctx->priv_ctx_tx = NULL; } else { kfree(ctx->priv_ctx_rx); ctx->priv_ctx_rx = NULL; } out: return rc; } |
3 1 1 2 2 2 1 2 2 2 17 17 17 55 50 19 1 1 1 1 1 1 1 1 3 3 1 1 1 2 1 1 1 2 1 1 2 4 1 1 1 1 3 3 3 1 2 2 1 1 2 4 4 1 2 1 1 2 4 4 1 2 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 3 3 2 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 2 1 1 1 5 5 3 3 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 1 1 1 1 1 2 1 1 1 1 1 3 3 1 1 1 3 3 2 2 1 1 1 1 1 5 5 1 5 4 4 4 6 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751 8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 9459 9460 9461 9462 9463 9464 9465 9466 9467 9468 9469 9470 9471 9472 9473 9474 9475 9476 9477 9478 9479 9480 9481 9482 9483 9484 9485 9486 9487 9488 9489 9490 9491 9492 9493 9494 9495 9496 9497 9498 9499 9500 9501 9502 9503 9504 9505 9506 9507 9508 9509 9510 9511 9512 9513 9514 9515 9516 9517 9518 9519 9520 9521 9522 9523 9524 9525 9526 9527 9528 9529 9530 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542 9543 9544 9545 9546 9547 9548 9549 9550 9551 9552 9553 9554 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600 9601 9602 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614 9615 9616 9617 9618 9619 9620 9621 9622 9623 9624 9625 9626 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638 9639 9640 9641 9642 9643 9644 9645 9646 9647 9648 9649 9650 9651 9652 9653 9654 9655 9656 9657 9658 9659 9660 9661 9662 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674 9675 9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738 9739 9740 9741 9742 9743 9744 9745 9746 9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 9767 9768 9769 9770 9771 9772 9773 9774 9775 9776 9777 9778 9779 9780 9781 9782 9783 9784 9785 9786 9787 9788 9789 9790 9791 9792 9793 9794 9795 9796 9797 9798 9799 9800 9801 9802 9803 9804 9805 9806 9807 9808 9809 9810 9811 9812 9813 9814 9815 9816 9817 9818 9819 9820 9821 9822 9823 9824 9825 9826 9827 9828 9829 9830 9831 9832 9833 9834 9835 9836 9837 9838 9839 9840 9841 9842 9843 9844 9845 9846 9847 9848 9849 9850 9851 9852 9853 9854 9855 9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 9866 9867 9868 9869 9870 9871 9872 9873 9874 9875 9876 9877 9878 9879 9880 9881 9882 9883 9884 9885 9886 9887 9888 9889 9890 9891 9892 9893 9894 9895 9896 9897 9898 9899 9900 9901 9902 9903 9904 9905 9906 9907 9908 9909 9910 9911 9912 9913 9914 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 9926 9927 9928 9929 9930 9931 9932 9933 9934 9935 9936 9937 9938 9939 9940 9941 9942 9943 9944 9945 9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960 9961 9962 9963 9964 9965 9966 9967 9968 9969 9970 9971 9972 9973 9974 9975 9976 9977 9978 9979 9980 9981 9982 9983 9984 9985 9986 9987 9988 9989 9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 10000 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023 10024 10025 10026 10027 10028 10029 10030 10031 10032 10033 10034 10035 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046 10047 10048 10049 10050 10051 10052 10053 10054 10055 10056 10057 10058 10059 10060 10061 10062 10063 10064 10065 10066 10067 10068 10069 10070 10071 10072 10073 10074 10075 10076 10077 10078 10079 10080 10081 10082 10083 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094 10095 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154 10155 10156 10157 10158 10159 10160 10161 10162 10163 10164 10165 10166 10167 10168 10169 10170 10171 10172 10173 10174 10175 10176 10177 10178 10179 10180 10181 10182 10183 10184 10185 10186 10187 10188 10189 10190 10191 10192 10193 10194 10195 10196 10197 10198 10199 10200 10201 10202 10203 10204 10205 10206 10207 10208 10209 10210 10211 10212 10213 10214 10215 10216 10217 10218 10219 10220 10221 10222 10223 10224 10225 10226 10227 10228 10229 10230 10231 10232 10233 10234 10235 10236 10237 10238 10239 10240 10241 10242 10243 10244 10245 10246 10247 10248 10249 10250 10251 10252 10253 10254 10255 10256 10257 10258 10259 10260 10261 10262 10263 10264 10265 10266 10267 10268 10269 10270 10271 10272 10273 10274 10275 10276 10277 10278 10279 10280 10281 10282 10283 10284 10285 10286 10287 10288 10289 10290 10291 10292 10293 10294 10295 10296 10297 10298 10299 10300 10301 10302 10303 10304 10305 10306 10307 10308 10309 10310 10311 10312 10313 10314 10315 10316 10317 10318 10319 10320 10321 10322 10323 10324 10325 10326 10327 10328 10329 10330 10331 10332 10333 10334 10335 10336 10337 10338 10339 10340 10341 10342 10343 10344 10345 10346 10347 10348 10349 10350 10351 10352 10353 10354 10355 10356 10357 10358 10359 10360 10361 10362 10363 10364 10365 10366 10367 10368 10369 10370 10371 10372 10373 10374 10375 10376 10377 10378 10379 10380 10381 10382 10383 10384 10385 10386 10387 10388 10389 10390 10391 10392 10393 10394 10395 10396 10397 10398 10399 10400 10401 10402 10403 10404 10405 10406 10407 10408 10409 10410 10411 10412 10413 10414 10415 10416 10417 10418 10419 10420 10421 10422 10423 10424 10425 10426 10427 10428 10429 10430 10431 10432 10433 10434 10435 10436 10437 10438 10439 10440 10441 10442 10443 10444 10445 10446 10447 10448 10449 10450 10451 10452 10453 10454 10455 10456 10457 10458 10459 10460 10461 10462 10463 10464 10465 10466 10467 10468 10469 10470 10471 10472 10473 10474 10475 10476 10477 10478 10479 10480 10481 10482 10483 10484 10485 10486 10487 10488 10489 10490 10491 10492 10493 10494 10495 10496 10497 10498 10499 10500 10501 10502 10503 10504 10505 10506 10507 10508 10509 10510 | /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2010 Nokia Corporation Copyright (C) 2011-2012 Intel Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* Bluetooth HCI Management interface */ #include <linux/module.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/hci_sock.h> #include <net/bluetooth/l2cap.h> #include <net/bluetooth/mgmt.h> #include "smp.h" #include "mgmt_util.h" #include "mgmt_config.h" #include "msft.h" #include "eir.h" #include "aosp.h" #define MGMT_VERSION 1 #define MGMT_REVISION 23 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, MGMT_OP_READ_INFO, MGMT_OP_SET_POWERED, MGMT_OP_SET_DISCOVERABLE, MGMT_OP_SET_CONNECTABLE, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_OP_SET_BONDABLE, MGMT_OP_SET_LINK_SECURITY, MGMT_OP_SET_SSP, MGMT_OP_SET_HS, MGMT_OP_SET_LE, MGMT_OP_SET_DEV_CLASS, MGMT_OP_SET_LOCAL_NAME, MGMT_OP_ADD_UUID, MGMT_OP_REMOVE_UUID, MGMT_OP_LOAD_LINK_KEYS, MGMT_OP_LOAD_LONG_TERM_KEYS, MGMT_OP_DISCONNECT, MGMT_OP_GET_CONNECTIONS, MGMT_OP_PIN_CODE_REPLY, MGMT_OP_PIN_CODE_NEG_REPLY, MGMT_OP_SET_IO_CAPABILITY, MGMT_OP_PAIR_DEVICE, MGMT_OP_CANCEL_PAIR_DEVICE, MGMT_OP_UNPAIR_DEVICE, MGMT_OP_USER_CONFIRM_REPLY, MGMT_OP_USER_CONFIRM_NEG_REPLY, MGMT_OP_USER_PASSKEY_REPLY, MGMT_OP_USER_PASSKEY_NEG_REPLY, MGMT_OP_READ_LOCAL_OOB_DATA, MGMT_OP_ADD_REMOTE_OOB_DATA, MGMT_OP_REMOVE_REMOTE_OOB_DATA, MGMT_OP_START_DISCOVERY, MGMT_OP_STOP_DISCOVERY, MGMT_OP_CONFIRM_NAME, MGMT_OP_BLOCK_DEVICE, MGMT_OP_UNBLOCK_DEVICE, MGMT_OP_SET_DEVICE_ID, MGMT_OP_SET_ADVERTISING, MGMT_OP_SET_BREDR, MGMT_OP_SET_STATIC_ADDRESS, MGMT_OP_SET_SCAN_PARAMS, MGMT_OP_SET_SECURE_CONN, MGMT_OP_SET_DEBUG_KEYS, MGMT_OP_SET_PRIVACY, MGMT_OP_LOAD_IRKS, MGMT_OP_GET_CONN_INFO, MGMT_OP_GET_CLOCK_INFO, MGMT_OP_ADD_DEVICE, MGMT_OP_REMOVE_DEVICE, MGMT_OP_LOAD_CONN_PARAM, MGMT_OP_READ_UNCONF_INDEX_LIST, MGMT_OP_READ_CONFIG_INFO, MGMT_OP_SET_EXTERNAL_CONFIG, MGMT_OP_SET_PUBLIC_ADDRESS, MGMT_OP_START_SERVICE_DISCOVERY, MGMT_OP_READ_LOCAL_OOB_EXT_DATA, MGMT_OP_READ_EXT_INDEX_LIST, MGMT_OP_READ_ADV_FEATURES, MGMT_OP_ADD_ADVERTISING, MGMT_OP_REMOVE_ADVERTISING, MGMT_OP_GET_ADV_SIZE_INFO, MGMT_OP_START_LIMITED_DISCOVERY, MGMT_OP_READ_EXT_INFO, MGMT_OP_SET_APPEARANCE, MGMT_OP_GET_PHY_CONFIGURATION, MGMT_OP_SET_PHY_CONFIGURATION, MGMT_OP_SET_BLOCKED_KEYS, MGMT_OP_SET_WIDEBAND_SPEECH, MGMT_OP_READ_CONTROLLER_CAP, MGMT_OP_READ_EXP_FEATURES_INFO, MGMT_OP_SET_EXP_FEATURE, MGMT_OP_READ_DEF_SYSTEM_CONFIG, MGMT_OP_SET_DEF_SYSTEM_CONFIG, MGMT_OP_READ_DEF_RUNTIME_CONFIG, MGMT_OP_SET_DEF_RUNTIME_CONFIG, MGMT_OP_GET_DEVICE_FLAGS, MGMT_OP_SET_DEVICE_FLAGS, MGMT_OP_READ_ADV_MONITOR_FEATURES, MGMT_OP_ADD_ADV_PATTERNS_MONITOR, MGMT_OP_REMOVE_ADV_MONITOR, MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_OP_ADD_EXT_ADV_DATA, MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, MGMT_OP_SET_MESH_RECEIVER, MGMT_OP_MESH_READ_FEATURES, MGMT_OP_MESH_SEND, MGMT_OP_MESH_SEND_CANCEL, }; static const u16 mgmt_events[] = { MGMT_EV_CONTROLLER_ERROR, MGMT_EV_INDEX_ADDED, MGMT_EV_INDEX_REMOVED, MGMT_EV_NEW_SETTINGS, MGMT_EV_CLASS_OF_DEV_CHANGED, MGMT_EV_LOCAL_NAME_CHANGED, MGMT_EV_NEW_LINK_KEY, MGMT_EV_NEW_LONG_TERM_KEY, MGMT_EV_DEVICE_CONNECTED, MGMT_EV_DEVICE_DISCONNECTED, MGMT_EV_CONNECT_FAILED, MGMT_EV_PIN_CODE_REQUEST, MGMT_EV_USER_CONFIRM_REQUEST, MGMT_EV_USER_PASSKEY_REQUEST, MGMT_EV_AUTH_FAILED, MGMT_EV_DEVICE_FOUND, MGMT_EV_DISCOVERING, MGMT_EV_DEVICE_BLOCKED, MGMT_EV_DEVICE_UNBLOCKED, MGMT_EV_DEVICE_UNPAIRED, MGMT_EV_PASSKEY_NOTIFY, MGMT_EV_NEW_IRK, MGMT_EV_NEW_CSRK, MGMT_EV_DEVICE_ADDED, MGMT_EV_DEVICE_REMOVED, MGMT_EV_NEW_CONN_PARAM, MGMT_EV_UNCONF_INDEX_ADDED, MGMT_EV_UNCONF_INDEX_REMOVED, MGMT_EV_NEW_CONFIG_OPTIONS, MGMT_EV_EXT_INDEX_ADDED, MGMT_EV_EXT_INDEX_REMOVED, MGMT_EV_LOCAL_OOB_DATA_UPDATED, MGMT_EV_ADVERTISING_ADDED, MGMT_EV_ADVERTISING_REMOVED, MGMT_EV_EXT_INFO_CHANGED, MGMT_EV_PHY_CONFIGURATION_CHANGED, MGMT_EV_EXP_FEATURE_CHANGED, MGMT_EV_DEVICE_FLAGS_CHANGED, MGMT_EV_ADV_MONITOR_ADDED, MGMT_EV_ADV_MONITOR_REMOVED, MGMT_EV_CONTROLLER_SUSPEND, MGMT_EV_CONTROLLER_RESUME, MGMT_EV_ADV_MONITOR_DEVICE_FOUND, MGMT_EV_ADV_MONITOR_DEVICE_LOST, }; static const u16 mgmt_untrusted_commands[] = { MGMT_OP_READ_INDEX_LIST, MGMT_OP_READ_INFO, MGMT_OP_READ_UNCONF_INDEX_LIST, MGMT_OP_READ_CONFIG_INFO, MGMT_OP_READ_EXT_INDEX_LIST, MGMT_OP_READ_EXT_INFO, MGMT_OP_READ_CONTROLLER_CAP, MGMT_OP_READ_EXP_FEATURES_INFO, MGMT_OP_READ_DEF_SYSTEM_CONFIG, MGMT_OP_READ_DEF_RUNTIME_CONFIG, }; static const u16 mgmt_untrusted_events[] = { MGMT_EV_INDEX_ADDED, MGMT_EV_INDEX_REMOVED, MGMT_EV_NEW_SETTINGS, MGMT_EV_CLASS_OF_DEV_CHANGED, MGMT_EV_LOCAL_NAME_CHANGED, MGMT_EV_UNCONF_INDEX_ADDED, MGMT_EV_UNCONF_INDEX_REMOVED, MGMT_EV_NEW_CONFIG_OPTIONS, MGMT_EV_EXT_INDEX_ADDED, MGMT_EV_EXT_INDEX_REMOVED, MGMT_EV_EXT_INFO_CHANGED, MGMT_EV_EXP_FEATURE_CHANGED, }; #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) #define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ "\x00\x00\x00\x00\x00\x00\x00\x00" /* HCI to MGMT error code conversion table */ static const u8 mgmt_status_table[] = { MGMT_STATUS_SUCCESS, MGMT_STATUS_UNKNOWN_COMMAND, /* Unknown Command */ MGMT_STATUS_NOT_CONNECTED, /* No Connection */ MGMT_STATUS_FAILED, /* Hardware Failure */ MGMT_STATUS_CONNECT_FAILED, /* Page Timeout */ MGMT_STATUS_AUTH_FAILED, /* Authentication Failed */ MGMT_STATUS_AUTH_FAILED, /* PIN or Key Missing */ MGMT_STATUS_NO_RESOURCES, /* Memory Full */ MGMT_STATUS_TIMEOUT, /* Connection Timeout */ MGMT_STATUS_NO_RESOURCES, /* Max Number of Connections */ MGMT_STATUS_NO_RESOURCES, /* Max Number of SCO Connections */ MGMT_STATUS_ALREADY_CONNECTED, /* ACL Connection Exists */ MGMT_STATUS_BUSY, /* Command Disallowed */ MGMT_STATUS_NO_RESOURCES, /* Rejected Limited Resources */ MGMT_STATUS_REJECTED, /* Rejected Security */ MGMT_STATUS_REJECTED, /* Rejected Personal */ MGMT_STATUS_TIMEOUT, /* Host Timeout */ MGMT_STATUS_NOT_SUPPORTED, /* Unsupported Feature */ MGMT_STATUS_INVALID_PARAMS, /* Invalid Parameters */ MGMT_STATUS_DISCONNECTED, /* OE User Ended Connection */ MGMT_STATUS_NO_RESOURCES, /* OE Low Resources */ MGMT_STATUS_DISCONNECTED, /* OE Power Off */ MGMT_STATUS_DISCONNECTED, /* Connection Terminated */ MGMT_STATUS_BUSY, /* Repeated Attempts */ MGMT_STATUS_REJECTED, /* Pairing Not Allowed */ MGMT_STATUS_FAILED, /* Unknown LMP PDU */ MGMT_STATUS_NOT_SUPPORTED, /* Unsupported Remote Feature */ MGMT_STATUS_REJECTED, /* SCO Offset Rejected */ MGMT_STATUS_REJECTED, /* SCO Interval Rejected */ MGMT_STATUS_REJECTED, /* Air Mode Rejected */ MGMT_STATUS_INVALID_PARAMS, /* Invalid LMP Parameters */ MGMT_STATUS_FAILED, /* Unspecified Error */ MGMT_STATUS_NOT_SUPPORTED, /* Unsupported LMP Parameter Value */ MGMT_STATUS_FAILED, /* Role Change Not Allowed */ MGMT_STATUS_TIMEOUT, /* LMP Response Timeout */ MGMT_STATUS_FAILED, /* LMP Error Transaction Collision */ MGMT_STATUS_FAILED, /* LMP PDU Not Allowed */ MGMT_STATUS_REJECTED, /* Encryption Mode Not Accepted */ MGMT_STATUS_FAILED, /* Unit Link Key Used */ MGMT_STATUS_NOT_SUPPORTED, /* QoS Not Supported */ MGMT_STATUS_TIMEOUT, /* Instant Passed */ MGMT_STATUS_NOT_SUPPORTED, /* Pairing Not Supported */ MGMT_STATUS_FAILED, /* Transaction Collision */ MGMT_STATUS_FAILED, /* Reserved for future use */ MGMT_STATUS_INVALID_PARAMS, /* Unacceptable Parameter */ MGMT_STATUS_REJECTED, /* QoS Rejected */ MGMT_STATUS_NOT_SUPPORTED, /* Classification Not Supported */ MGMT_STATUS_REJECTED, /* Insufficient Security */ MGMT_STATUS_INVALID_PARAMS, /* Parameter Out Of Range */ MGMT_STATUS_FAILED, /* Reserved for future use */ MGMT_STATUS_BUSY, /* Role Switch Pending */ MGMT_STATUS_FAILED, /* Reserved for future use */ MGMT_STATUS_FAILED, /* Slot Violation */ MGMT_STATUS_FAILED, /* Role Switch Failed */ MGMT_STATUS_INVALID_PARAMS, /* EIR Too Large */ MGMT_STATUS_NOT_SUPPORTED, /* Simple Pairing Not Supported */ MGMT_STATUS_BUSY, /* Host Busy Pairing */ MGMT_STATUS_REJECTED, /* Rejected, No Suitable Channel */ MGMT_STATUS_BUSY, /* Controller Busy */ MGMT_STATUS_INVALID_PARAMS, /* Unsuitable Connection Interval */ MGMT_STATUS_TIMEOUT, /* Directed Advertising Timeout */ MGMT_STATUS_AUTH_FAILED, /* Terminated Due to MIC Failure */ MGMT_STATUS_CONNECT_FAILED, /* Connection Establishment Failed */ MGMT_STATUS_CONNECT_FAILED, /* MAC Connection Failed */ }; static u8 mgmt_errno_status(int err) { switch (err) { case 0: return MGMT_STATUS_SUCCESS; case -EPERM: return MGMT_STATUS_REJECTED; case -EINVAL: return MGMT_STATUS_INVALID_PARAMS; case -EOPNOTSUPP: return MGMT_STATUS_NOT_SUPPORTED; case -EBUSY: return MGMT_STATUS_BUSY; case -ETIMEDOUT: return MGMT_STATUS_AUTH_FAILED; case -ENOMEM: return MGMT_STATUS_NO_RESOURCES; case -EISCONN: return MGMT_STATUS_ALREADY_CONNECTED; case -ENOTCONN: return MGMT_STATUS_DISCONNECTED; } return MGMT_STATUS_FAILED; } static u8 mgmt_status(int err) { if (err < 0) return mgmt_errno_status(err); if (err < ARRAY_SIZE(mgmt_status_table)) return mgmt_status_table[err]; return MGMT_STATUS_FAILED; } static int mgmt_index_event(u16 event, struct hci_dev *hdev, void *data, u16 len, int flag) { return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, flag, NULL); } static int mgmt_limited_event(u16 event, struct hci_dev *hdev, void *data, u16 len, int flag, struct sock *skip_sk) { return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, flag, skip_sk); } static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len, struct sock *skip_sk) { return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, HCI_SOCK_TRUSTED, skip_sk); } static int mgmt_event_skb(struct sk_buff *skb, struct sock *skip_sk) { return mgmt_send_event_skb(HCI_CHANNEL_CONTROL, skb, HCI_SOCK_TRUSTED, skip_sk); } static u8 le_addr_type(u8 mgmt_addr_type) { if (mgmt_addr_type == BDADDR_LE_PUBLIC) return ADDR_LE_DEV_PUBLIC; else return ADDR_LE_DEV_RANDOM; } void mgmt_fill_version_info(void *ver) { struct mgmt_rp_read_version *rp = ver; rp->version = MGMT_VERSION; rp->revision = cpu_to_le16(MGMT_REVISION); } static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_version rp; bt_dev_dbg(hdev, "sock %p", sk); mgmt_fill_version_info(&rp); return mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, 0, &rp, sizeof(rp)); } static int read_commands(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_commands *rp; u16 num_commands, num_events; size_t rp_size; int i, err; bt_dev_dbg(hdev, "sock %p", sk); if (hci_sock_test_flag(sk, HCI_SOCK_TRUSTED)) { num_commands = ARRAY_SIZE(mgmt_commands); num_events = ARRAY_SIZE(mgmt_events); } else { num_commands = ARRAY_SIZE(mgmt_untrusted_commands); num_events = ARRAY_SIZE(mgmt_untrusted_events); } rp_size = sizeof(*rp) + ((num_commands + num_events) * sizeof(u16)); rp = kmalloc(rp_size, GFP_KERNEL); if (!rp) return -ENOMEM; rp->num_commands = cpu_to_le16(num_commands); rp->num_events = cpu_to_le16(num_events); if (hci_sock_test_flag(sk, HCI_SOCK_TRUSTED)) { __le16 *opcode = rp->opcodes; for (i = 0; i < num_commands; i++, opcode++) put_unaligned_le16(mgmt_commands[i], opcode); for (i = 0; i < num_events; i++, opcode++) put_unaligned_le16(mgmt_events[i], opcode); } else { __le16 *opcode = rp->opcodes; for (i = 0; i < num_commands; i++, opcode++) put_unaligned_le16(mgmt_untrusted_commands[i], opcode); for (i = 0; i < num_events; i++, opcode++) put_unaligned_le16(mgmt_untrusted_events[i], opcode); } err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_COMMANDS, 0, rp, rp_size); kfree(rp); return err; } static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_index_list *rp; struct hci_dev *d; size_t rp_len; u16 count; int err; bt_dev_dbg(hdev, "sock %p", sk); read_lock(&hci_dev_list_lock); count = 0; list_for_each_entry(d, &hci_dev_list, list) { if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) count++; } rp_len = sizeof(*rp) + (2 * count); rp = kmalloc(rp_len, GFP_ATOMIC); if (!rp) { read_unlock(&hci_dev_list_lock); return -ENOMEM; } count = 0; list_for_each_entry(d, &hci_dev_list, list) { if (hci_dev_test_flag(d, HCI_SETUP) || hci_dev_test_flag(d, HCI_CONFIG) || hci_dev_test_flag(d, HCI_USER_CHANNEL)) continue; /* Devices marked as raw-only are neither configured * nor unconfigured controllers. */ if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) continue; if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); bt_dev_dbg(hdev, "Added hci%u", d->id); } } rp->num_controllers = cpu_to_le16(count); rp_len = sizeof(*rp) + (2 * count); read_unlock(&hci_dev_list_lock); err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_INDEX_LIST, 0, rp, rp_len); kfree(rp); return err; } static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_unconf_index_list *rp; struct hci_dev *d; size_t rp_len; u16 count; int err; bt_dev_dbg(hdev, "sock %p", sk); read_lock(&hci_dev_list_lock); count = 0; list_for_each_entry(d, &hci_dev_list, list) { if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) count++; } rp_len = sizeof(*rp) + (2 * count); rp = kmalloc(rp_len, GFP_ATOMIC); if (!rp) { read_unlock(&hci_dev_list_lock); return -ENOMEM; } count = 0; list_for_each_entry(d, &hci_dev_list, list) { if (hci_dev_test_flag(d, HCI_SETUP) || hci_dev_test_flag(d, HCI_CONFIG) || hci_dev_test_flag(d, HCI_USER_CHANNEL)) continue; /* Devices marked as raw-only are neither configured * nor unconfigured controllers. */ if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) continue; if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); bt_dev_dbg(hdev, "Added hci%u", d->id); } } rp->num_controllers = cpu_to_le16(count); rp_len = sizeof(*rp) + (2 * count); read_unlock(&hci_dev_list_lock); err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_UNCONF_INDEX_LIST, 0, rp, rp_len); kfree(rp); return err; } static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_ext_index_list *rp; struct hci_dev *d; u16 count; int err; bt_dev_dbg(hdev, "sock %p", sk); read_lock(&hci_dev_list_lock); count = 0; list_for_each_entry(d, &hci_dev_list, list) count++; rp = kmalloc(struct_size(rp, entry, count), GFP_ATOMIC); if (!rp) { read_unlock(&hci_dev_list_lock); return -ENOMEM; } count = 0; list_for_each_entry(d, &hci_dev_list, list) { if (hci_dev_test_flag(d, HCI_SETUP) || hci_dev_test_flag(d, HCI_CONFIG) || hci_dev_test_flag(d, HCI_USER_CHANNEL)) continue; /* Devices marked as raw-only are neither configured * nor unconfigured controllers. */ if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) continue; if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) rp->entry[count].type = 0x01; else rp->entry[count].type = 0x00; rp->entry[count].bus = d->bus; rp->entry[count++].index = cpu_to_le16(d->id); bt_dev_dbg(hdev, "Added hci%u", d->id); } rp->num_controllers = cpu_to_le16(count); read_unlock(&hci_dev_list_lock); /* If this command is called at least once, then all the * default index and unconfigured index events are disabled * and from now on only extended index events are used. */ hci_sock_set_flag(sk, HCI_MGMT_EXT_INDEX_EVENTS); hci_sock_clear_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_clear_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_EXT_INDEX_LIST, 0, rp, struct_size(rp, entry, count)); kfree(rp); return err; } static bool is_configured(struct hci_dev *hdev) { if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) && !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED)) return false; if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) || test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) && !bacmp(&hdev->public_addr, BDADDR_ANY)) return false; return true; } static __le32 get_missing_options(struct hci_dev *hdev) { u32 options = 0; if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) && !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED)) options |= MGMT_OPTION_EXTERNAL_CONFIG; if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) || test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) && !bacmp(&hdev->public_addr, BDADDR_ANY)) options |= MGMT_OPTION_PUBLIC_ADDRESS; return cpu_to_le32(options); } static int new_options(struct hci_dev *hdev, struct sock *skip) { __le32 options = get_missing_options(hdev); return mgmt_limited_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, sizeof(options), HCI_MGMT_OPTION_EVENTS, skip); } static int send_options_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) { __le32 options = get_missing_options(hdev); return mgmt_cmd_complete(sk, hdev->id, opcode, 0, &options, sizeof(options)); } static int read_config_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_config_info rp; u32 options = 0; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); memset(&rp, 0, sizeof(rp)); rp.manufacturer = cpu_to_le16(hdev->manufacturer); if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks)) options |= MGMT_OPTION_EXTERNAL_CONFIG; if (hdev->set_bdaddr) options |= MGMT_OPTION_PUBLIC_ADDRESS; rp.supported_options = cpu_to_le32(options); rp.missing_options = get_missing_options(hdev); hci_dev_unlock(hdev); return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_CONFIG_INFO, 0, &rp, sizeof(rp)); } static u32 get_supported_phys(struct hci_dev *hdev) { u32 supported_phys = 0; if (lmp_bredr_capable(hdev)) { supported_phys |= MGMT_PHY_BR_1M_1SLOT; if (hdev->features[0][0] & LMP_3SLOT) supported_phys |= MGMT_PHY_BR_1M_3SLOT; if (hdev->features[0][0] & LMP_5SLOT) supported_phys |= MGMT_PHY_BR_1M_5SLOT; if (lmp_edr_2m_capable(hdev)) { supported_phys |= MGMT_PHY_EDR_2M_1SLOT; if (lmp_edr_3slot_capable(hdev)) supported_phys |= MGMT_PHY_EDR_2M_3SLOT; if (lmp_edr_5slot_capable(hdev)) supported_phys |= MGMT_PHY_EDR_2M_5SLOT; if (lmp_edr_3m_capable(hdev)) { supported_phys |= MGMT_PHY_EDR_3M_1SLOT; if (lmp_edr_3slot_capable(hdev)) supported_phys |= MGMT_PHY_EDR_3M_3SLOT; if (lmp_edr_5slot_capable(hdev)) supported_phys |= MGMT_PHY_EDR_3M_5SLOT; } } } if (lmp_le_capable(hdev)) { supported_phys |= MGMT_PHY_LE_1M_TX; supported_phys |= MGMT_PHY_LE_1M_RX; if (hdev->le_features[1] & HCI_LE_PHY_2M) { supported_phys |= MGMT_PHY_LE_2M_TX; supported_phys |= MGMT_PHY_LE_2M_RX; } if (hdev->le_features[1] & HCI_LE_PHY_CODED) { supported_phys |= MGMT_PHY_LE_CODED_TX; supported_phys |= MGMT_PHY_LE_CODED_RX; } } return supported_phys; } static u32 get_selected_phys(struct hci_dev *hdev) { u32 selected_phys = 0; if (lmp_bredr_capable(hdev)) { selected_phys |= MGMT_PHY_BR_1M_1SLOT; if (hdev->pkt_type & (HCI_DM3 | HCI_DH3)) selected_phys |= MGMT_PHY_BR_1M_3SLOT; if (hdev->pkt_type & (HCI_DM5 | HCI_DH5)) selected_phys |= MGMT_PHY_BR_1M_5SLOT; if (lmp_edr_2m_capable(hdev)) { if (!(hdev->pkt_type & HCI_2DH1)) selected_phys |= MGMT_PHY_EDR_2M_1SLOT; if (lmp_edr_3slot_capable(hdev) && !(hdev->pkt_type & HCI_2DH3)) selected_phys |= MGMT_PHY_EDR_2M_3SLOT; if (lmp_edr_5slot_capable(hdev) && !(hdev->pkt_type & HCI_2DH5)) selected_phys |= MGMT_PHY_EDR_2M_5SLOT; if (lmp_edr_3m_capable(hdev)) { if (!(hdev->pkt_type & HCI_3DH1)) selected_phys |= MGMT_PHY_EDR_3M_1SLOT; if (lmp_edr_3slot_capable(hdev) && !(hdev->pkt_type & HCI_3DH3)) selected_phys |= MGMT_PHY_EDR_3M_3SLOT; if (lmp_edr_5slot_capable(hdev) && !(hdev->pkt_type & HCI_3DH5)) selected_phys |= MGMT_PHY_EDR_3M_5SLOT; } } } if (lmp_le_capable(hdev)) { if (hdev->le_tx_def_phys & HCI_LE_SET_PHY_1M) selected_phys |= MGMT_PHY_LE_1M_TX; if (hdev->le_rx_def_phys & HCI_LE_SET_PHY_1M) selected_phys |= MGMT_PHY_LE_1M_RX; if (hdev->le_tx_def_phys & HCI_LE_SET_PHY_2M) selected_phys |= MGMT_PHY_LE_2M_TX; if (hdev->le_rx_def_phys & HCI_LE_SET_PHY_2M) selected_phys |= MGMT_PHY_LE_2M_RX; if (hdev->le_tx_def_phys & HCI_LE_SET_PHY_CODED) selected_phys |= MGMT_PHY_LE_CODED_TX; if (hdev->le_rx_def_phys & HCI_LE_SET_PHY_CODED) selected_phys |= MGMT_PHY_LE_CODED_RX; } return selected_phys; } static u32 get_configurable_phys(struct hci_dev *hdev) { return (get_supported_phys(hdev) & ~MGMT_PHY_BR_1M_1SLOT & ~MGMT_PHY_LE_1M_TX & ~MGMT_PHY_LE_1M_RX); } static u32 get_supported_settings(struct hci_dev *hdev) { u32 settings = 0; settings |= MGMT_SETTING_POWERED; settings |= MGMT_SETTING_BONDABLE; settings |= MGMT_SETTING_DEBUG_KEYS; settings |= MGMT_SETTING_CONNECTABLE; settings |= MGMT_SETTING_DISCOVERABLE; if (lmp_bredr_capable(hdev)) { if (hdev->hci_ver >= BLUETOOTH_VER_1_2) settings |= MGMT_SETTING_FAST_CONNECTABLE; settings |= MGMT_SETTING_BREDR; settings |= MGMT_SETTING_LINK_SECURITY; if (lmp_ssp_capable(hdev)) { settings |= MGMT_SETTING_SSP; } if (lmp_sc_capable(hdev)) settings |= MGMT_SETTING_SECURE_CONN; if (test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks)) settings |= MGMT_SETTING_WIDEBAND_SPEECH; } if (lmp_le_capable(hdev)) { settings |= MGMT_SETTING_LE; settings |= MGMT_SETTING_SECURE_CONN; settings |= MGMT_SETTING_PRIVACY; settings |= MGMT_SETTING_STATIC_ADDRESS; settings |= MGMT_SETTING_ADVERTISING; } if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || hdev->set_bdaddr) settings |= MGMT_SETTING_CONFIGURATION; if (cis_central_capable(hdev)) settings |= MGMT_SETTING_CIS_CENTRAL; if (cis_peripheral_capable(hdev)) settings |= MGMT_SETTING_CIS_PERIPHERAL; settings |= MGMT_SETTING_PHY_CONFIGURATION; return settings; } static u32 get_current_settings(struct hci_dev *hdev) { u32 settings = 0; if (hdev_is_powered(hdev)) settings |= MGMT_SETTING_POWERED; if (hci_dev_test_flag(hdev, HCI_CONNECTABLE)) settings |= MGMT_SETTING_CONNECTABLE; if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) settings |= MGMT_SETTING_FAST_CONNECTABLE; if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) settings |= MGMT_SETTING_DISCOVERABLE; if (hci_dev_test_flag(hdev, HCI_BONDABLE)) settings |= MGMT_SETTING_BONDABLE; if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) settings |= MGMT_SETTING_BREDR; if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) settings |= MGMT_SETTING_LE; if (hci_dev_test_flag(hdev, HCI_LINK_SECURITY)) settings |= MGMT_SETTING_LINK_SECURITY; if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) settings |= MGMT_SETTING_SSP; if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) settings |= MGMT_SETTING_ADVERTISING; if (hci_dev_test_flag(hdev, HCI_SC_ENABLED)) settings |= MGMT_SETTING_SECURE_CONN; if (hci_dev_test_flag(hdev, HCI_KEEP_DEBUG_KEYS)) settings |= MGMT_SETTING_DEBUG_KEYS; if (hci_dev_test_flag(hdev, HCI_PRIVACY)) settings |= MGMT_SETTING_PRIVACY; /* The current setting for static address has two purposes. The * first is to indicate if the static address will be used and * the second is to indicate if it is actually set. * * This means if the static address is not configured, this flag * will never be set. If the address is configured, then if the * address is actually used decides if the flag is set or not. * * For single mode LE only controllers and dual-mode controllers * with BR/EDR disabled, the existence of the static address will * be evaluated. */ if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) || !bacmp(&hdev->bdaddr, BDADDR_ANY)) { if (bacmp(&hdev->static_addr, BDADDR_ANY)) settings |= MGMT_SETTING_STATIC_ADDRESS; } if (hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED)) settings |= MGMT_SETTING_WIDEBAND_SPEECH; if (cis_central_capable(hdev)) settings |= MGMT_SETTING_CIS_CENTRAL; if (cis_peripheral_capable(hdev)) settings |= MGMT_SETTING_CIS_PERIPHERAL; if (bis_capable(hdev)) settings |= MGMT_SETTING_ISO_BROADCASTER; if (sync_recv_capable(hdev)) settings |= MGMT_SETTING_ISO_SYNC_RECEIVER; return settings; } static struct mgmt_pending_cmd *pending_find(u16 opcode, struct hci_dev *hdev) { return mgmt_pending_find(HCI_CHANNEL_CONTROL, opcode, hdev); } u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev) { struct mgmt_pending_cmd *cmd; /* If there's a pending mgmt command the flags will not yet have * their final values, so check for this first. */ cmd = pending_find(MGMT_OP_SET_DISCOVERABLE, hdev); if (cmd) { struct mgmt_mode *cp = cmd->param; if (cp->val == 0x01) return LE_AD_GENERAL; else if (cp->val == 0x02) return LE_AD_LIMITED; } else { if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) return LE_AD_LIMITED; else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) return LE_AD_GENERAL; } return 0; } bool mgmt_get_connectable(struct hci_dev *hdev) { struct mgmt_pending_cmd *cmd; /* If there's a pending mgmt command the flag will not yet have * it's final value, so check for this first. */ cmd = pending_find(MGMT_OP_SET_CONNECTABLE, hdev); if (cmd) { struct mgmt_mode *cp = cmd->param; return cp->val; } return hci_dev_test_flag(hdev, HCI_CONNECTABLE); } static int service_cache_sync(struct hci_dev *hdev, void *data) { hci_update_eir_sync(hdev); hci_update_class_sync(hdev); return 0; } static void service_cache_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, service_cache.work); if (!hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) return; hci_cmd_sync_queue(hdev, service_cache_sync, NULL, NULL); } static int rpa_expired_sync(struct hci_dev *hdev, void *data) { /* The generation of a new RPA and programming it into the * controller happens in the hci_req_enable_advertising() * function. */ if (ext_adv_capable(hdev)) return hci_start_ext_adv_sync(hdev, hdev->cur_adv_instance); else return hci_enable_advertising_sync(hdev); } static void rpa_expired(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, rpa_expired.work); bt_dev_dbg(hdev, ""); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); if (!hci_dev_test_flag(hdev, HCI_ADVERTISING)) return; hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL); } static int set_discoverable_sync(struct hci_dev *hdev, void *data); static void discov_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, discov_off.work); bt_dev_dbg(hdev, ""); hci_dev_lock(hdev); /* When discoverable timeout triggers, then just make sure * the limited discoverable flag is cleared. Even in the case * of a timeout triggered from general discoverable, it is * safe to unconditionally clear the flag. */ hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hdev->discov_timeout = 0; hci_cmd_sync_queue(hdev, set_discoverable_sync, NULL, NULL); mgmt_new_settings(hdev); hci_dev_unlock(hdev); } static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev); static void mesh_send_complete(struct hci_dev *hdev, struct mgmt_mesh_tx *mesh_tx, bool silent) { u8 handle = mesh_tx->handle; if (!silent) mgmt_event(MGMT_EV_MESH_PACKET_CMPLT, hdev, &handle, sizeof(handle), NULL); mgmt_mesh_remove(mesh_tx); } static int mesh_send_done_sync(struct hci_dev *hdev, void *data) { struct mgmt_mesh_tx *mesh_tx; hci_dev_clear_flag(hdev, HCI_MESH_SENDING); hci_disable_advertising_sync(hdev); mesh_tx = mgmt_mesh_next(hdev, NULL); if (mesh_tx) mesh_send_complete(hdev, mesh_tx, false); return 0; } static int mesh_send_sync(struct hci_dev *hdev, void *data); static void mesh_send_start_complete(struct hci_dev *hdev, void *data, int err); static void mesh_next(struct hci_dev *hdev, void *data, int err) { struct mgmt_mesh_tx *mesh_tx = mgmt_mesh_next(hdev, NULL); if (!mesh_tx) return; err = hci_cmd_sync_queue(hdev, mesh_send_sync, mesh_tx, mesh_send_start_complete); if (err < 0) mesh_send_complete(hdev, mesh_tx, false); else hci_dev_set_flag(hdev, HCI_MESH_SENDING); } static void mesh_send_done(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, mesh_send_done.work); if (!hci_dev_test_flag(hdev, HCI_MESH_SENDING)) return; hci_cmd_sync_queue(hdev, mesh_send_done_sync, NULL, mesh_next); } static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) { if (hci_dev_test_flag(hdev, HCI_MGMT)) return; BT_INFO("MGMT ver %d.%d", MGMT_VERSION, MGMT_REVISION); INIT_DELAYED_WORK(&hdev->discov_off, discov_off); INIT_DELAYED_WORK(&hdev->service_cache, service_cache_off); INIT_DELAYED_WORK(&hdev->rpa_expired, rpa_expired); INIT_DELAYED_WORK(&hdev->mesh_send_done, mesh_send_done); /* Non-mgmt controlled devices get this bit set * implicitly so that pairing works for them, however * for mgmt we require user-space to explicitly enable * it */ hci_dev_clear_flag(hdev, HCI_BONDABLE); hci_dev_set_flag(hdev, HCI_MGMT); } static int read_controller_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_info rp; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); memset(&rp, 0, sizeof(rp)); bacpy(&rp.bdaddr, &hdev->bdaddr); rp.version = hdev->hci_ver; rp.manufacturer = cpu_to_le16(hdev->manufacturer); rp.supported_settings = cpu_to_le32(get_supported_settings(hdev)); rp.current_settings = cpu_to_le32(get_current_settings(hdev)); memcpy(rp.dev_class, hdev->dev_class, 3); memcpy(rp.name, hdev->dev_name, sizeof(hdev->dev_name)); memcpy(rp.short_name, hdev->short_name, sizeof(hdev->short_name)); hci_dev_unlock(hdev); return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_INFO, 0, &rp, sizeof(rp)); } static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) { u16 eir_len = 0; size_t name_len; if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) eir_len = eir_append_data(eir, eir_len, EIR_CLASS_OF_DEV, hdev->dev_class, 3); if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) eir_len = eir_append_le16(eir, eir_len, EIR_APPEARANCE, hdev->appearance); name_len = strnlen(hdev->dev_name, sizeof(hdev->dev_name)); eir_len = eir_append_data(eir, eir_len, EIR_NAME_COMPLETE, hdev->dev_name, name_len); name_len = strnlen(hdev->short_name, sizeof(hdev->short_name)); eir_len = eir_append_data(eir, eir_len, EIR_NAME_SHORT, hdev->short_name, name_len); return eir_len; } static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { char buf[512]; struct mgmt_rp_read_ext_info *rp = (void *)buf; u16 eir_len; bt_dev_dbg(hdev, "sock %p", sk); memset(&buf, 0, sizeof(buf)); hci_dev_lock(hdev); bacpy(&rp->bdaddr, &hdev->bdaddr); rp->version = hdev->hci_ver; rp->manufacturer = cpu_to_le16(hdev->manufacturer); rp->supported_settings = cpu_to_le32(get_supported_settings(hdev)); rp->current_settings = cpu_to_le32(get_current_settings(hdev)); eir_len = append_eir_data_to_buf(hdev, rp->eir); rp->eir_len = cpu_to_le16(eir_len); hci_dev_unlock(hdev); /* If this command is called at least once, then the events * for class of device and local name changes are disabled * and only the new extended controller information event * is used. */ hci_sock_set_flag(sk, HCI_MGMT_EXT_INFO_EVENTS); hci_sock_clear_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); hci_sock_clear_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_EXT_INFO, 0, rp, sizeof(*rp) + eir_len); } static int ext_info_changed(struct hci_dev *hdev, struct sock *skip) { char buf[512]; struct mgmt_ev_ext_info_changed *ev = (void *)buf; u16 eir_len; memset(buf, 0, sizeof(buf)); eir_len = append_eir_data_to_buf(hdev, ev->eir); ev->eir_len = cpu_to_le16(eir_len); return mgmt_limited_event(MGMT_EV_EXT_INFO_CHANGED, hdev, ev, sizeof(*ev) + eir_len, HCI_MGMT_EXT_INFO_EVENTS, skip); } static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) { __le32 settings = cpu_to_le32(get_current_settings(hdev)); return mgmt_cmd_complete(sk, hdev->id, opcode, 0, &settings, sizeof(settings)); } void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance) { struct mgmt_ev_advertising_added ev; ev.instance = instance; mgmt_event(MGMT_EV_ADVERTISING_ADDED, hdev, &ev, sizeof(ev), sk); } void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance) { struct mgmt_ev_advertising_removed ev; ev.instance = instance; mgmt_event(MGMT_EV_ADVERTISING_REMOVED, hdev, &ev, sizeof(ev), sk); } static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { hdev->adv_instance_timeout = 0; cancel_delayed_work(&hdev->adv_instance_expire); } } /* This function requires the caller holds hdev->lock */ static void restart_le_actions(struct hci_dev *hdev) { struct hci_conn_params *p; list_for_each_entry(p, &hdev->le_conn_params, list) { /* Needed for AUTO_OFF case where might not "really" * have been powered off. */ hci_pend_le_list_del_init(p); switch (p->auto_connect) { case HCI_AUTO_CONN_DIRECT: case HCI_AUTO_CONN_ALWAYS: hci_pend_le_list_add(p, &hdev->pend_le_conns); break; case HCI_AUTO_CONN_REPORT: hci_pend_le_list_add(p, &hdev->pend_le_reports); break; default: break; } } } static int new_settings(struct hci_dev *hdev, struct sock *skip) { __le32 ev = cpu_to_le32(get_current_settings(hdev)); return mgmt_limited_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, sizeof(ev), HCI_MGMT_SETTING_EVENTS, skip); } static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; struct mgmt_mode *cp; /* Make sure cmd still outstanding. */ if (cmd != pending_find(MGMT_OP_SET_POWERED, hdev)) return; cp = cmd->param; bt_dev_dbg(hdev, "err %d", err); if (!err) { if (cp->val) { hci_dev_lock(hdev); restart_le_actions(hdev); hci_update_passive_scan(hdev); hci_dev_unlock(hdev); } send_settings_rsp(cmd->sk, cmd->opcode, hdev); /* Only call new_setting for power on as power off is deferred * to hdev->power_off work which does call hci_dev_do_close. */ if (cp->val) new_settings(hdev, cmd->sk); } else { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_POWERED, mgmt_status(err)); } mgmt_pending_remove(cmd); } static int set_powered_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_mode *cp = cmd->param; BT_DBG("%s", hdev->name); return hci_set_powered_sync(hdev, cp->val); } static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; int err; bt_dev_dbg(hdev, "sock %p", sk); if (cp->val != 0x00 && cp->val != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (!cp->val) { if (hci_dev_test_flag(hdev, HCI_POWERING_DOWN)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, MGMT_STATUS_BUSY); goto failed; } } if (pending_find(MGMT_OP_SET_POWERED, hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, MGMT_STATUS_BUSY); goto failed; } if (!!cp->val == hdev_is_powered(hdev)) { err = send_settings_rsp(sk, MGMT_OP_SET_POWERED, hdev); goto failed; } cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } /* Cancel potentially blocking sync operation before power off */ if (cp->val == 0x00) { hci_cmd_sync_cancel_sync(hdev, -EHOSTDOWN); err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, mgmt_set_powered_complete); } else { /* Use hci_cmd_sync_submit since hdev might not be running */ err = hci_cmd_sync_submit(hdev, set_powered_sync, cmd, mgmt_set_powered_complete); } if (err < 0) mgmt_pending_remove(cmd); failed: hci_dev_unlock(hdev); return err; } int mgmt_new_settings(struct hci_dev *hdev) { return new_settings(hdev, NULL); } struct cmd_lookup { struct sock *sk; struct hci_dev *hdev; u8 mgmt_status; }; static void settings_rsp(struct mgmt_pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; send_settings_rsp(cmd->sk, cmd->opcode, match->hdev); list_del(&cmd->list); if (match->sk == NULL) { match->sk = cmd->sk; sock_hold(match->sk); } mgmt_pending_free(cmd); } static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data) { u8 *status = data; mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, *status); mgmt_pending_remove(cmd); } static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) { if (cmd->cmd_complete) { u8 *status = data; cmd->cmd_complete(cmd, *status); mgmt_pending_remove(cmd); return; } cmd_status_rsp(cmd, data); } static int generic_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, cmd->param_len); } static int addr_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, sizeof(struct mgmt_addr_info)); } static u8 mgmt_bredr_support(struct hci_dev *hdev) { if (!lmp_bredr_capable(hdev)) return MGMT_STATUS_NOT_SUPPORTED; else if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return MGMT_STATUS_REJECTED; else return MGMT_STATUS_SUCCESS; } static u8 mgmt_le_support(struct hci_dev *hdev) { if (!lmp_le_capable(hdev)) return MGMT_STATUS_NOT_SUPPORTED; else if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return MGMT_STATUS_REJECTED; else return MGMT_STATUS_SUCCESS; } static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; bt_dev_dbg(hdev, "err %d", err); /* Make sure cmd still outstanding. */ if (cmd != pending_find(MGMT_OP_SET_DISCOVERABLE, hdev)) return; hci_dev_lock(hdev); if (err) { u8 mgmt_err = mgmt_status(err); mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); goto done; } if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE) && hdev->discov_timeout > 0) { int to = msecs_to_jiffies(hdev->discov_timeout * 1000); queue_delayed_work(hdev->req_workqueue, &hdev->discov_off, to); } send_settings_rsp(cmd->sk, MGMT_OP_SET_DISCOVERABLE, hdev); new_settings(hdev, cmd->sk); done: mgmt_pending_remove(cmd); hci_dev_unlock(hdev); } static int set_discoverable_sync(struct hci_dev *hdev, void *data) { BT_DBG("%s", hdev->name); return hci_update_discoverable_sync(hdev); } static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_discoverable *cp = data; struct mgmt_pending_cmd *cmd; u16 timeout; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) && !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, MGMT_STATUS_INVALID_PARAMS); timeout = __le16_to_cpu(cp->timeout); /* Disabling discoverable requires that no timeout is set, * and enabling limited discoverable requires a timeout. */ if ((cp->val == 0x00 && timeout > 0) || (cp->val == 0x02 && timeout == 0)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (!hdev_is_powered(hdev) && timeout > 0) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, MGMT_STATUS_NOT_POWERED); goto failed; } if (pending_find(MGMT_OP_SET_DISCOVERABLE, hdev) || pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, MGMT_STATUS_BUSY); goto failed; } if (!hci_dev_test_flag(hdev, HCI_CONNECTABLE)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, MGMT_STATUS_REJECTED); goto failed; } if (hdev->advertising_paused) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, MGMT_STATUS_BUSY); goto failed; } if (!hdev_is_powered(hdev)) { bool changed = false; /* Setting limited discoverable when powered off is * not a valid operation since it requires a timeout * and so no need to check HCI_LIMITED_DISCOVERABLE. */ if (!!cp->val != hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) { hci_dev_change_flag(hdev, HCI_DISCOVERABLE); changed = true; } err = send_settings_rsp(sk, MGMT_OP_SET_DISCOVERABLE, hdev); if (err < 0) goto failed; if (changed) err = new_settings(hdev, sk); goto failed; } /* If the current mode is the same, then just update the timeout * value with the new value. And if only the timeout gets updated, * then no need for any HCI transactions. */ if (!!cp->val == hci_dev_test_flag(hdev, HCI_DISCOVERABLE) && (cp->val == 0x02) == hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) { cancel_delayed_work(&hdev->discov_off); hdev->discov_timeout = timeout; if (cp->val && hdev->discov_timeout > 0) { int to = msecs_to_jiffies(hdev->discov_timeout * 1000); queue_delayed_work(hdev->req_workqueue, &hdev->discov_off, to); } err = send_settings_rsp(sk, MGMT_OP_SET_DISCOVERABLE, hdev); goto failed; } cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } /* Cancel any potential discoverable timeout that might be * still active and store new timeout value. The arming of * the timeout happens in the complete handler. */ cancel_delayed_work(&hdev->discov_off); hdev->discov_timeout = timeout; if (cp->val) hci_dev_set_flag(hdev, HCI_DISCOVERABLE); else hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); /* Limited discoverable mode */ if (cp->val == 0x02) hci_dev_set_flag(hdev, HCI_LIMITED_DISCOVERABLE); else hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); err = hci_cmd_sync_queue(hdev, set_discoverable_sync, cmd, mgmt_set_discoverable_complete); if (err < 0) mgmt_pending_remove(cmd); failed: hci_dev_unlock(hdev); return err; } static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; bt_dev_dbg(hdev, "err %d", err); /* Make sure cmd still outstanding. */ if (cmd != pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) return; hci_dev_lock(hdev); if (err) { u8 mgmt_err = mgmt_status(err); mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); goto done; } send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev); new_settings(hdev, cmd->sk); done: mgmt_pending_remove(cmd); hci_dev_unlock(hdev); } static int set_connectable_update_settings(struct hci_dev *hdev, struct sock *sk, u8 val) { bool changed = false; int err; if (!!val != hci_dev_test_flag(hdev, HCI_CONNECTABLE)) changed = true; if (val) { hci_dev_set_flag(hdev, HCI_CONNECTABLE); } else { hci_dev_clear_flag(hdev, HCI_CONNECTABLE); hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); } err = send_settings_rsp(sk, MGMT_OP_SET_CONNECTABLE, hdev); if (err < 0) return err; if (changed) { hci_update_scan(hdev); hci_update_passive_scan(hdev); return new_settings(hdev, sk); } return 0; } static int set_connectable_sync(struct hci_dev *hdev, void *data) { BT_DBG("%s", hdev->name); return hci_update_connectable_sync(hdev); } static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) && !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { err = set_connectable_update_settings(hdev, sk, cp->val); goto failed; } if (pending_find(MGMT_OP_SET_DISCOVERABLE, hdev) || pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, MGMT_STATUS_BUSY); goto failed; } cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } if (cp->val) { hci_dev_set_flag(hdev, HCI_CONNECTABLE); } else { if (hdev->discov_timeout > 0) cancel_delayed_work(&hdev->discov_off); hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_CONNECTABLE); } err = hci_cmd_sync_queue(hdev, set_connectable_sync, cmd, mgmt_set_connectable_complete); if (err < 0) mgmt_pending_remove(cmd); failed: hci_dev_unlock(hdev); return err; } static int set_bondable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; bool changed; int err; bt_dev_dbg(hdev, "sock %p", sk); if (cp->val != 0x00 && cp->val != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BONDABLE, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (cp->val) changed = !hci_dev_test_and_set_flag(hdev, HCI_BONDABLE); else changed = hci_dev_test_and_clear_flag(hdev, HCI_BONDABLE); err = send_settings_rsp(sk, MGMT_OP_SET_BONDABLE, hdev); if (err < 0) goto unlock; if (changed) { /* In limited privacy mode the change of bondable mode * may affect the local advertising address. */ hci_update_discoverable(hdev); err = new_settings(hdev, sk); } unlock: hci_dev_unlock(hdev); return err; } static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; u8 val, status; int err; bt_dev_dbg(hdev, "sock %p", sk); status = mgmt_bredr_support(hdev); if (status) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, status); if (cp->val != 0x00 && cp->val != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { bool changed = false; if (!!cp->val != hci_dev_test_flag(hdev, HCI_LINK_SECURITY)) { hci_dev_change_flag(hdev, HCI_LINK_SECURITY); changed = true; } err = send_settings_rsp(sk, MGMT_OP_SET_LINK_SECURITY, hdev); if (err < 0) goto failed; if (changed) err = new_settings(hdev, sk); goto failed; } if (pending_find(MGMT_OP_SET_LINK_SECURITY, hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, MGMT_STATUS_BUSY); goto failed; } val = !!cp->val; if (test_bit(HCI_AUTH, &hdev->flags) == val) { err = send_settings_rsp(sk, MGMT_OP_SET_LINK_SECURITY, hdev); goto failed; } cmd = mgmt_pending_add(sk, MGMT_OP_SET_LINK_SECURITY, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } err = hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, sizeof(val), &val); if (err < 0) { mgmt_pending_remove(cmd); goto failed; } failed: hci_dev_unlock(hdev); return err; } static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) { struct cmd_lookup match = { NULL, hdev }; struct mgmt_pending_cmd *cmd = data; struct mgmt_mode *cp = cmd->param; u8 enable = cp->val; bool changed; /* Make sure cmd still outstanding. */ if (cmd != pending_find(MGMT_OP_SET_SSP, hdev)) return; if (err) { u8 mgmt_err = mgmt_status(err); if (enable && hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED)) { new_settings(hdev, NULL); } mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, cmd_status_rsp, &mgmt_err); return; } if (enable) { changed = !hci_dev_test_and_set_flag(hdev, HCI_SSP_ENABLED); } else { changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED); } mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match); if (changed) new_settings(hdev, match.sk); if (match.sk) sock_put(match.sk); hci_update_eir_sync(hdev); } static int set_ssp_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_mode *cp = cmd->param; bool changed = false; int err; if (cp->val) changed = !hci_dev_test_and_set_flag(hdev, HCI_SSP_ENABLED); err = hci_write_ssp_mode_sync(hdev, cp->val); if (!err && changed) hci_dev_clear_flag(hdev, HCI_SSP_ENABLED); return err; } static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; u8 status; int err; bt_dev_dbg(hdev, "sock %p", sk); status = mgmt_bredr_support(hdev); if (status) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, status); if (!lmp_ssp_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, MGMT_STATUS_NOT_SUPPORTED); if (cp->val != 0x00 && cp->val != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { bool changed; if (cp->val) { changed = !hci_dev_test_and_set_flag(hdev, HCI_SSP_ENABLED); } else { changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED); } err = send_settings_rsp(sk, MGMT_OP_SET_SSP, hdev); if (err < 0) goto failed; if (changed) err = new_settings(hdev, sk); goto failed; } if (pending_find(MGMT_OP_SET_SSP, hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, MGMT_STATUS_BUSY); goto failed; } if (!!cp->val == hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { err = send_settings_rsp(sk, MGMT_OP_SET_SSP, hdev); goto failed; } cmd = mgmt_pending_add(sk, MGMT_OP_SET_SSP, hdev, data, len); if (!cmd) err = -ENOMEM; else err = hci_cmd_sync_queue(hdev, set_ssp_sync, cmd, set_ssp_complete); if (err < 0) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, MGMT_STATUS_FAILED); if (cmd) mgmt_pending_remove(cmd); } failed: hci_dev_unlock(hdev); return err; } static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { bt_dev_dbg(hdev, "sock %p", sk); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, MGMT_STATUS_NOT_SUPPORTED); } static void set_le_complete(struct hci_dev *hdev, void *data, int err) { struct cmd_lookup match = { NULL, hdev }; u8 status = mgmt_status(err); bt_dev_dbg(hdev, "err %d", err); if (status) { mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp, &status); return; } mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match); new_settings(hdev, match.sk); if (match.sk) sock_put(match.sk); } static int set_le_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_mode *cp = cmd->param; u8 val = !!cp->val; int err; if (!val) { hci_clear_adv_instance_sync(hdev, NULL, 0x00, true); if (hci_dev_test_flag(hdev, HCI_LE_ADV)) hci_disable_advertising_sync(hdev); if (ext_adv_capable(hdev)) hci_remove_ext_adv_instance_sync(hdev, 0, cmd->sk); } else { hci_dev_set_flag(hdev, HCI_LE_ENABLED); } err = hci_write_le_host_supported_sync(hdev, val, 0); /* Make sure the controller has a good default for * advertising data. Restrict the update to when LE * has actually been enabled. During power on, the * update in powered_update_hci will take care of it. */ if (!err && hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { if (ext_adv_capable(hdev)) { int status; status = hci_setup_ext_adv_instance_sync(hdev, 0x00); if (!status) hci_update_scan_rsp_data_sync(hdev, 0x00); } else { hci_update_adv_data_sync(hdev, 0x00); hci_update_scan_rsp_data_sync(hdev, 0x00); } hci_update_passive_scan(hdev); } return err; } static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; u8 status = mgmt_status(err); struct sock *sk = cmd->sk; if (status) { mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, cmd_status_rsp, &status); return; } mgmt_pending_remove(cmd); mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, 0, NULL, 0); } static int set_mesh_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_set_mesh *cp = cmd->param; size_t len = cmd->param_len; memset(hdev->mesh_ad_types, 0, sizeof(hdev->mesh_ad_types)); if (cp->enable) hci_dev_set_flag(hdev, HCI_MESH); else hci_dev_clear_flag(hdev, HCI_MESH); len -= sizeof(*cp); /* If filters don't fit, forward all adv pkts */ if (len <= sizeof(hdev->mesh_ad_types)) memcpy(hdev->mesh_ad_types, cp->ad_types, len); hci_update_passive_scan_sync(hdev); return 0; } static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_mesh *cp = data; struct mgmt_pending_cmd *cmd; int err = 0; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev) || !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, MGMT_STATUS_NOT_SUPPORTED); if (cp->enable != 0x00 && cp->enable != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); cmd = mgmt_pending_add(sk, MGMT_OP_SET_MESH_RECEIVER, hdev, data, len); if (!cmd) err = -ENOMEM; else err = hci_cmd_sync_queue(hdev, set_mesh_sync, cmd, set_mesh_complete); if (err < 0) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, MGMT_STATUS_FAILED); if (cmd) mgmt_pending_remove(cmd); } hci_dev_unlock(hdev); return err; } static void mesh_send_start_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_mesh_tx *mesh_tx = data; struct mgmt_cp_mesh_send *send = (void *)mesh_tx->param; unsigned long mesh_send_interval; u8 mgmt_err = mgmt_status(err); /* Report any errors here, but don't report completion */ if (mgmt_err) { hci_dev_clear_flag(hdev, HCI_MESH_SENDING); /* Send Complete Error Code for handle */ mesh_send_complete(hdev, mesh_tx, false); return; } mesh_send_interval = msecs_to_jiffies((send->cnt) * 25); queue_delayed_work(hdev->req_workqueue, &hdev->mesh_send_done, mesh_send_interval); } static int mesh_send_sync(struct hci_dev *hdev, void *data) { struct mgmt_mesh_tx *mesh_tx = data; struct mgmt_cp_mesh_send *send = (void *)mesh_tx->param; struct adv_info *adv, *next_instance; u8 instance = hdev->le_num_of_adv_sets + 1; u16 timeout, duration; int err = 0; if (hdev->le_num_of_adv_sets <= hdev->adv_instance_cnt) return MGMT_STATUS_BUSY; timeout = 1000; duration = send->cnt * INTERVAL_TO_MS(hdev->le_adv_max_interval); adv = hci_add_adv_instance(hdev, instance, 0, send->adv_data_len, send->adv_data, 0, NULL, timeout, duration, HCI_ADV_TX_POWER_NO_PREFERENCE, hdev->le_adv_min_interval, hdev->le_adv_max_interval, mesh_tx->handle); if (!IS_ERR(adv)) mesh_tx->instance = instance; else err = PTR_ERR(adv); if (hdev->cur_adv_instance == instance) { /* If the currently advertised instance is being changed then * cancel the current advertising and schedule the next * instance. If there is only one instance then the overridden * advertising data will be visible right away. */ cancel_adv_timeout(hdev); next_instance = hci_get_next_instance(hdev, instance); if (next_instance) instance = next_instance->instance; else instance = 0; } else if (hdev->adv_instance_timeout) { /* Immediately advertise the new instance if no other, or * let it go naturally from queue if ADV is already happening */ instance = 0; } if (instance) return hci_schedule_adv_instance_sync(hdev, instance, true); return err; } static void send_count(struct mgmt_mesh_tx *mesh_tx, void *data) { struct mgmt_rp_mesh_read_features *rp = data; if (rp->used_handles >= rp->max_handles) return; rp->handles[rp->used_handles++] = mesh_tx->handle; } static int mesh_features(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_rp_mesh_read_features rp; if (!lmp_le_capable(hdev) || !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_READ_FEATURES, MGMT_STATUS_NOT_SUPPORTED); memset(&rp, 0, sizeof(rp)); rp.index = cpu_to_le16(hdev->id); if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) rp.max_handles = MESH_HANDLES_MAX; hci_dev_lock(hdev); if (rp.max_handles) mgmt_mesh_foreach(hdev, send_count, &rp, sk); mgmt_cmd_complete(sk, hdev->id, MGMT_OP_MESH_READ_FEATURES, 0, &rp, rp.used_handles + sizeof(rp) - MESH_HANDLES_MAX); hci_dev_unlock(hdev); return 0; } static int send_cancel(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_mesh_send_cancel *cancel = (void *)cmd->param; struct mgmt_mesh_tx *mesh_tx; if (!cancel->handle) { do { mesh_tx = mgmt_mesh_next(hdev, cmd->sk); if (mesh_tx) mesh_send_complete(hdev, mesh_tx, false); } while (mesh_tx); } else { mesh_tx = mgmt_mesh_find(hdev, cancel->handle); if (mesh_tx && mesh_tx->sk == cmd->sk) mesh_send_complete(hdev, mesh_tx, false); } mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL, 0, NULL, 0); mgmt_pending_free(cmd); return 0; } static int mesh_send_cancel(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_pending_cmd *cmd; int err; if (!lmp_le_capable(hdev) || !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL, MGMT_STATUS_NOT_SUPPORTED); if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL, MGMT_STATUS_REJECTED); hci_dev_lock(hdev); cmd = mgmt_pending_new(sk, MGMT_OP_MESH_SEND_CANCEL, hdev, data, len); if (!cmd) err = -ENOMEM; else err = hci_cmd_sync_queue(hdev, send_cancel, cmd, NULL); if (err < 0) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL, MGMT_STATUS_FAILED); if (cmd) mgmt_pending_free(cmd); } hci_dev_unlock(hdev); return err; } static int mesh_send(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mesh_tx *mesh_tx; struct mgmt_cp_mesh_send *send = data; struct mgmt_rp_mesh_read_features rp; bool sending; int err = 0; if (!lmp_le_capable(hdev) || !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, MGMT_STATUS_NOT_SUPPORTED); if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) || len <= MGMT_MESH_SEND_SIZE || len > (MGMT_MESH_SEND_SIZE + 31)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, MGMT_STATUS_REJECTED); hci_dev_lock(hdev); memset(&rp, 0, sizeof(rp)); rp.max_handles = MESH_HANDLES_MAX; mgmt_mesh_foreach(hdev, send_count, &rp, sk); if (rp.max_handles <= rp.used_handles) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, MGMT_STATUS_BUSY); goto done; } sending = hci_dev_test_flag(hdev, HCI_MESH_SENDING); mesh_tx = mgmt_mesh_add(sk, hdev, send, len); if (!mesh_tx) err = -ENOMEM; else if (!sending) err = hci_cmd_sync_queue(hdev, mesh_send_sync, mesh_tx, mesh_send_start_complete); if (err < 0) { bt_dev_err(hdev, "Send Mesh Failed %d", err); err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, MGMT_STATUS_FAILED); if (mesh_tx) { if (sending) mgmt_mesh_remove(mesh_tx); } } else { hci_dev_set_flag(hdev, HCI_MESH_SENDING); mgmt_cmd_complete(sk, hdev->id, MGMT_OP_MESH_SEND, 0, &mesh_tx->handle, 1); } done: hci_dev_unlock(hdev); return err; } static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; int err; u8 val, enabled; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, MGMT_STATUS_NOT_SUPPORTED); if (cp->val != 0x00 && cp->val != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, MGMT_STATUS_INVALID_PARAMS); /* Bluetooth single mode LE only controllers or dual-mode * controllers configured as LE only devices, do not allow * switching LE off. These have either LE enabled explicitly * or BR/EDR has been previously switched off. * * When trying to enable an already enabled LE, then gracefully * send a positive response. Trying to disable it however will * result into rejection. */ if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { if (cp->val == 0x01) return send_settings_rsp(sk, MGMT_OP_SET_LE, hdev); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, MGMT_STATUS_REJECTED); } hci_dev_lock(hdev); val = !!cp->val; enabled = lmp_host_le_capable(hdev); if (!hdev_is_powered(hdev) || val == enabled) { bool changed = false; if (val != hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { hci_dev_change_flag(hdev, HCI_LE_ENABLED); changed = true; } if (!val && hci_dev_test_flag(hdev, HCI_ADVERTISING)) { hci_dev_clear_flag(hdev, HCI_ADVERTISING); changed = true; } err = send_settings_rsp(sk, MGMT_OP_SET_LE, hdev); if (err < 0) goto unlock; if (changed) err = new_settings(hdev, sk); goto unlock; } if (pending_find(MGMT_OP_SET_LE, hdev) || pending_find(MGMT_OP_SET_ADVERTISING, hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, MGMT_STATUS_BUSY); goto unlock; } cmd = mgmt_pending_add(sk, MGMT_OP_SET_LE, hdev, data, len); if (!cmd) err = -ENOMEM; else err = hci_cmd_sync_queue(hdev, set_le_sync, cmd, set_le_complete); if (err < 0) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, MGMT_STATUS_FAILED); if (cmd) mgmt_pending_remove(cmd); } unlock: hci_dev_unlock(hdev); return err; } /* This is a helper function to test for pending mgmt commands that can * cause CoD or EIR HCI commands. We can only allow one such pending * mgmt command at a time since otherwise we cannot easily track what * the current values are, will be, and based on that calculate if a new * HCI command needs to be sent and if yes with what value. */ static bool pending_eir_or_class(struct hci_dev *hdev) { struct mgmt_pending_cmd *cmd; list_for_each_entry(cmd, &hdev->mgmt_pending, list) { switch (cmd->opcode) { case MGMT_OP_ADD_UUID: case MGMT_OP_REMOVE_UUID: case MGMT_OP_SET_DEV_CLASS: case MGMT_OP_SET_POWERED: return true; } } return false; } static const u8 bluetooth_base_uuid[] = { 0xfb, 0x34, 0x9b, 0x5f, 0x80, 0x00, 0x00, 0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; static u8 get_uuid_size(const u8 *uuid) { u32 val; if (memcmp(uuid, bluetooth_base_uuid, 12)) return 128; val = get_unaligned_le32(&uuid[12]); if (val > 0xffff) return 32; return 16; } static void mgmt_class_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; bt_dev_dbg(hdev, "err %d", err); mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), hdev->dev_class, 3); mgmt_pending_free(cmd); } static int add_uuid_sync(struct hci_dev *hdev, void *data) { int err; err = hci_update_class_sync(hdev); if (err) return err; return hci_update_eir_sync(hdev); } static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_uuid *cp = data; struct mgmt_pending_cmd *cmd; struct bt_uuid *uuid; int err; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); if (pending_eir_or_class(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_UUID, MGMT_STATUS_BUSY); goto failed; } uuid = kmalloc(sizeof(*uuid), GFP_KERNEL); if (!uuid) { err = -ENOMEM; goto failed; } memcpy(uuid->uuid, cp->uuid, 16); uuid->svc_hint = cp->svc_hint; uuid->size = get_uuid_size(cp->uuid); list_add_tail(&uuid->list, &hdev->uuids); cmd = mgmt_pending_new(sk, MGMT_OP_ADD_UUID, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } /* MGMT_OP_ADD_UUID don't require adapter the UP/Running so use * hci_cmd_sync_submit instead of hci_cmd_sync_queue. */ err = hci_cmd_sync_submit(hdev, add_uuid_sync, cmd, mgmt_class_complete); if (err < 0) { mgmt_pending_free(cmd); goto failed; } failed: hci_dev_unlock(hdev); return err; } static bool enable_service_cache(struct hci_dev *hdev) { if (!hdev_is_powered(hdev)) return false; if (!hci_dev_test_and_set_flag(hdev, HCI_SERVICE_CACHE)) { queue_delayed_work(hdev->workqueue, &hdev->service_cache, CACHE_TIMEOUT); return true; } return false; } static int remove_uuid_sync(struct hci_dev *hdev, void *data) { int err; err = hci_update_class_sync(hdev); if (err) return err; return hci_update_eir_sync(hdev); } static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_uuid *cp = data; struct mgmt_pending_cmd *cmd; struct bt_uuid *match, *tmp; static const u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; int err, found; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); if (pending_eir_or_class(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID, MGMT_STATUS_BUSY); goto unlock; } if (memcmp(cp->uuid, bt_uuid_any, 16) == 0) { hci_uuids_clear(hdev); if (enable_service_cache(hdev)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0, hdev->dev_class, 3); goto unlock; } goto update_class; } found = 0; list_for_each_entry_safe(match, tmp, &hdev->uuids, list) { if (memcmp(match->uuid, cp->uuid, 16) != 0) continue; list_del(&match->list); kfree(match); found++; } if (found == 0) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID, MGMT_STATUS_INVALID_PARAMS); goto unlock; } update_class: cmd = mgmt_pending_new(sk, MGMT_OP_REMOVE_UUID, hdev, data, len); if (!cmd) { err = -ENOMEM; goto unlock; } /* MGMT_OP_REMOVE_UUID don't require adapter the UP/Running so use * hci_cmd_sync_submit instead of hci_cmd_sync_queue. */ err = hci_cmd_sync_submit(hdev, remove_uuid_sync, cmd, mgmt_class_complete); if (err < 0) mgmt_pending_free(cmd); unlock: hci_dev_unlock(hdev); return err; } static int set_class_sync(struct hci_dev *hdev, void *data) { int err = 0; if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) { cancel_delayed_work_sync(&hdev->service_cache); err = hci_update_eir_sync(hdev); } if (err) return err; return hci_update_class_sync(hdev); } static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_dev_class *cp = data; struct mgmt_pending_cmd *cmd; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_bredr_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, MGMT_STATUS_NOT_SUPPORTED); hci_dev_lock(hdev); if (pending_eir_or_class(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, MGMT_STATUS_BUSY); goto unlock; } if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, MGMT_STATUS_INVALID_PARAMS); goto unlock; } hdev->major_class = cp->major; hdev->minor_class = cp->minor; if (!hdev_is_powered(hdev)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0, hdev->dev_class, 3); goto unlock; } cmd = mgmt_pending_new(sk, MGMT_OP_SET_DEV_CLASS, hdev, data, len); if (!cmd) { err = -ENOMEM; goto unlock; } /* MGMT_OP_SET_DEV_CLASS don't require adapter the UP/Running so use * hci_cmd_sync_submit instead of hci_cmd_sync_queue. */ err = hci_cmd_sync_submit(hdev, set_class_sync, cmd, mgmt_class_complete); if (err < 0) mgmt_pending_free(cmd); unlock: hci_dev_unlock(hdev); return err; } static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_load_link_keys *cp = data; const u16 max_key_count = ((U16_MAX - sizeof(*cp)) / sizeof(struct mgmt_link_key_info)); u16 key_count, expected_len; bool changed; int i; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_bredr_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_NOT_SUPPORTED); key_count = __le16_to_cpu(cp->key_count); if (key_count > max_key_count) { bt_dev_err(hdev, "load_link_keys: too big key_count value %u", key_count); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); } expected_len = struct_size(cp, keys, key_count); if (expected_len != len) { bt_dev_err(hdev, "load_link_keys: expected %u bytes, got %u bytes", expected_len, len); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); } if (cp->debug_keys != 0x00 && cp->debug_keys != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); bt_dev_dbg(hdev, "debug_keys %u key_count %u", cp->debug_keys, key_count); hci_dev_lock(hdev); hci_link_keys_clear(hdev); if (cp->debug_keys) changed = !hci_dev_test_and_set_flag(hdev, HCI_KEEP_DEBUG_KEYS); else changed = hci_dev_test_and_clear_flag(hdev, HCI_KEEP_DEBUG_KEYS); if (changed) new_settings(hdev, NULL); for (i = 0; i < key_count; i++) { struct mgmt_link_key_info *key = &cp->keys[i]; if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LINKKEY, key->val)) { bt_dev_warn(hdev, "Skipping blocked link key for %pMR", &key->addr.bdaddr); continue; } if (key->addr.type != BDADDR_BREDR) { bt_dev_warn(hdev, "Invalid link address type %u for %pMR", key->addr.type, &key->addr.bdaddr); continue; } if (key->type > 0x08) { bt_dev_warn(hdev, "Invalid link key type %u for %pMR", key->type, &key->addr.bdaddr); continue; } /* Always ignore debug keys and require a new pairing if * the user wants to use them. */ if (key->type == HCI_LK_DEBUG_COMBINATION) continue; hci_add_link_key(hdev, NULL, &key->addr.bdaddr, key->val, key->type, key->pin_len, NULL); } mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, 0, NULL, 0); hci_dev_unlock(hdev); return 0; } static int device_unpaired(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, struct sock *skip_sk) { struct mgmt_ev_device_unpaired ev; bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = addr_type; return mgmt_event(MGMT_EV_DEVICE_UNPAIRED, hdev, &ev, sizeof(ev), skip_sk); } static void unpair_device_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_unpair_device *cp = cmd->param; if (!err) device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, cmd->sk); cmd->cmd_complete(cmd, err); mgmt_pending_free(cmd); } static int unpair_device_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_unpair_device *cp = cmd->param; struct hci_conn *conn; if (cp->addr.type == BDADDR_BREDR) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); else conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, le_addr_type(cp->addr.type)); if (!conn) return 0; /* Disregard any possible error since the likes of hci_abort_conn_sync * will clean up the connection no matter the error. */ hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); return 0; } static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_unpair_device *cp = data; struct mgmt_rp_unpair_device rp; struct hci_conn_params *params; struct mgmt_pending_cmd *cmd; struct hci_conn *conn; u8 addr_type; int err; memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); rp.addr.type = cp->addr.type; if (!bdaddr_type_is_valid(cp->addr.type)) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_INVALID_PARAMS, &rp, sizeof(rp)); if (cp->disconnect != 0x00 && cp->disconnect != 0x01) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_INVALID_PARAMS, &rp, sizeof(rp)); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); goto unlock; } if (cp->addr.type == BDADDR_BREDR) { /* If disconnection is requested, then look up the * connection. If the remote device is connected, it * will be later used to terminate the link. * * Setting it to NULL explicitly will cause no * termination of the link. */ if (cp->disconnect) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); else conn = NULL; err = hci_remove_link_key(hdev, &cp->addr.bdaddr); if (err < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_PAIRED, &rp, sizeof(rp)); goto unlock; } goto done; } /* LE address type */ addr_type = le_addr_type(cp->addr.type); /* Abort any ongoing SMP pairing. Removes ltk and irk if they exist. */ err = smp_cancel_and_remove_pairing(hdev, &cp->addr.bdaddr, addr_type); if (err < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_PAIRED, &rp, sizeof(rp)); goto unlock; } conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, addr_type); if (!conn) { hci_conn_params_del(hdev, &cp->addr.bdaddr, addr_type); goto done; } /* Defer clearing up the connection parameters until closing to * give a chance of keeping them if a repairing happens. */ set_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags); /* Disable auto-connection parameters if present */ params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type); if (params) { if (params->explicit_connect) params->auto_connect = HCI_AUTO_CONN_EXPLICIT; else params->auto_connect = HCI_AUTO_CONN_DISABLED; } /* If disconnection is not requested, then clear the connection * variable so that the link is not terminated. */ if (!cp->disconnect) conn = NULL; done: /* If the connection variable is set, then termination of the * link is requested. */ if (!conn) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, 0, &rp, sizeof(rp)); device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, sk); goto unlock; } cmd = mgmt_pending_new(sk, MGMT_OP_UNPAIR_DEVICE, hdev, cp, sizeof(*cp)); if (!cmd) { err = -ENOMEM; goto unlock; } cmd->cmd_complete = addr_cmd_complete; err = hci_cmd_sync_queue(hdev, unpair_device_sync, cmd, unpair_device_complete); if (err < 0) mgmt_pending_free(cmd); unlock: hci_dev_unlock(hdev); return err; } static void disconnect_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; cmd->cmd_complete(cmd, mgmt_status(err)); mgmt_pending_free(cmd); } static int disconnect_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_disconnect *cp = cmd->param; struct hci_conn *conn; if (cp->addr.type == BDADDR_BREDR) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); else conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, le_addr_type(cp->addr.type)); if (!conn) return -ENOTCONN; /* Disregard any possible error since the likes of hci_abort_conn_sync * will clean up the connection no matter the error. */ hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); return 0; } static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_disconnect *cp = data; struct mgmt_rp_disconnect rp; struct mgmt_pending_cmd *cmd; int err; bt_dev_dbg(hdev, "sock %p", sk); memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); rp.addr.type = cp->addr.type; if (!bdaddr_type_is_valid(cp->addr.type)) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, MGMT_STATUS_INVALID_PARAMS, &rp, sizeof(rp)); hci_dev_lock(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); goto failed; } cmd = mgmt_pending_new(sk, MGMT_OP_DISCONNECT, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } cmd->cmd_complete = generic_cmd_complete; err = hci_cmd_sync_queue(hdev, disconnect_sync, cmd, disconnect_complete); if (err < 0) mgmt_pending_free(cmd); failed: hci_dev_unlock(hdev); return err; } static u8 link_to_bdaddr(u8 link_type, u8 addr_type) { switch (link_type) { case ISO_LINK: case LE_LINK: switch (addr_type) { case ADDR_LE_DEV_PUBLIC: return BDADDR_LE_PUBLIC; default: /* Fallback to LE Random address type */ return BDADDR_LE_RANDOM; } default: /* Fallback to BR/EDR type */ return BDADDR_BREDR; } } static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_get_connections *rp; struct hci_conn *c; int err; u16 i; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, MGMT_STATUS_NOT_POWERED); goto unlock; } i = 0; list_for_each_entry(c, &hdev->conn_hash.list, list) { if (test_bit(HCI_CONN_MGMT_CONNECTED, &c->flags)) i++; } rp = kmalloc(struct_size(rp, addr, i), GFP_KERNEL); if (!rp) { err = -ENOMEM; goto unlock; } i = 0; list_for_each_entry(c, &hdev->conn_hash.list, list) { if (!test_bit(HCI_CONN_MGMT_CONNECTED, &c->flags)) continue; bacpy(&rp->addr[i].bdaddr, &c->dst); rp->addr[i].type = link_to_bdaddr(c->type, c->dst_type); if (c->type == SCO_LINK || c->type == ESCO_LINK) continue; i++; } rp->conn_count = cpu_to_le16(i); /* Recalculate length in case of filtered SCO connections, etc */ err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, 0, rp, struct_size(rp, addr, i)); kfree(rp); unlock: hci_dev_unlock(hdev); return err; } static int send_pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_pin_code_neg_reply *cp) { struct mgmt_pending_cmd *cmd; int err; cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, hdev, cp, sizeof(*cp)); if (!cmd) return -ENOMEM; cmd->cmd_complete = addr_cmd_complete; err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(cp->addr.bdaddr), &cp->addr.bdaddr); if (err < 0) mgmt_pending_remove(cmd); return err; } static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct hci_conn *conn; struct mgmt_cp_pin_code_reply *cp = data; struct hci_cp_pin_code_reply reply; struct mgmt_pending_cmd *cmd; int err; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, MGMT_STATUS_NOT_POWERED); goto failed; } conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); if (!conn) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, MGMT_STATUS_NOT_CONNECTED); goto failed; } if (conn->pending_sec_level == BT_SECURITY_HIGH && cp->pin_len != 16) { struct mgmt_cp_pin_code_neg_reply ncp; memcpy(&ncp.addr, &cp->addr, sizeof(ncp.addr)); bt_dev_err(hdev, "PIN code is not 16 bytes long"); err = send_pin_code_neg_reply(sk, hdev, &ncp); if (err >= 0) err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, MGMT_STATUS_INVALID_PARAMS); goto failed; } cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } cmd->cmd_complete = addr_cmd_complete; bacpy(&reply.bdaddr, &cp->addr.bdaddr); reply.pin_len = cp->pin_len; memcpy(reply.pin_code, cp->pin_code, sizeof(reply.pin_code)); err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_REPLY, sizeof(reply), &reply); if (err < 0) mgmt_pending_remove(cmd); failed: hci_dev_unlock(hdev); return err; } static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_io_capability *cp = data; bt_dev_dbg(hdev, "sock %p", sk); if (cp->io_capability > SMP_IO_KEYBOARD_DISPLAY) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); hdev->io_capability = cp->io_capability; bt_dev_dbg(hdev, "IO capability set to 0x%02x", hdev->io_capability); hci_dev_unlock(hdev); return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, 0, NULL, 0); } static struct mgmt_pending_cmd *find_pairing(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; struct mgmt_pending_cmd *cmd; list_for_each_entry(cmd, &hdev->mgmt_pending, list) { if (cmd->opcode != MGMT_OP_PAIR_DEVICE) continue; if (cmd->user_data != conn) continue; return cmd; } return NULL; } static int pairing_complete(struct mgmt_pending_cmd *cmd, u8 status) { struct mgmt_rp_pair_device rp; struct hci_conn *conn = cmd->user_data; int err; bacpy(&rp.addr.bdaddr, &conn->dst); rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type); err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, status, &rp, sizeof(rp)); /* So we don't get further callbacks for this connection */ conn->connect_cfm_cb = NULL; conn->security_cfm_cb = NULL; conn->disconn_cfm_cb = NULL; hci_conn_drop(conn); /* The device is paired so there is no need to remove * its connection parameters anymore. */ clear_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags); hci_conn_put(conn); return err; } void mgmt_smp_complete(struct hci_conn *conn, bool complete) { u8 status = complete ? MGMT_STATUS_SUCCESS : MGMT_STATUS_FAILED; struct mgmt_pending_cmd *cmd; cmd = find_pairing(conn); if (cmd) { cmd->cmd_complete(cmd, status); mgmt_pending_remove(cmd); } } static void pairing_complete_cb(struct hci_conn *conn, u8 status) { struct mgmt_pending_cmd *cmd; BT_DBG("status %u", status); cmd = find_pairing(conn); if (!cmd) { BT_DBG("Unable to find a pending command"); return; } cmd->cmd_complete(cmd, mgmt_status(status)); mgmt_pending_remove(cmd); } static void le_pairing_complete_cb(struct hci_conn *conn, u8 status) { struct mgmt_pending_cmd *cmd; BT_DBG("status %u", status); if (!status) return; cmd = find_pairing(conn); if (!cmd) { BT_DBG("Unable to find a pending command"); return; } cmd->cmd_complete(cmd, mgmt_status(status)); mgmt_pending_remove(cmd); } static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_pair_device *cp = data; struct mgmt_rp_pair_device rp; struct mgmt_pending_cmd *cmd; u8 sec_level, auth_type; struct hci_conn *conn; int err; bt_dev_dbg(hdev, "sock %p", sk); memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); rp.addr.type = cp->addr.type; if (!bdaddr_type_is_valid(cp->addr.type)) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, MGMT_STATUS_INVALID_PARAMS, &rp, sizeof(rp)); if (cp->io_cap > SMP_IO_KEYBOARD_DISPLAY) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, MGMT_STATUS_INVALID_PARAMS, &rp, sizeof(rp)); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); goto unlock; } if (hci_bdaddr_is_paired(hdev, &cp->addr.bdaddr, cp->addr.type)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, MGMT_STATUS_ALREADY_PAIRED, &rp, sizeof(rp)); goto unlock; } sec_level = BT_SECURITY_MEDIUM; auth_type = HCI_AT_DEDICATED_BONDING; if (cp->addr.type == BDADDR_BREDR) { conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level, auth_type, CONN_REASON_PAIR_DEVICE, HCI_ACL_CONN_TIMEOUT); } else { u8 addr_type = le_addr_type(cp->addr.type); struct hci_conn_params *p; /* When pairing a new device, it is expected to remember * this device for future connections. Adding the connection * parameter information ahead of time allows tracking * of the peripheral preferred values and will speed up any * further connection establishment. * * If connection parameters already exist, then they * will be kept and this function does nothing. */ p = hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type); if (!p) { err = -EIO; goto unlock; } if (p->auto_connect == HCI_AUTO_CONN_EXPLICIT) p->auto_connect = HCI_AUTO_CONN_DISABLED; conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr, addr_type, sec_level, HCI_LE_CONN_TIMEOUT, CONN_REASON_PAIR_DEVICE); } if (IS_ERR(conn)) { int status; if (PTR_ERR(conn) == -EBUSY) status = MGMT_STATUS_BUSY; else if (PTR_ERR(conn) == -EOPNOTSUPP) status = MGMT_STATUS_NOT_SUPPORTED; else if (PTR_ERR(conn) == -ECONNREFUSED) status = MGMT_STATUS_REJECTED; else status = MGMT_STATUS_CONNECT_FAILED; err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, status, &rp, sizeof(rp)); goto unlock; } if (conn->connect_cfm_cb) { hci_conn_drop(conn); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto unlock; } cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, hdev, data, len); if (!cmd) { err = -ENOMEM; hci_conn_drop(conn); goto unlock; } cmd->cmd_complete = pairing_complete; /* For LE, just connecting isn't a proof that the pairing finished */ if (cp->addr.type == BDADDR_BREDR) { conn->connect_cfm_cb = pairing_complete_cb; conn->security_cfm_cb = pairing_complete_cb; conn->disconn_cfm_cb = pairing_complete_cb; } else { conn->connect_cfm_cb = le_pairing_complete_cb; conn->security_cfm_cb = le_pairing_complete_cb; conn->disconn_cfm_cb = le_pairing_complete_cb; } conn->io_capability = cp->io_cap; cmd->user_data = hci_conn_get(conn); if ((conn->state == BT_CONNECTED || conn->state == BT_CONFIG) && hci_conn_security(conn, sec_level, auth_type, true)) { cmd->cmd_complete(cmd, 0); mgmt_pending_remove(cmd); } err = 0; unlock: hci_dev_unlock(hdev); return err; } static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_addr_info *addr = data; struct mgmt_pending_cmd *cmd; struct hci_conn *conn; int err; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, MGMT_STATUS_NOT_POWERED); goto unlock; } cmd = pending_find(MGMT_OP_PAIR_DEVICE, hdev); if (!cmd) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, MGMT_STATUS_INVALID_PARAMS); goto unlock; } conn = cmd->user_data; if (bacmp(&addr->bdaddr, &conn->dst) != 0) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, MGMT_STATUS_INVALID_PARAMS); goto unlock; } cmd->cmd_complete(cmd, MGMT_STATUS_CANCELLED); mgmt_pending_remove(cmd); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, 0, addr, sizeof(*addr)); /* Since user doesn't want to proceed with the connection, abort any * ongoing pairing and then terminate the link if it was created * because of the pair device action. */ if (addr->type == BDADDR_BREDR) hci_remove_link_key(hdev, &addr->bdaddr); else smp_cancel_and_remove_pairing(hdev, &addr->bdaddr, le_addr_type(addr->type)); if (conn->conn_reason == CONN_REASON_PAIR_DEVICE) hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); unlock: hci_dev_unlock(hdev); return err; } static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, struct mgmt_addr_info *addr, u16 mgmt_op, u16 hci_op, __le32 passkey) { struct mgmt_pending_cmd *cmd; struct hci_conn *conn; int err; hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, MGMT_STATUS_NOT_POWERED, addr, sizeof(*addr)); goto done; } if (addr->type == BDADDR_BREDR) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &addr->bdaddr); else conn = hci_conn_hash_lookup_le(hdev, &addr->bdaddr, le_addr_type(addr->type)); if (!conn) { err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, MGMT_STATUS_NOT_CONNECTED, addr, sizeof(*addr)); goto done; } if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) { err = smp_user_confirm_reply(conn, mgmt_op, passkey); if (!err) err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, MGMT_STATUS_SUCCESS, addr, sizeof(*addr)); else err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, MGMT_STATUS_FAILED, addr, sizeof(*addr)); goto done; } cmd = mgmt_pending_add(sk, mgmt_op, hdev, addr, sizeof(*addr)); if (!cmd) { err = -ENOMEM; goto done; } cmd->cmd_complete = addr_cmd_complete; /* Continue with pairing via HCI */ if (hci_op == HCI_OP_USER_PASSKEY_REPLY) { struct hci_cp_user_passkey_reply cp; bacpy(&cp.bdaddr, &addr->bdaddr); cp.passkey = passkey; err = hci_send_cmd(hdev, hci_op, sizeof(cp), &cp); } else err = hci_send_cmd(hdev, hci_op, sizeof(addr->bdaddr), &addr->bdaddr); if (err < 0) mgmt_pending_remove(cmd); done: hci_dev_unlock(hdev); return err; } static int pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_pin_code_neg_reply *cp = data; bt_dev_dbg(hdev, "sock %p", sk); return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_PIN_CODE_NEG_REPLY, HCI_OP_PIN_CODE_NEG_REPLY, 0); } static int user_confirm_reply(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_user_confirm_reply *cp = data; bt_dev_dbg(hdev, "sock %p", sk); if (len != sizeof(*cp)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY, MGMT_STATUS_INVALID_PARAMS); return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_CONFIRM_REPLY, HCI_OP_USER_CONFIRM_REPLY, 0); } static int user_confirm_neg_reply(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_user_confirm_neg_reply *cp = data; bt_dev_dbg(hdev, "sock %p", sk); return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_CONFIRM_NEG_REPLY, HCI_OP_USER_CONFIRM_NEG_REPLY, 0); } static int user_passkey_reply(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_user_passkey_reply *cp = data; bt_dev_dbg(hdev, "sock %p", sk); return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_PASSKEY_REPLY, HCI_OP_USER_PASSKEY_REPLY, cp->passkey); } static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_user_passkey_neg_reply *cp = data; bt_dev_dbg(hdev, "sock %p", sk); return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_PASSKEY_NEG_REPLY, HCI_OP_USER_PASSKEY_NEG_REPLY, 0); } static int adv_expire_sync(struct hci_dev *hdev, u32 flags) { struct adv_info *adv_instance; adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance); if (!adv_instance) return 0; /* stop if current instance doesn't need to be changed */ if (!(adv_instance->flags & flags)) return 0; cancel_adv_timeout(hdev); adv_instance = hci_get_next_instance(hdev, adv_instance->instance); if (!adv_instance) return 0; hci_schedule_adv_instance_sync(hdev, adv_instance->instance, true); return 0; } static int name_changed_sync(struct hci_dev *hdev, void *data) { return adv_expire_sync(hdev, MGMT_ADV_FLAG_LOCAL_NAME); } static void set_name_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_set_local_name *cp = cmd->param; u8 status = mgmt_status(err); bt_dev_dbg(hdev, "err %d", err); if (cmd != pending_find(MGMT_OP_SET_LOCAL_NAME, hdev)) return; if (status) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, status); } else { mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, cp, sizeof(*cp)); if (hci_dev_test_flag(hdev, HCI_LE_ADV)) hci_cmd_sync_queue(hdev, name_changed_sync, NULL, NULL); } mgmt_pending_remove(cmd); } static int set_name_sync(struct hci_dev *hdev, void *data) { if (lmp_bredr_capable(hdev)) { hci_update_name_sync(hdev); hci_update_eir_sync(hdev); } /* The name is stored in the scan response data and so * no need to update the advertising data here. */ if (lmp_le_capable(hdev) && hci_dev_test_flag(hdev, HCI_ADVERTISING)) hci_update_scan_rsp_data_sync(hdev, hdev->cur_adv_instance); return 0; } static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_local_name *cp = data; struct mgmt_pending_cmd *cmd; int err; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); /* If the old values are the same as the new ones just return a * direct command complete event. */ if (!memcmp(hdev->dev_name, cp->name, sizeof(hdev->dev_name)) && !memcmp(hdev->short_name, cp->short_name, sizeof(hdev->short_name))) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, data, len); goto failed; } memcpy(hdev->short_name, cp->short_name, sizeof(hdev->short_name)); if (!hdev_is_powered(hdev)) { memcpy(hdev->dev_name, cp->name, sizeof(hdev->dev_name)); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, data, len); if (err < 0) goto failed; err = mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data, len, HCI_MGMT_LOCAL_NAME_EVENTS, sk); ext_info_changed(hdev, sk); goto failed; } cmd = mgmt_pending_add(sk, MGMT_OP_SET_LOCAL_NAME, hdev, data, len); if (!cmd) err = -ENOMEM; else err = hci_cmd_sync_queue(hdev, set_name_sync, cmd, set_name_complete); if (err < 0) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, MGMT_STATUS_FAILED); if (cmd) mgmt_pending_remove(cmd); goto failed; } memcpy(hdev->dev_name, cp->name, sizeof(hdev->dev_name)); failed: hci_dev_unlock(hdev); return err; } static int appearance_changed_sync(struct hci_dev *hdev, void *data) { return adv_expire_sync(hdev, MGMT_ADV_FLAG_APPEARANCE); } static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_appearance *cp = data; u16 appearance; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_APPEARANCE, MGMT_STATUS_NOT_SUPPORTED); appearance = le16_to_cpu(cp->appearance); hci_dev_lock(hdev); if (hdev->appearance != appearance) { hdev->appearance = appearance; if (hci_dev_test_flag(hdev, HCI_LE_ADV)) hci_cmd_sync_queue(hdev, appearance_changed_sync, NULL, NULL); ext_info_changed(hdev, sk); } err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_APPEARANCE, 0, NULL, 0); hci_dev_unlock(hdev); return err; } static int get_phy_configuration(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_rp_get_phy_configuration rp; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); memset(&rp, 0, sizeof(rp)); rp.supported_phys = cpu_to_le32(get_supported_phys(hdev)); rp.selected_phys = cpu_to_le32(get_selected_phys(hdev)); rp.configurable_phys = cpu_to_le32(get_configurable_phys(hdev)); hci_dev_unlock(hdev); return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_PHY_CONFIGURATION, 0, &rp, sizeof(rp)); } int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip) { struct mgmt_ev_phy_configuration_changed ev; memset(&ev, 0, sizeof(ev)); ev.selected_phys = cpu_to_le32(get_selected_phys(hdev)); return mgmt_event(MGMT_EV_PHY_CONFIGURATION_CHANGED, hdev, &ev, sizeof(ev), skip); } static void set_default_phy_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; struct sk_buff *skb = cmd->skb; u8 status = mgmt_status(err); if (cmd != pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev)) return; if (!status) { if (!skb) status = MGMT_STATUS_FAILED; else if (IS_ERR(skb)) status = mgmt_status(PTR_ERR(skb)); else status = mgmt_status(skb->data[0]); } bt_dev_dbg(hdev, "status %d", status); if (status) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_PHY_CONFIGURATION, status); } else { mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_PHY_CONFIGURATION, 0, NULL, 0); mgmt_phy_configuration_changed(hdev, cmd->sk); } if (skb && !IS_ERR(skb)) kfree_skb(skb); mgmt_pending_remove(cmd); } static int set_default_phy_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_set_phy_configuration *cp = cmd->param; struct hci_cp_le_set_default_phy cp_phy; u32 selected_phys = __le32_to_cpu(cp->selected_phys); memset(&cp_phy, 0, sizeof(cp_phy)); if (!(selected_phys & MGMT_PHY_LE_TX_MASK)) cp_phy.all_phys |= 0x01; if (!(selected_phys & MGMT_PHY_LE_RX_MASK)) cp_phy.all_phys |= 0x02; if (selected_phys & MGMT_PHY_LE_1M_TX) cp_phy.tx_phys |= HCI_LE_SET_PHY_1M; if (selected_phys & MGMT_PHY_LE_2M_TX) cp_phy.tx_phys |= HCI_LE_SET_PHY_2M; if (selected_phys & MGMT_PHY_LE_CODED_TX) cp_phy.tx_phys |= HCI_LE_SET_PHY_CODED; if (selected_phys & MGMT_PHY_LE_1M_RX) cp_phy.rx_phys |= HCI_LE_SET_PHY_1M; if (selected_phys & MGMT_PHY_LE_2M_RX) cp_phy.rx_phys |= HCI_LE_SET_PHY_2M; if (selected_phys & MGMT_PHY_LE_CODED_RX) cp_phy.rx_phys |= HCI_LE_SET_PHY_CODED; cmd->skb = __hci_cmd_sync(hdev, HCI_OP_LE_SET_DEFAULT_PHY, sizeof(cp_phy), &cp_phy, HCI_CMD_TIMEOUT); return 0; } static int set_phy_configuration(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_phy_configuration *cp = data; struct mgmt_pending_cmd *cmd; u32 selected_phys, configurable_phys, supported_phys, unconfigure_phys; u16 pkt_type = (HCI_DH1 | HCI_DM1); bool changed = false; int err; bt_dev_dbg(hdev, "sock %p", sk); configurable_phys = get_configurable_phys(hdev); supported_phys = get_supported_phys(hdev); selected_phys = __le32_to_cpu(cp->selected_phys); if (selected_phys & ~supported_phys) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PHY_CONFIGURATION, MGMT_STATUS_INVALID_PARAMS); unconfigure_phys = supported_phys & ~configurable_phys; if ((selected_phys & unconfigure_phys) != unconfigure_phys) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PHY_CONFIGURATION, MGMT_STATUS_INVALID_PARAMS); if (selected_phys == get_selected_phys(hdev)) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_PHY_CONFIGURATION, 0, NULL, 0); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PHY_CONFIGURATION, MGMT_STATUS_REJECTED); goto unlock; } if (pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PHY_CONFIGURATION, MGMT_STATUS_BUSY); goto unlock; } if (selected_phys & MGMT_PHY_BR_1M_3SLOT) pkt_type |= (HCI_DH3 | HCI_DM3); else pkt_type &= ~(HCI_DH3 | HCI_DM3); if (selected_phys & MGMT_PHY_BR_1M_5SLOT) pkt_type |= (HCI_DH5 | HCI_DM5); else pkt_type &= ~(HCI_DH5 | HCI_DM5); if (selected_phys & MGMT_PHY_EDR_2M_1SLOT) pkt_type &= ~HCI_2DH1; else pkt_type |= HCI_2DH1; if (selected_phys & MGMT_PHY_EDR_2M_3SLOT) pkt_type &= ~HCI_2DH3; else pkt_type |= HCI_2DH3; if (selected_phys & MGMT_PHY_EDR_2M_5SLOT) pkt_type &= ~HCI_2DH5; else pkt_type |= HCI_2DH5; if (selected_phys & MGMT_PHY_EDR_3M_1SLOT) pkt_type &= ~HCI_3DH1; else pkt_type |= HCI_3DH1; if (selected_phys & MGMT_PHY_EDR_3M_3SLOT) pkt_type &= ~HCI_3DH3; else pkt_type |= HCI_3DH3; if (selected_phys & MGMT_PHY_EDR_3M_5SLOT) pkt_type &= ~HCI_3DH5; else pkt_type |= HCI_3DH5; if (pkt_type != hdev->pkt_type) { hdev->pkt_type = pkt_type; changed = true; } if ((selected_phys & MGMT_PHY_LE_MASK) == (get_selected_phys(hdev) & MGMT_PHY_LE_MASK)) { if (changed) mgmt_phy_configuration_changed(hdev, sk); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_PHY_CONFIGURATION, 0, NULL, 0); goto unlock; } cmd = mgmt_pending_add(sk, MGMT_OP_SET_PHY_CONFIGURATION, hdev, data, len); if (!cmd) err = -ENOMEM; else err = hci_cmd_sync_queue(hdev, set_default_phy_sync, cmd, set_default_phy_complete); if (err < 0) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PHY_CONFIGURATION, MGMT_STATUS_FAILED); if (cmd) mgmt_pending_remove(cmd); } unlock: hci_dev_unlock(hdev); return err; } static int set_blocked_keys(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { int err = MGMT_STATUS_SUCCESS; struct mgmt_cp_set_blocked_keys *keys = data; const u16 max_key_count = ((U16_MAX - sizeof(*keys)) / sizeof(struct mgmt_blocked_key_info)); u16 key_count, expected_len; int i; bt_dev_dbg(hdev, "sock %p", sk); key_count = __le16_to_cpu(keys->key_count); if (key_count > max_key_count) { bt_dev_err(hdev, "too big key_count value %u", key_count); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BLOCKED_KEYS, MGMT_STATUS_INVALID_PARAMS); } expected_len = struct_size(keys, keys, key_count); if (expected_len != len) { bt_dev_err(hdev, "expected %u bytes, got %u bytes", expected_len, len); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BLOCKED_KEYS, MGMT_STATUS_INVALID_PARAMS); } hci_dev_lock(hdev); hci_blocked_keys_clear(hdev); for (i = 0; i < key_count; ++i) { struct blocked_key *b = kzalloc(sizeof(*b), GFP_KERNEL); if (!b) { err = MGMT_STATUS_NO_RESOURCES; break; } b->type = keys->keys[i].type; memcpy(b->val, keys->keys[i].val, sizeof(b->val)); list_add_rcu(&b->list, &hdev->blocked_keys); } hci_dev_unlock(hdev); return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_BLOCKED_KEYS, err, NULL, 0); } static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; int err; bool changed = false; bt_dev_dbg(hdev, "sock %p", sk); if (!test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_WIDEBAND_SPEECH, MGMT_STATUS_NOT_SUPPORTED); if (cp->val != 0x00 && cp->val != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_WIDEBAND_SPEECH, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (hdev_is_powered(hdev) && !!cp->val != hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_WIDEBAND_SPEECH, MGMT_STATUS_REJECTED); goto unlock; } if (cp->val) changed = !hci_dev_test_and_set_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED); else changed = hci_dev_test_and_clear_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED); err = send_settings_rsp(sk, MGMT_OP_SET_WIDEBAND_SPEECH, hdev); if (err < 0) goto unlock; if (changed) err = new_settings(hdev, sk); unlock: hci_dev_unlock(hdev); return err; } static int read_controller_cap(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { char buf[20]; struct mgmt_rp_read_controller_cap *rp = (void *)buf; u16 cap_len = 0; u8 flags = 0; u8 tx_power_range[2]; bt_dev_dbg(hdev, "sock %p", sk); memset(&buf, 0, sizeof(buf)); hci_dev_lock(hdev); /* When the Read Simple Pairing Options command is supported, then * the remote public key validation is supported. * * Alternatively, when Microsoft extensions are available, they can * indicate support for public key validation as well. */ if ((hdev->commands[41] & 0x08) || msft_curve_validity(hdev)) flags |= 0x01; /* Remote public key validation (BR/EDR) */ flags |= 0x02; /* Remote public key validation (LE) */ /* When the Read Encryption Key Size command is supported, then the * encryption key size is enforced. */ if (hdev->commands[20] & 0x10) flags |= 0x04; /* Encryption key size enforcement (BR/EDR) */ flags |= 0x08; /* Encryption key size enforcement (LE) */ cap_len = eir_append_data(rp->cap, cap_len, MGMT_CAP_SEC_FLAGS, &flags, 1); /* When the Read Simple Pairing Options command is supported, then * also max encryption key size information is provided. */ if (hdev->commands[41] & 0x08) cap_len = eir_append_le16(rp->cap, cap_len, MGMT_CAP_MAX_ENC_KEY_SIZE, hdev->max_enc_key_size); cap_len = eir_append_le16(rp->cap, cap_len, MGMT_CAP_SMP_MAX_ENC_KEY_SIZE, SMP_MAX_ENC_KEY_SIZE); /* Append the min/max LE tx power parameters if we were able to fetch * it from the controller */ if (hdev->commands[38] & 0x80) { memcpy(&tx_power_range[0], &hdev->min_le_tx_power, 1); memcpy(&tx_power_range[1], &hdev->max_le_tx_power, 1); cap_len = eir_append_data(rp->cap, cap_len, MGMT_CAP_LE_TX_PWR, tx_power_range, 2); } rp->cap_len = cpu_to_le16(cap_len); hci_dev_unlock(hdev); return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_CONTROLLER_CAP, 0, rp, sizeof(*rp) + cap_len); } #ifdef CONFIG_BT_FEATURE_DEBUG /* d4992530-b9ec-469f-ab01-6c481c47da1c */ static const u8 debug_uuid[16] = { 0x1c, 0xda, 0x47, 0x1c, 0x48, 0x6c, 0x01, 0xab, 0x9f, 0x46, 0xec, 0xb9, 0x30, 0x25, 0x99, 0xd4, }; #endif /* 330859bc-7506-492d-9370-9a6f0614037f */ static const u8 quality_report_uuid[16] = { 0x7f, 0x03, 0x14, 0x06, 0x6f, 0x9a, 0x70, 0x93, 0x2d, 0x49, 0x06, 0x75, 0xbc, 0x59, 0x08, 0x33, }; /* a6695ace-ee7f-4fb9-881a-5fac66c629af */ static const u8 offload_codecs_uuid[16] = { 0xaf, 0x29, 0xc6, 0x66, 0xac, 0x5f, 0x1a, 0x88, 0xb9, 0x4f, 0x7f, 0xee, 0xce, 0x5a, 0x69, 0xa6, }; /* 671b10b5-42c0-4696-9227-eb28d1b049d6 */ static const u8 le_simultaneous_roles_uuid[16] = { 0xd6, 0x49, 0xb0, 0xd1, 0x28, 0xeb, 0x27, 0x92, 0x96, 0x46, 0xc0, 0x42, 0xb5, 0x10, 0x1b, 0x67, }; /* 15c0a148-c273-11ea-b3de-0242ac130004 */ static const u8 rpa_resolution_uuid[16] = { 0x04, 0x00, 0x13, 0xac, 0x42, 0x02, 0xde, 0xb3, 0xea, 0x11, 0x73, 0xc2, 0x48, 0xa1, 0xc0, 0x15, }; /* 6fbaf188-05e0-496a-9885-d6ddfdb4e03e */ static const u8 iso_socket_uuid[16] = { 0x3e, 0xe0, 0xb4, 0xfd, 0xdd, 0xd6, 0x85, 0x98, 0x6a, 0x49, 0xe0, 0x05, 0x88, 0xf1, 0xba, 0x6f, }; /* 2ce463d7-7a03-4d8d-bf05-5f24e8f36e76 */ static const u8 mgmt_mesh_uuid[16] = { 0x76, 0x6e, 0xf3, 0xe8, 0x24, 0x5f, 0x05, 0xbf, 0x8d, 0x4d, 0x03, 0x7a, 0xd7, 0x63, 0xe4, 0x2c, }; static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_exp_features_info *rp; size_t len; u16 idx = 0; u32 flags; int status; bt_dev_dbg(hdev, "sock %p", sk); /* Enough space for 7 features */ len = sizeof(*rp) + (sizeof(rp->features[0]) * 7); rp = kzalloc(len, GFP_KERNEL); if (!rp) return -ENOMEM; #ifdef CONFIG_BT_FEATURE_DEBUG if (!hdev) { flags = bt_dbg_get() ? BIT(0) : 0; memcpy(rp->features[idx].uuid, debug_uuid, 16); rp->features[idx].flags = cpu_to_le32(flags); idx++; } #endif if (hdev && hci_dev_le_state_simultaneous(hdev)) { if (hci_dev_test_flag(hdev, HCI_LE_SIMULTANEOUS_ROLES)) flags = BIT(0); else flags = 0; memcpy(rp->features[idx].uuid, le_simultaneous_roles_uuid, 16); rp->features[idx].flags = cpu_to_le32(flags); idx++; } if (hdev && ll_privacy_capable(hdev)) { if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) flags = BIT(0) | BIT(1); else flags = BIT(1); memcpy(rp->features[idx].uuid, rpa_resolution_uuid, 16); rp->features[idx].flags = cpu_to_le32(flags); idx++; } if (hdev && (aosp_has_quality_report(hdev) || hdev->set_quality_report)) { if (hci_dev_test_flag(hdev, HCI_QUALITY_REPORT)) flags = BIT(0); else flags = 0; memcpy(rp->features[idx].uuid, quality_report_uuid, 16); rp->features[idx].flags = cpu_to_le32(flags); idx++; } if (hdev && hdev->get_data_path_id) { if (hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED)) flags = BIT(0); else flags = 0; memcpy(rp->features[idx].uuid, offload_codecs_uuid, 16); rp->features[idx].flags = cpu_to_le32(flags); idx++; } if (IS_ENABLED(CONFIG_BT_LE)) { flags = iso_enabled() ? BIT(0) : 0; memcpy(rp->features[idx].uuid, iso_socket_uuid, 16); rp->features[idx].flags = cpu_to_le32(flags); idx++; } if (hdev && lmp_le_capable(hdev)) { if (hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) flags = BIT(0); else flags = 0; memcpy(rp->features[idx].uuid, mgmt_mesh_uuid, 16); rp->features[idx].flags = cpu_to_le32(flags); idx++; } rp->feature_count = cpu_to_le16(idx); /* After reading the experimental features information, enable * the events to update client on any future change. */ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); status = mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE, MGMT_OP_READ_EXP_FEATURES_INFO, 0, rp, sizeof(*rp) + (20 * idx)); kfree(rp); return status; } static int exp_ll_privacy_feature_changed(bool enabled, struct hci_dev *hdev, struct sock *skip) { struct mgmt_ev_exp_feature_changed ev; memset(&ev, 0, sizeof(ev)); memcpy(ev.uuid, rpa_resolution_uuid, 16); ev.flags = cpu_to_le32((enabled ? BIT(0) : 0) | BIT(1)); // Do we need to be atomic with the conn_flags? if (enabled && privacy_mode_capable(hdev)) hdev->conn_flags |= HCI_CONN_FLAG_DEVICE_PRIVACY; else hdev->conn_flags &= ~HCI_CONN_FLAG_DEVICE_PRIVACY; return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, hdev, &ev, sizeof(ev), HCI_MGMT_EXP_FEATURE_EVENTS, skip); } static int exp_feature_changed(struct hci_dev *hdev, const u8 *uuid, bool enabled, struct sock *skip) { struct mgmt_ev_exp_feature_changed ev; memset(&ev, 0, sizeof(ev)); memcpy(ev.uuid, uuid, 16); ev.flags = cpu_to_le32(enabled ? BIT(0) : 0); return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, hdev, &ev, sizeof(ev), HCI_MGMT_EXP_FEATURE_EVENTS, skip); } #define EXP_FEAT(_uuid, _set_func) \ { \ .uuid = _uuid, \ .set_func = _set_func, \ } /* The zero key uuid is special. Multiple exp features are set through it. */ static int set_zero_key_func(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_set_exp_feature *cp, u16 data_len) { struct mgmt_rp_set_exp_feature rp; memset(rp.uuid, 0, 16); rp.flags = cpu_to_le32(0); #ifdef CONFIG_BT_FEATURE_DEBUG if (!hdev) { bool changed = bt_dbg_get(); bt_dbg_set(false); if (changed) exp_feature_changed(NULL, ZERO_KEY, false, sk); } #endif if (hdev && use_ll_privacy(hdev) && !hdev_is_powered(hdev)) { bool changed; changed = hci_dev_test_and_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY); if (changed) exp_feature_changed(hdev, rpa_resolution_uuid, false, sk); } hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, 0, &rp, sizeof(rp)); } #ifdef CONFIG_BT_FEATURE_DEBUG static int set_debug_func(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_set_exp_feature *cp, u16 data_len) { struct mgmt_rp_set_exp_feature rp; bool val, changed; int err; /* Command requires to use the non-controller index */ if (hdev) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_INDEX); /* Parameters are limited to a single octet */ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) return mgmt_cmd_status(sk, MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); /* Only boolean on/off is supported */ if (cp->param[0] != 0x00 && cp->param[0] != 0x01) return mgmt_cmd_status(sk, MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); val = !!cp->param[0]; changed = val ? !bt_dbg_get() : bt_dbg_get(); bt_dbg_set(val); memcpy(rp.uuid, debug_uuid, 16); rp.flags = cpu_to_le32(val ? BIT(0) : 0); hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, 0, &rp, sizeof(rp)); if (changed) exp_feature_changed(hdev, debug_uuid, val, sk); return err; } #endif static int set_mgmt_mesh_func(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_set_exp_feature *cp, u16 data_len) { struct mgmt_rp_set_exp_feature rp; bool val, changed; int err; /* Command requires to use the controller index */ if (!hdev) return mgmt_cmd_status(sk, MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_INDEX); /* Parameters are limited to a single octet */ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); /* Only boolean on/off is supported */ if (cp->param[0] != 0x00 && cp->param[0] != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); val = !!cp->param[0]; if (val) { changed = !hci_dev_test_and_set_flag(hdev, HCI_MESH_EXPERIMENTAL); } else { hci_dev_clear_flag(hdev, HCI_MESH); changed = hci_dev_test_and_clear_flag(hdev, HCI_MESH_EXPERIMENTAL); } memcpy(rp.uuid, mgmt_mesh_uuid, 16); rp.flags = cpu_to_le32(val ? BIT(0) : 0); hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, 0, &rp, sizeof(rp)); if (changed) exp_feature_changed(hdev, mgmt_mesh_uuid, val, sk); return err; } static int set_rpa_resolution_func(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_set_exp_feature *cp, u16 data_len) { struct mgmt_rp_set_exp_feature rp; bool val, changed; int err; u32 flags; /* Command requires to use the controller index */ if (!hdev) return mgmt_cmd_status(sk, MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_INDEX); /* Changes can only be made when controller is powered down */ if (hdev_is_powered(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_REJECTED); /* Parameters are limited to a single octet */ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); /* Only boolean on/off is supported */ if (cp->param[0] != 0x00 && cp->param[0] != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); val = !!cp->param[0]; if (val) { changed = !hci_dev_test_and_set_flag(hdev, HCI_ENABLE_LL_PRIVACY); hci_dev_clear_flag(hdev, HCI_ADVERTISING); /* Enable LL privacy + supported settings changed */ flags = BIT(0) | BIT(1); } else { changed = hci_dev_test_and_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY); /* Disable LL privacy + supported settings changed */ flags = BIT(1); } memcpy(rp.uuid, rpa_resolution_uuid, 16); rp.flags = cpu_to_le32(flags); hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, 0, &rp, sizeof(rp)); if (changed) exp_ll_privacy_feature_changed(val, hdev, sk); return err; } static int set_quality_report_func(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_set_exp_feature *cp, u16 data_len) { struct mgmt_rp_set_exp_feature rp; bool val, changed; int err; /* Command requires to use a valid controller index */ if (!hdev) return mgmt_cmd_status(sk, MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_INDEX); /* Parameters are limited to a single octet */ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); /* Only boolean on/off is supported */ if (cp->param[0] != 0x00 && cp->param[0] != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); hci_req_sync_lock(hdev); val = !!cp->param[0]; changed = (val != hci_dev_test_flag(hdev, HCI_QUALITY_REPORT)); if (!aosp_has_quality_report(hdev) && !hdev->set_quality_report) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_NOT_SUPPORTED); goto unlock_quality_report; } if (changed) { if (hdev->set_quality_report) err = hdev->set_quality_report(hdev, val); else err = aosp_set_quality_report(hdev, val); if (err) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_FAILED); goto unlock_quality_report; } if (val) hci_dev_set_flag(hdev, HCI_QUALITY_REPORT); else hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); } bt_dev_dbg(hdev, "quality report enable %d changed %d", val, changed); memcpy(rp.uuid, quality_report_uuid, 16); rp.flags = cpu_to_le32(val ? BIT(0) : 0); hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, 0, &rp, sizeof(rp)); if (changed) exp_feature_changed(hdev, quality_report_uuid, val, sk); unlock_quality_report: hci_req_sync_unlock(hdev); return err; } static int set_offload_codec_func(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_set_exp_feature *cp, u16 data_len) { bool val, changed; int err; struct mgmt_rp_set_exp_feature rp; /* Command requires to use a valid controller index */ if (!hdev) return mgmt_cmd_status(sk, MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_INDEX); /* Parameters are limited to a single octet */ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); /* Only boolean on/off is supported */ if (cp->param[0] != 0x00 && cp->param[0] != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); val = !!cp->param[0]; changed = (val != hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED)); if (!hdev->get_data_path_id) { return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_NOT_SUPPORTED); } if (changed) { if (val) hci_dev_set_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED); else hci_dev_clear_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED); } bt_dev_info(hdev, "offload codecs enable %d changed %d", val, changed); memcpy(rp.uuid, offload_codecs_uuid, 16); rp.flags = cpu_to_le32(val ? BIT(0) : 0); hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, 0, &rp, sizeof(rp)); if (changed) exp_feature_changed(hdev, offload_codecs_uuid, val, sk); return err; } static int set_le_simultaneous_roles_func(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_set_exp_feature *cp, u16 data_len) { bool val, changed; int err; struct mgmt_rp_set_exp_feature rp; /* Command requires to use a valid controller index */ if (!hdev) return mgmt_cmd_status(sk, MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_INDEX); /* Parameters are limited to a single octet */ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); /* Only boolean on/off is supported */ if (cp->param[0] != 0x00 && cp->param[0] != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); val = !!cp->param[0]; changed = (val != hci_dev_test_flag(hdev, HCI_LE_SIMULTANEOUS_ROLES)); if (!hci_dev_le_state_simultaneous(hdev)) { return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_NOT_SUPPORTED); } if (changed) { if (val) hci_dev_set_flag(hdev, HCI_LE_SIMULTANEOUS_ROLES); else hci_dev_clear_flag(hdev, HCI_LE_SIMULTANEOUS_ROLES); } bt_dev_info(hdev, "LE simultaneous roles enable %d changed %d", val, changed); memcpy(rp.uuid, le_simultaneous_roles_uuid, 16); rp.flags = cpu_to_le32(val ? BIT(0) : 0); hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, 0, &rp, sizeof(rp)); if (changed) exp_feature_changed(hdev, le_simultaneous_roles_uuid, val, sk); return err; } #ifdef CONFIG_BT_LE static int set_iso_socket_func(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_set_exp_feature *cp, u16 data_len) { struct mgmt_rp_set_exp_feature rp; bool val, changed = false; int err; /* Command requires to use the non-controller index */ if (hdev) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_INDEX); /* Parameters are limited to a single octet */ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) return mgmt_cmd_status(sk, MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); /* Only boolean on/off is supported */ if (cp->param[0] != 0x00 && cp->param[0] != 0x01) return mgmt_cmd_status(sk, MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_PARAMS); val = cp->param[0] ? true : false; if (val) err = iso_init(); else err = iso_exit(); if (!err) changed = true; memcpy(rp.uuid, iso_socket_uuid, 16); rp.flags = cpu_to_le32(val ? BIT(0) : 0); hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, 0, &rp, sizeof(rp)); if (changed) exp_feature_changed(hdev, iso_socket_uuid, val, sk); return err; } #endif static const struct mgmt_exp_feature { const u8 *uuid; int (*set_func)(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_set_exp_feature *cp, u16 data_len); } exp_features[] = { EXP_FEAT(ZERO_KEY, set_zero_key_func), #ifdef CONFIG_BT_FEATURE_DEBUG EXP_FEAT(debug_uuid, set_debug_func), #endif EXP_FEAT(mgmt_mesh_uuid, set_mgmt_mesh_func), EXP_FEAT(rpa_resolution_uuid, set_rpa_resolution_func), EXP_FEAT(quality_report_uuid, set_quality_report_func), EXP_FEAT(offload_codecs_uuid, set_offload_codec_func), EXP_FEAT(le_simultaneous_roles_uuid, set_le_simultaneous_roles_func), #ifdef CONFIG_BT_LE EXP_FEAT(iso_socket_uuid, set_iso_socket_func), #endif /* end with a null feature */ EXP_FEAT(NULL, NULL) }; static int set_exp_feature(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_cp_set_exp_feature *cp = data; size_t i = 0; bt_dev_dbg(hdev, "sock %p", sk); for (i = 0; exp_features[i].uuid; i++) { if (!memcmp(cp->uuid, exp_features[i].uuid, 16)) return exp_features[i].set_func(sk, hdev, cp, data_len); } return mgmt_cmd_status(sk, hdev ? hdev->id : MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_NOT_SUPPORTED); } static u32 get_params_flags(struct hci_dev *hdev, struct hci_conn_params *params) { u32 flags = hdev->conn_flags; /* Devices using RPAs can only be programmed in the acceptlist if * LL Privacy has been enable otherwise they cannot mark * HCI_CONN_FLAG_REMOTE_WAKEUP. */ if ((flags & HCI_CONN_FLAG_REMOTE_WAKEUP) && !use_ll_privacy(hdev) && hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) flags &= ~HCI_CONN_FLAG_REMOTE_WAKEUP; return flags; } static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_cp_get_device_flags *cp = data; struct mgmt_rp_get_device_flags rp; struct bdaddr_list_with_flags *br_params; struct hci_conn_params *params; u32 supported_flags; u32 current_flags = 0; u8 status = MGMT_STATUS_INVALID_PARAMS; bt_dev_dbg(hdev, "Get device flags %pMR (type 0x%x)\n", &cp->addr.bdaddr, cp->addr.type); hci_dev_lock(hdev); supported_flags = hdev->conn_flags; memset(&rp, 0, sizeof(rp)); if (cp->addr.type == BDADDR_BREDR) { br_params = hci_bdaddr_list_lookup_with_flags(&hdev->accept_list, &cp->addr.bdaddr, cp->addr.type); if (!br_params) goto done; current_flags = br_params->flags; } else { params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, le_addr_type(cp->addr.type)); if (!params) goto done; supported_flags = get_params_flags(hdev, params); current_flags = params->flags; } bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); rp.addr.type = cp->addr.type; rp.supported_flags = cpu_to_le32(supported_flags); rp.current_flags = cpu_to_le32(current_flags); status = MGMT_STATUS_SUCCESS; done: hci_dev_unlock(hdev); return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_DEVICE_FLAGS, status, &rp, sizeof(rp)); } static void device_flags_changed(struct sock *sk, struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type, u32 supported_flags, u32 current_flags) { struct mgmt_ev_device_flags_changed ev; bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = bdaddr_type; ev.supported_flags = cpu_to_le32(supported_flags); ev.current_flags = cpu_to_le32(current_flags); mgmt_event(MGMT_EV_DEVICE_FLAGS_CHANGED, hdev, &ev, sizeof(ev), sk); } static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_device_flags *cp = data; struct bdaddr_list_with_flags *br_params; struct hci_conn_params *params; u8 status = MGMT_STATUS_INVALID_PARAMS; u32 supported_flags; u32 current_flags = __le32_to_cpu(cp->current_flags); bt_dev_dbg(hdev, "Set device flags %pMR (type 0x%x) = 0x%x", &cp->addr.bdaddr, cp->addr.type, current_flags); // We should take hci_dev_lock() early, I think.. conn_flags can change supported_flags = hdev->conn_flags; if ((supported_flags | current_flags) != supported_flags) { bt_dev_warn(hdev, "Bad flag given (0x%x) vs supported (0x%0x)", current_flags, supported_flags); goto done; } hci_dev_lock(hdev); if (cp->addr.type == BDADDR_BREDR) { br_params = hci_bdaddr_list_lookup_with_flags(&hdev->accept_list, &cp->addr.bdaddr, cp->addr.type); if (br_params) { br_params->flags = current_flags; status = MGMT_STATUS_SUCCESS; } else { bt_dev_warn(hdev, "No such BR/EDR device %pMR (0x%x)", &cp->addr.bdaddr, cp->addr.type); } goto unlock; } params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, le_addr_type(cp->addr.type)); if (!params) { bt_dev_warn(hdev, "No such LE device %pMR (0x%x)", &cp->addr.bdaddr, le_addr_type(cp->addr.type)); goto unlock; } supported_flags = get_params_flags(hdev, params); if ((supported_flags | current_flags) != supported_flags) { bt_dev_warn(hdev, "Bad flag given (0x%x) vs supported (0x%0x)", current_flags, supported_flags); goto unlock; } WRITE_ONCE(params->flags, current_flags); status = MGMT_STATUS_SUCCESS; /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY * has been set. */ if (params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY) hci_update_passive_scan(hdev); unlock: hci_dev_unlock(hdev); done: if (status == MGMT_STATUS_SUCCESS) device_flags_changed(sk, hdev, &cp->addr.bdaddr, cp->addr.type, supported_flags, current_flags); return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_FLAGS, status, &cp->addr, sizeof(cp->addr)); } static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev, u16 handle) { struct mgmt_ev_adv_monitor_added ev; ev.monitor_handle = cpu_to_le16(handle); mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk); } void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle) { struct mgmt_ev_adv_monitor_removed ev; struct mgmt_pending_cmd *cmd; struct sock *sk_skip = NULL; struct mgmt_cp_remove_adv_monitor *cp; cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev); if (cmd) { cp = cmd->param; if (cp->monitor_handle) sk_skip = cmd->sk; } ev.monitor_handle = cpu_to_le16(handle); mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk_skip); } static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct adv_monitor *monitor = NULL; struct mgmt_rp_read_adv_monitor_features *rp = NULL; int handle, err; size_t rp_size = 0; __u32 supported = 0; __u32 enabled = 0; __u16 num_handles = 0; __u16 handles[HCI_MAX_ADV_MONITOR_NUM_HANDLES]; BT_DBG("request for %s", hdev->name); hci_dev_lock(hdev); if (msft_monitor_supported(hdev)) supported |= MGMT_ADV_MONITOR_FEATURE_MASK_OR_PATTERNS; idr_for_each_entry(&hdev->adv_monitors_idr, monitor, handle) handles[num_handles++] = monitor->handle; hci_dev_unlock(hdev); rp_size = sizeof(*rp) + (num_handles * sizeof(u16)); rp = kmalloc(rp_size, GFP_KERNEL); if (!rp) return -ENOMEM; /* All supported features are currently enabled */ enabled = supported; rp->supported_features = cpu_to_le32(supported); rp->enabled_features = cpu_to_le32(enabled); rp->max_num_handles = cpu_to_le16(HCI_MAX_ADV_MONITOR_NUM_HANDLES); rp->max_num_patterns = HCI_MAX_ADV_MONITOR_NUM_PATTERNS; rp->num_handles = cpu_to_le16(num_handles); if (num_handles) memcpy(&rp->handles, &handles, (num_handles * sizeof(u16))); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_ADV_MONITOR_FEATURES, MGMT_STATUS_SUCCESS, rp, rp_size); kfree(rp); return err; } static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, void *data, int status) { struct mgmt_rp_add_adv_patterns_monitor rp; struct mgmt_pending_cmd *cmd = data; struct adv_monitor *monitor = cmd->user_data; hci_dev_lock(hdev); rp.monitor_handle = cpu_to_le16(monitor->handle); if (!status) { mgmt_adv_monitor_added(cmd->sk, hdev, monitor->handle); hdev->adv_monitors_cnt++; if (monitor->state == ADV_MONITOR_STATE_NOT_REGISTERED) monitor->state = ADV_MONITOR_STATE_REGISTERED; hci_update_passive_scan(hdev); } mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); mgmt_pending_remove(cmd); hci_dev_unlock(hdev); bt_dev_dbg(hdev, "add monitor %d complete, status %d", rp.monitor_handle, status); } static int mgmt_add_adv_patterns_monitor_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct adv_monitor *monitor = cmd->user_data; return hci_add_adv_monitor(hdev, monitor); } static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, struct adv_monitor *m, u8 status, void *data, u16 len, u16 op) { struct mgmt_pending_cmd *cmd; int err; hci_dev_lock(hdev); if (status) goto unlock; if (pending_find(MGMT_OP_SET_LE, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev) || pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) { status = MGMT_STATUS_BUSY; goto unlock; } cmd = mgmt_pending_add(sk, op, hdev, data, len); if (!cmd) { status = MGMT_STATUS_NO_RESOURCES; goto unlock; } cmd->user_data = m; err = hci_cmd_sync_queue(hdev, mgmt_add_adv_patterns_monitor_sync, cmd, mgmt_add_adv_patterns_monitor_complete); if (err) { if (err == -ENOMEM) status = MGMT_STATUS_NO_RESOURCES; else status = MGMT_STATUS_FAILED; goto unlock; } hci_dev_unlock(hdev); return 0; unlock: hci_free_adv_monitor(hdev, m); hci_dev_unlock(hdev); return mgmt_cmd_status(sk, hdev->id, op, status); } static void parse_adv_monitor_rssi(struct adv_monitor *m, struct mgmt_adv_rssi_thresholds *rssi) { if (rssi) { m->rssi.low_threshold = rssi->low_threshold; m->rssi.low_threshold_timeout = __le16_to_cpu(rssi->low_threshold_timeout); m->rssi.high_threshold = rssi->high_threshold; m->rssi.high_threshold_timeout = __le16_to_cpu(rssi->high_threshold_timeout); m->rssi.sampling_period = rssi->sampling_period; } else { /* Default values. These numbers are the least constricting * parameters for MSFT API to work, so it behaves as if there * are no rssi parameter to consider. May need to be changed * if other API are to be supported. */ m->rssi.low_threshold = -127; m->rssi.low_threshold_timeout = 60; m->rssi.high_threshold = -127; m->rssi.high_threshold_timeout = 0; m->rssi.sampling_period = 0; } } static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count, struct mgmt_adv_pattern *patterns) { u8 offset = 0, length = 0; struct adv_pattern *p = NULL; int i; for (i = 0; i < pattern_count; i++) { offset = patterns[i].offset; length = patterns[i].length; if (offset >= HCI_MAX_EXT_AD_LENGTH || length > HCI_MAX_EXT_AD_LENGTH || (offset + length) > HCI_MAX_EXT_AD_LENGTH) return MGMT_STATUS_INVALID_PARAMS; p = kmalloc(sizeof(*p), GFP_KERNEL); if (!p) return MGMT_STATUS_NO_RESOURCES; p->ad_type = patterns[i].ad_type; p->offset = patterns[i].offset; p->length = patterns[i].length; memcpy(p->value, patterns[i].value, p->length); INIT_LIST_HEAD(&p->list); list_add(&p->list, &m->patterns); } return MGMT_STATUS_SUCCESS; } static int add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_adv_patterns_monitor *cp = data; struct adv_monitor *m = NULL; u8 status = MGMT_STATUS_SUCCESS; size_t expected_size = sizeof(*cp); BT_DBG("request for %s", hdev->name); if (len <= sizeof(*cp)) { status = MGMT_STATUS_INVALID_PARAMS; goto done; } expected_size += cp->pattern_count * sizeof(struct mgmt_adv_pattern); if (len != expected_size) { status = MGMT_STATUS_INVALID_PARAMS; goto done; } m = kzalloc(sizeof(*m), GFP_KERNEL); if (!m) { status = MGMT_STATUS_NO_RESOURCES; goto done; } INIT_LIST_HEAD(&m->patterns); parse_adv_monitor_rssi(m, NULL); status = parse_adv_monitor_pattern(m, cp->pattern_count, cp->patterns); done: return __add_adv_patterns_monitor(sk, hdev, m, status, data, len, MGMT_OP_ADD_ADV_PATTERNS_MONITOR); } static int add_adv_patterns_monitor_rssi(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_adv_patterns_monitor_rssi *cp = data; struct adv_monitor *m = NULL; u8 status = MGMT_STATUS_SUCCESS; size_t expected_size = sizeof(*cp); BT_DBG("request for %s", hdev->name); if (len <= sizeof(*cp)) { status = MGMT_STATUS_INVALID_PARAMS; goto done; } expected_size += cp->pattern_count * sizeof(struct mgmt_adv_pattern); if (len != expected_size) { status = MGMT_STATUS_INVALID_PARAMS; goto done; } m = kzalloc(sizeof(*m), GFP_KERNEL); if (!m) { status = MGMT_STATUS_NO_RESOURCES; goto done; } INIT_LIST_HEAD(&m->patterns); parse_adv_monitor_rssi(m, &cp->rssi); status = parse_adv_monitor_pattern(m, cp->pattern_count, cp->patterns); done: return __add_adv_patterns_monitor(sk, hdev, m, status, data, len, MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI); } static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, void *data, int status) { struct mgmt_rp_remove_adv_monitor rp; struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_remove_adv_monitor *cp = cmd->param; hci_dev_lock(hdev); rp.monitor_handle = cp->monitor_handle; if (!status) hci_update_passive_scan(hdev); mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); mgmt_pending_remove(cmd); hci_dev_unlock(hdev); bt_dev_dbg(hdev, "remove monitor %d complete, status %d", rp.monitor_handle, status); } static int mgmt_remove_adv_monitor_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_remove_adv_monitor *cp = cmd->param; u16 handle = __le16_to_cpu(cp->monitor_handle); if (!handle) return hci_remove_all_adv_monitor(hdev); return hci_remove_single_adv_monitor(hdev, handle); } static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_pending_cmd *cmd; int err, status; hci_dev_lock(hdev); if (pending_find(MGMT_OP_SET_LE, hdev) || pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) { status = MGMT_STATUS_BUSY; goto unlock; } cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len); if (!cmd) { status = MGMT_STATUS_NO_RESOURCES; goto unlock; } err = hci_cmd_sync_submit(hdev, mgmt_remove_adv_monitor_sync, cmd, mgmt_remove_adv_monitor_complete); if (err) { mgmt_pending_remove(cmd); if (err == -ENOMEM) status = MGMT_STATUS_NO_RESOURCES; else status = MGMT_STATUS_FAILED; goto unlock; } hci_dev_unlock(hdev); return 0; unlock: hci_dev_unlock(hdev); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADV_MONITOR, status); } static void read_local_oob_data_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_rp_read_local_oob_data mgmt_rp; size_t rp_size = sizeof(mgmt_rp); struct mgmt_pending_cmd *cmd = data; struct sk_buff *skb = cmd->skb; u8 status = mgmt_status(err); if (!status) { if (!skb) status = MGMT_STATUS_FAILED; else if (IS_ERR(skb)) status = mgmt_status(PTR_ERR(skb)); else status = mgmt_status(skb->data[0]); } bt_dev_dbg(hdev, "status %d", status); if (status) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, status); goto remove; } memset(&mgmt_rp, 0, sizeof(mgmt_rp)); if (!bredr_sc_enabled(hdev)) { struct hci_rp_read_local_oob_data *rp = (void *) skb->data; if (skb->len < sizeof(*rp)) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, MGMT_STATUS_FAILED); goto remove; } memcpy(mgmt_rp.hash192, rp->hash, sizeof(rp->hash)); memcpy(mgmt_rp.rand192, rp->rand, sizeof(rp->rand)); rp_size -= sizeof(mgmt_rp.hash256) + sizeof(mgmt_rp.rand256); } else { struct hci_rp_read_local_oob_ext_data *rp = (void *) skb->data; if (skb->len < sizeof(*rp)) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, MGMT_STATUS_FAILED); goto remove; } memcpy(mgmt_rp.hash192, rp->hash192, sizeof(rp->hash192)); memcpy(mgmt_rp.rand192, rp->rand192, sizeof(rp->rand192)); memcpy(mgmt_rp.hash256, rp->hash256, sizeof(rp->hash256)); memcpy(mgmt_rp.rand256, rp->rand256, sizeof(rp->rand256)); } mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, MGMT_STATUS_SUCCESS, &mgmt_rp, rp_size); remove: if (skb && !IS_ERR(skb)) kfree_skb(skb); mgmt_pending_free(cmd); } static int read_local_oob_data_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; if (bredr_sc_enabled(hdev)) cmd->skb = hci_read_local_oob_data_sync(hdev, true, cmd->sk); else cmd->skb = hci_read_local_oob_data_sync(hdev, false, cmd->sk); if (IS_ERR(cmd->skb)) return PTR_ERR(cmd->skb); else return 0; } static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_pending_cmd *cmd; int err; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, MGMT_STATUS_NOT_POWERED); goto unlock; } if (!lmp_ssp_capable(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, MGMT_STATUS_NOT_SUPPORTED); goto unlock; } cmd = mgmt_pending_new(sk, MGMT_OP_READ_LOCAL_OOB_DATA, hdev, NULL, 0); if (!cmd) err = -ENOMEM; else err = hci_cmd_sync_queue(hdev, read_local_oob_data_sync, cmd, read_local_oob_data_complete); if (err < 0) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, MGMT_STATUS_FAILED); if (cmd) mgmt_pending_free(cmd); } unlock: hci_dev_unlock(hdev); return err; } static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_addr_info *addr = data; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!bdaddr_type_is_valid(addr->type)) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, MGMT_STATUS_INVALID_PARAMS, addr, sizeof(*addr)); hci_dev_lock(hdev); if (len == MGMT_ADD_REMOTE_OOB_DATA_SIZE) { struct mgmt_cp_add_remote_oob_data *cp = data; u8 status; if (cp->addr.type != BDADDR_BREDR) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); goto unlock; } err = hci_add_remote_oob_data(hdev, &cp->addr.bdaddr, cp->addr.type, cp->hash, cp->rand, NULL, NULL); if (err < 0) status = MGMT_STATUS_FAILED; else status = MGMT_STATUS_SUCCESS; err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, status, &cp->addr, sizeof(cp->addr)); } else if (len == MGMT_ADD_REMOTE_OOB_EXT_DATA_SIZE) { struct mgmt_cp_add_remote_oob_ext_data *cp = data; u8 *rand192, *hash192, *rand256, *hash256; u8 status; if (bdaddr_type_is_le(cp->addr.type)) { /* Enforce zero-valued 192-bit parameters as * long as legacy SMP OOB isn't implemented. */ if (memcmp(cp->rand192, ZERO_KEY, 16) || memcmp(cp->hash192, ZERO_KEY, 16)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, MGMT_STATUS_INVALID_PARAMS, addr, sizeof(*addr)); goto unlock; } rand192 = NULL; hash192 = NULL; } else { /* In case one of the P-192 values is set to zero, * then just disable OOB data for P-192. */ if (!memcmp(cp->rand192, ZERO_KEY, 16) || !memcmp(cp->hash192, ZERO_KEY, 16)) { rand192 = NULL; hash192 = NULL; } else { rand192 = cp->rand192; hash192 = cp->hash192; } } /* In case one of the P-256 values is set to zero, then just * disable OOB data for P-256. */ if (!memcmp(cp->rand256, ZERO_KEY, 16) || !memcmp(cp->hash256, ZERO_KEY, 16)) { rand256 = NULL; hash256 = NULL; } else { rand256 = cp->rand256; hash256 = cp->hash256; } err = hci_add_remote_oob_data(hdev, &cp->addr.bdaddr, cp->addr.type, hash192, rand192, hash256, rand256); if (err < 0) status = MGMT_STATUS_FAILED; else status = MGMT_STATUS_SUCCESS; err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, status, &cp->addr, sizeof(cp->addr)); } else { bt_dev_err(hdev, "add_remote_oob_data: invalid len of %u bytes", len); err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, MGMT_STATUS_INVALID_PARAMS); } unlock: hci_dev_unlock(hdev); return err; } static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_remote_oob_data *cp = data; u8 status; int err; bt_dev_dbg(hdev, "sock %p", sk); if (cp->addr.type != BDADDR_BREDR) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_REMOTE_OOB_DATA, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); hci_dev_lock(hdev); if (!bacmp(&cp->addr.bdaddr, BDADDR_ANY)) { hci_remote_oob_data_clear(hdev); status = MGMT_STATUS_SUCCESS; goto done; } err = hci_remove_remote_oob_data(hdev, &cp->addr.bdaddr, cp->addr.type); if (err < 0) status = MGMT_STATUS_INVALID_PARAMS; else status = MGMT_STATUS_SUCCESS; done: err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_REMOTE_OOB_DATA, status, &cp->addr, sizeof(cp->addr)); hci_dev_unlock(hdev); return err; } void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; bt_dev_dbg(hdev, "status %u", status); hci_dev_lock(hdev); cmd = pending_find(MGMT_OP_START_DISCOVERY, hdev); if (!cmd) cmd = pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev); if (!cmd) cmd = pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev); if (cmd) { cmd->cmd_complete(cmd, mgmt_status(status)); mgmt_pending_remove(cmd); } hci_dev_unlock(hdev); } static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type, uint8_t *mgmt_status) { switch (type) { case DISCOV_TYPE_LE: *mgmt_status = mgmt_le_support(hdev); if (*mgmt_status) return false; break; case DISCOV_TYPE_INTERLEAVED: *mgmt_status = mgmt_le_support(hdev); if (*mgmt_status) return false; fallthrough; case DISCOV_TYPE_BREDR: *mgmt_status = mgmt_bredr_support(hdev); if (*mgmt_status) return false; break; default: *mgmt_status = MGMT_STATUS_INVALID_PARAMS; return false; } return true; } static void start_discovery_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; if (cmd != pending_find(MGMT_OP_START_DISCOVERY, hdev) && cmd != pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev) && cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev)) return; bt_dev_dbg(hdev, "err %d", err); mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), cmd->param, 1); mgmt_pending_remove(cmd); hci_discovery_set_state(hdev, err ? DISCOVERY_STOPPED: DISCOVERY_FINDING); } static int start_discovery_sync(struct hci_dev *hdev, void *data) { return hci_start_discovery_sync(hdev); } static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev, u16 op, void *data, u16 len) { struct mgmt_cp_start_discovery *cp = data; struct mgmt_pending_cmd *cmd; u8 status; int err; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { err = mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_NOT_POWERED, &cp->type, sizeof(cp->type)); goto failed; } if (hdev->discovery.state != DISCOVERY_STOPPED || hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) { err = mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_BUSY, &cp->type, sizeof(cp->type)); goto failed; } if (!discovery_type_is_valid(hdev, cp->type, &status)) { err = mgmt_cmd_complete(sk, hdev->id, op, status, &cp->type, sizeof(cp->type)); goto failed; } /* Can't start discovery when it is paused */ if (hdev->discovery_paused) { err = mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_BUSY, &cp->type, sizeof(cp->type)); goto failed; } /* Clear the discovery filter first to free any previously * allocated memory for the UUID list. */ hci_discovery_filter_clear(hdev); hdev->discovery.type = cp->type; hdev->discovery.report_invalid_rssi = false; if (op == MGMT_OP_START_LIMITED_DISCOVERY) hdev->discovery.limited = true; else hdev->discovery.limited = false; cmd = mgmt_pending_add(sk, op, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd, start_discovery_complete); if (err < 0) { mgmt_pending_remove(cmd); goto failed; } hci_discovery_set_state(hdev, DISCOVERY_STARTING); failed: hci_dev_unlock(hdev); return err; } static int start_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { return start_discovery_internal(sk, hdev, MGMT_OP_START_DISCOVERY, data, len); } static int start_limited_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { return start_discovery_internal(sk, hdev, MGMT_OP_START_LIMITED_DISCOVERY, data, len); } static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_start_service_discovery *cp = data; struct mgmt_pending_cmd *cmd; const u16 max_uuid_count = ((U16_MAX - sizeof(*cp)) / 16); u16 uuid_count, expected_len; u8 status; int err; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_SERVICE_DISCOVERY, MGMT_STATUS_NOT_POWERED, &cp->type, sizeof(cp->type)); goto failed; } if (hdev->discovery.state != DISCOVERY_STOPPED || hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_SERVICE_DISCOVERY, MGMT_STATUS_BUSY, &cp->type, sizeof(cp->type)); goto failed; } if (hdev->discovery_paused) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_SERVICE_DISCOVERY, MGMT_STATUS_BUSY, &cp->type, sizeof(cp->type)); goto failed; } uuid_count = __le16_to_cpu(cp->uuid_count); if (uuid_count > max_uuid_count) { bt_dev_err(hdev, "service_discovery: too big uuid_count value %u", uuid_count); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_SERVICE_DISCOVERY, MGMT_STATUS_INVALID_PARAMS, &cp->type, sizeof(cp->type)); goto failed; } expected_len = sizeof(*cp) + uuid_count * 16; if (expected_len != len) { bt_dev_err(hdev, "service_discovery: expected %u bytes, got %u bytes", expected_len, len); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_SERVICE_DISCOVERY, MGMT_STATUS_INVALID_PARAMS, &cp->type, sizeof(cp->type)); goto failed; } if (!discovery_type_is_valid(hdev, cp->type, &status)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_SERVICE_DISCOVERY, status, &cp->type, sizeof(cp->type)); goto failed; } cmd = mgmt_pending_add(sk, MGMT_OP_START_SERVICE_DISCOVERY, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } /* Clear the discovery filter first to free any previously * allocated memory for the UUID list. */ hci_discovery_filter_clear(hdev); hdev->discovery.result_filtering = true; hdev->discovery.type = cp->type; hdev->discovery.rssi = cp->rssi; hdev->discovery.uuid_count = uuid_count; if (uuid_count > 0) { hdev->discovery.uuids = kmemdup(cp->uuids, uuid_count * 16, GFP_KERNEL); if (!hdev->discovery.uuids) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_SERVICE_DISCOVERY, MGMT_STATUS_FAILED, &cp->type, sizeof(cp->type)); mgmt_pending_remove(cmd); goto failed; } } err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd, start_discovery_complete); if (err < 0) { mgmt_pending_remove(cmd); goto failed; } hci_discovery_set_state(hdev, DISCOVERY_STARTING); failed: hci_dev_unlock(hdev); return err; } void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; bt_dev_dbg(hdev, "status %u", status); hci_dev_lock(hdev); cmd = pending_find(MGMT_OP_STOP_DISCOVERY, hdev); if (cmd) { cmd->cmd_complete(cmd, mgmt_status(status)); mgmt_pending_remove(cmd); } hci_dev_unlock(hdev); } static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; if (cmd != pending_find(MGMT_OP_STOP_DISCOVERY, hdev)) return; bt_dev_dbg(hdev, "err %d", err); mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), cmd->param, 1); mgmt_pending_remove(cmd); if (!err) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); } static int stop_discovery_sync(struct hci_dev *hdev, void *data) { return hci_stop_discovery_sync(hdev); } static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_stop_discovery *mgmt_cp = data; struct mgmt_pending_cmd *cmd; int err; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); if (!hci_discovery_active(hdev)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, MGMT_STATUS_REJECTED, &mgmt_cp->type, sizeof(mgmt_cp->type)); goto unlock; } if (hdev->discovery.type != mgmt_cp->type) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, MGMT_STATUS_INVALID_PARAMS, &mgmt_cp->type, sizeof(mgmt_cp->type)); goto unlock; } cmd = mgmt_pending_add(sk, MGMT_OP_STOP_DISCOVERY, hdev, data, len); if (!cmd) { err = -ENOMEM; goto unlock; } err = hci_cmd_sync_queue(hdev, stop_discovery_sync, cmd, stop_discovery_complete); if (err < 0) { mgmt_pending_remove(cmd); goto unlock; } hci_discovery_set_state(hdev, DISCOVERY_STOPPING); unlock: hci_dev_unlock(hdev); return err; } static int confirm_name(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_confirm_name *cp = data; struct inquiry_entry *e; int err; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); if (!hci_discovery_active(hdev)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, MGMT_STATUS_FAILED, &cp->addr, sizeof(cp->addr)); goto failed; } e = hci_inquiry_cache_lookup_unknown(hdev, &cp->addr.bdaddr); if (!e) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); goto failed; } if (cp->name_known) { e->name_state = NAME_KNOWN; list_del(&e->list); } else { e->name_state = NAME_NEEDED; hci_inquiry_cache_update_resolve(hdev, e); } err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, 0, &cp->addr, sizeof(cp->addr)); failed: hci_dev_unlock(hdev); return err; } static int block_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_block_device *cp = data; u8 status; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!bdaddr_type_is_valid(cp->addr.type)) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); hci_dev_lock(hdev); err = hci_bdaddr_list_add(&hdev->reject_list, &cp->addr.bdaddr, cp->addr.type); if (err < 0) { status = MGMT_STATUS_FAILED; goto done; } mgmt_event(MGMT_EV_DEVICE_BLOCKED, hdev, &cp->addr, sizeof(cp->addr), sk); status = MGMT_STATUS_SUCCESS; done: err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, status, &cp->addr, sizeof(cp->addr)); hci_dev_unlock(hdev); return err; } static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_unblock_device *cp = data; u8 status; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!bdaddr_type_is_valid(cp->addr.type)) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); hci_dev_lock(hdev); err = hci_bdaddr_list_del(&hdev->reject_list, &cp->addr.bdaddr, cp->addr.type); if (err < 0) { status = MGMT_STATUS_INVALID_PARAMS; goto done; } mgmt_event(MGMT_EV_DEVICE_UNBLOCKED, hdev, &cp->addr, sizeof(cp->addr), sk); status = MGMT_STATUS_SUCCESS; done: err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, status, &cp->addr, sizeof(cp->addr)); hci_dev_unlock(hdev); return err; } static int set_device_id_sync(struct hci_dev *hdev, void *data) { return hci_update_eir_sync(hdev); } static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_device_id *cp = data; int err; __u16 source; bt_dev_dbg(hdev, "sock %p", sk); source = __le16_to_cpu(cp->source); if (source > 0x0002) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); hdev->devid_source = source; hdev->devid_vendor = __le16_to_cpu(cp->vendor); hdev->devid_product = __le16_to_cpu(cp->product); hdev->devid_version = __le16_to_cpu(cp->version); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, NULL, 0); hci_cmd_sync_queue(hdev, set_device_id_sync, NULL, NULL); hci_dev_unlock(hdev); return err; } static void enable_advertising_instance(struct hci_dev *hdev, int err) { if (err) bt_dev_err(hdev, "failed to re-configure advertising %d", err); else bt_dev_dbg(hdev, "status %d", err); } static void set_advertising_complete(struct hci_dev *hdev, void *data, int err) { struct cmd_lookup match = { NULL, hdev }; u8 instance; struct adv_info *adv_instance; u8 status = mgmt_status(err); if (status) { mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, cmd_status_rsp, &status); return; } if (hci_dev_test_flag(hdev, HCI_LE_ADV)) hci_dev_set_flag(hdev, HCI_ADVERTISING); else hci_dev_clear_flag(hdev, HCI_ADVERTISING); mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp, &match); new_settings(hdev, match.sk); if (match.sk) sock_put(match.sk); /* If "Set Advertising" was just disabled and instance advertising was * set up earlier, then re-enable multi-instance advertising. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || list_empty(&hdev->adv_instances)) return; instance = hdev->cur_adv_instance; if (!instance) { adv_instance = list_first_entry_or_null(&hdev->adv_instances, struct adv_info, list); if (!adv_instance) return; instance = adv_instance->instance; } err = hci_schedule_adv_instance_sync(hdev, instance, true); enable_advertising_instance(hdev, err); } static int set_adv_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_mode *cp = cmd->param; u8 val = !!cp->val; if (cp->val == 0x02) hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE); else hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE); cancel_adv_timeout(hdev); if (val) { /* Switch to instance "0" for the Set Advertising setting. * We cannot use update_[adv|scan_rsp]_data() here as the * HCI_ADVERTISING flag is not yet set. */ hdev->cur_adv_instance = 0x00; if (ext_adv_capable(hdev)) { hci_start_ext_adv_sync(hdev, 0x00); } else { hci_update_adv_data_sync(hdev, 0x00); hci_update_scan_rsp_data_sync(hdev, 0x00); hci_enable_advertising_sync(hdev); } } else { hci_disable_advertising_sync(hdev); } return 0; } static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; u8 val, status; int err; bt_dev_dbg(hdev, "sock %p", sk); status = mgmt_le_support(hdev); if (status) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, status); if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); if (hdev->advertising_paused) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, MGMT_STATUS_BUSY); hci_dev_lock(hdev); val = !!cp->val; /* The following conditions are ones which mean that we should * not do any HCI communication but directly send a mgmt * response to user space (after toggling the flag if * necessary). */ if (!hdev_is_powered(hdev) || (val == hci_dev_test_flag(hdev, HCI_ADVERTISING) && (cp->val == 0x02) == hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) || hci_dev_test_flag(hdev, HCI_MESH) || hci_conn_num(hdev, LE_LINK) > 0 || (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->le_scan_type == LE_SCAN_ACTIVE)) { bool changed; if (cp->val) { hdev->cur_adv_instance = 0x00; changed = !hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING); if (cp->val == 0x02) hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE); else hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE); } else { changed = hci_dev_test_and_clear_flag(hdev, HCI_ADVERTISING); hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE); } err = send_settings_rsp(sk, MGMT_OP_SET_ADVERTISING, hdev); if (err < 0) goto unlock; if (changed) err = new_settings(hdev, sk); goto unlock; } if (pending_find(MGMT_OP_SET_ADVERTISING, hdev) || pending_find(MGMT_OP_SET_LE, hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, MGMT_STATUS_BUSY); goto unlock; } cmd = mgmt_pending_add(sk, MGMT_OP_SET_ADVERTISING, hdev, data, len); if (!cmd) err = -ENOMEM; else err = hci_cmd_sync_queue(hdev, set_adv_sync, cmd, set_advertising_complete); if (err < 0 && cmd) mgmt_pending_remove(cmd); unlock: hci_dev_unlock(hdev); return err; } static int set_static_address(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_static_address *cp = data; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, MGMT_STATUS_NOT_SUPPORTED); if (hdev_is_powered(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, MGMT_STATUS_REJECTED); if (bacmp(&cp->bdaddr, BDADDR_ANY)) { if (!bacmp(&cp->bdaddr, BDADDR_NONE)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, MGMT_STATUS_INVALID_PARAMS); /* Two most significant bits shall be set */ if ((cp->bdaddr.b[5] & 0xc0) != 0xc0) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, MGMT_STATUS_INVALID_PARAMS); } hci_dev_lock(hdev); bacpy(&hdev->static_addr, &cp->bdaddr); err = send_settings_rsp(sk, MGMT_OP_SET_STATIC_ADDRESS, hdev); if (err < 0) goto unlock; err = new_settings(hdev, sk); unlock: hci_dev_unlock(hdev); return err; } static int set_scan_params(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_scan_params *cp = data; __u16 interval, window; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, MGMT_STATUS_NOT_SUPPORTED); interval = __le16_to_cpu(cp->interval); if (interval < 0x0004 || interval > 0x4000) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, MGMT_STATUS_INVALID_PARAMS); window = __le16_to_cpu(cp->window); if (window < 0x0004 || window > 0x4000) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, MGMT_STATUS_INVALID_PARAMS); if (window > interval) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); hdev->le_scan_interval = interval; hdev->le_scan_window = window; err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, 0, NULL, 0); /* If background scan is running, restart it so new parameters are * loaded. */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->discovery.state == DISCOVERY_STOPPED) hci_update_passive_scan(hdev); hci_dev_unlock(hdev); return err; } static void fast_connectable_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; bt_dev_dbg(hdev, "err %d", err); if (err) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, mgmt_status(err)); } else { struct mgmt_mode *cp = cmd->param; if (cp->val) hci_dev_set_flag(hdev, HCI_FAST_CONNECTABLE); else hci_dev_clear_flag(hdev, HCI_FAST_CONNECTABLE); send_settings_rsp(cmd->sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); new_settings(hdev, cmd->sk); } mgmt_pending_free(cmd); } static int write_fast_connectable_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_mode *cp = cmd->param; return hci_write_fast_connectable_sync(hdev, cp->val); } static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) || hdev->hci_ver < BLUETOOTH_VER_1_2) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_NOT_SUPPORTED); if (cp->val != 0x00 && cp->val != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (!!cp->val == hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) { err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); goto unlock; } if (!hdev_is_powered(hdev)) { hci_dev_change_flag(hdev, HCI_FAST_CONNECTABLE); err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); new_settings(hdev, sk); goto unlock; } cmd = mgmt_pending_new(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev, data, len); if (!cmd) err = -ENOMEM; else err = hci_cmd_sync_queue(hdev, write_fast_connectable_sync, cmd, fast_connectable_complete); if (err < 0) { mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_FAILED); if (cmd) mgmt_pending_free(cmd); } unlock: hci_dev_unlock(hdev); return err; } static void set_bredr_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; bt_dev_dbg(hdev, "err %d", err); if (err) { u8 mgmt_err = mgmt_status(err); /* We need to restore the flag if related HCI commands * failed. */ hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED); mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); } else { send_settings_rsp(cmd->sk, MGMT_OP_SET_BREDR, hdev); new_settings(hdev, cmd->sk); } mgmt_pending_free(cmd); } static int set_bredr_sync(struct hci_dev *hdev, void *data) { int status; status = hci_write_fast_connectable_sync(hdev, false); if (!status) status = hci_update_scan_sync(hdev); /* Since only the advertising data flags will change, there * is no need to update the scan response data. */ if (!status) status = hci_update_adv_data_sync(hdev, hdev->cur_adv_instance); return status; } static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_bredr_capable(hdev) || !lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, MGMT_STATUS_NOT_SUPPORTED); if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (cp->val == hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = send_settings_rsp(sk, MGMT_OP_SET_BREDR, hdev); goto unlock; } if (!hdev_is_powered(hdev)) { if (!cp->val) { hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_SSP_ENABLED); hci_dev_clear_flag(hdev, HCI_LINK_SECURITY); hci_dev_clear_flag(hdev, HCI_FAST_CONNECTABLE); } hci_dev_change_flag(hdev, HCI_BREDR_ENABLED); err = send_settings_rsp(sk, MGMT_OP_SET_BREDR, hdev); if (err < 0) goto unlock; err = new_settings(hdev, sk); goto unlock; } /* Reject disabling when powered on */ if (!cp->val) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, MGMT_STATUS_REJECTED); goto unlock; } else { /* When configuring a dual-mode controller to operate * with LE only and using a static address, then switching * BR/EDR back on is not allowed. * * Dual-mode controllers shall operate with the public * address as its identity address for BR/EDR and LE. So * reject the attempt to create an invalid configuration. * * The same restrictions applies when secure connections * has been enabled. For BR/EDR this is a controller feature * while for LE it is a host stack feature. This means that * switching BR/EDR back on when secure connections has been * enabled is not a supported transaction. */ if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && (bacmp(&hdev->static_addr, BDADDR_ANY) || hci_dev_test_flag(hdev, HCI_SC_ENABLED))) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, MGMT_STATUS_REJECTED); goto unlock; } } cmd = mgmt_pending_new(sk, MGMT_OP_SET_BREDR, hdev, data, len); if (!cmd) err = -ENOMEM; else err = hci_cmd_sync_queue(hdev, set_bredr_sync, cmd, set_bredr_complete); if (err < 0) { mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, MGMT_STATUS_FAILED); if (cmd) mgmt_pending_free(cmd); goto unlock; } /* We need to flip the bit already here so that * hci_req_update_adv_data generates the correct flags. */ hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); unlock: hci_dev_unlock(hdev); return err; } static void set_secure_conn_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; struct mgmt_mode *cp; bt_dev_dbg(hdev, "err %d", err); if (err) { u8 mgmt_err = mgmt_status(err); mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); goto done; } cp = cmd->param; switch (cp->val) { case 0x00: hci_dev_clear_flag(hdev, HCI_SC_ENABLED); hci_dev_clear_flag(hdev, HCI_SC_ONLY); break; case 0x01: hci_dev_set_flag(hdev, HCI_SC_ENABLED); hci_dev_clear_flag(hdev, HCI_SC_ONLY); break; case 0x02: hci_dev_set_flag(hdev, HCI_SC_ENABLED); hci_dev_set_flag(hdev, HCI_SC_ONLY); break; } send_settings_rsp(cmd->sk, cmd->opcode, hdev); new_settings(hdev, cmd->sk); done: mgmt_pending_free(cmd); } static int set_secure_conn_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_mode *cp = cmd->param; u8 val = !!cp->val; /* Force write of val */ hci_dev_set_flag(hdev, HCI_SC_ENABLED); return hci_write_sc_support_sync(hdev, val); } static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; u8 val; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_sc_capable(hdev) && !hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_NOT_SUPPORTED); if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && lmp_sc_capable(hdev) && !hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_REJECTED); if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (!hdev_is_powered(hdev) || !lmp_sc_capable(hdev) || !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { bool changed; if (cp->val) { changed = !hci_dev_test_and_set_flag(hdev, HCI_SC_ENABLED); if (cp->val == 0x02) hci_dev_set_flag(hdev, HCI_SC_ONLY); else hci_dev_clear_flag(hdev, HCI_SC_ONLY); } else { changed = hci_dev_test_and_clear_flag(hdev, HCI_SC_ENABLED); hci_dev_clear_flag(hdev, HCI_SC_ONLY); } err = send_settings_rsp(sk, MGMT_OP_SET_SECURE_CONN, hdev); if (err < 0) goto failed; if (changed) err = new_settings(hdev, sk); goto failed; } val = !!cp->val; if (val == hci_dev_test_flag(hdev, HCI_SC_ENABLED) && (cp->val == 0x02) == hci_dev_test_flag(hdev, HCI_SC_ONLY)) { err = send_settings_rsp(sk, MGMT_OP_SET_SECURE_CONN, hdev); goto failed; } cmd = mgmt_pending_new(sk, MGMT_OP_SET_SECURE_CONN, hdev, data, len); if (!cmd) err = -ENOMEM; else err = hci_cmd_sync_queue(hdev, set_secure_conn_sync, cmd, set_secure_conn_complete); if (err < 0) { mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_FAILED); if (cmd) mgmt_pending_free(cmd); } failed: hci_dev_unlock(hdev); return err; } static int set_debug_keys(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; bool changed, use_changed; int err; bt_dev_dbg(hdev, "sock %p", sk); if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEBUG_KEYS, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (cp->val) changed = !hci_dev_test_and_set_flag(hdev, HCI_KEEP_DEBUG_KEYS); else changed = hci_dev_test_and_clear_flag(hdev, HCI_KEEP_DEBUG_KEYS); if (cp->val == 0x02) use_changed = !hci_dev_test_and_set_flag(hdev, HCI_USE_DEBUG_KEYS); else use_changed = hci_dev_test_and_clear_flag(hdev, HCI_USE_DEBUG_KEYS); if (hdev_is_powered(hdev) && use_changed && hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { u8 mode = (cp->val == 0x02) ? 0x01 : 0x00; hci_send_cmd(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE, sizeof(mode), &mode); } err = send_settings_rsp(sk, MGMT_OP_SET_DEBUG_KEYS, hdev); if (err < 0) goto unlock; if (changed) err = new_settings(hdev, sk); unlock: hci_dev_unlock(hdev); return err; } static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data, u16 len) { struct mgmt_cp_set_privacy *cp = cp_data; bool changed; int err; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, MGMT_STATUS_NOT_SUPPORTED); if (cp->privacy != 0x00 && cp->privacy != 0x01 && cp->privacy != 0x02) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, MGMT_STATUS_INVALID_PARAMS); if (hdev_is_powered(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, MGMT_STATUS_REJECTED); hci_dev_lock(hdev); /* If user space supports this command it is also expected to * handle IRKs. Therefore, set the HCI_RPA_RESOLVING flag. */ hci_dev_set_flag(hdev, HCI_RPA_RESOLVING); if (cp->privacy) { changed = !hci_dev_test_and_set_flag(hdev, HCI_PRIVACY); memcpy(hdev->irk, cp->irk, sizeof(hdev->irk)); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); hci_adv_instances_set_rpa_expired(hdev, true); if (cp->privacy == 0x02) hci_dev_set_flag(hdev, HCI_LIMITED_PRIVACY); else hci_dev_clear_flag(hdev, HCI_LIMITED_PRIVACY); } else { changed = hci_dev_test_and_clear_flag(hdev, HCI_PRIVACY); memset(hdev->irk, 0, sizeof(hdev->irk)); hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED); hci_adv_instances_set_rpa_expired(hdev, false); hci_dev_clear_flag(hdev, HCI_LIMITED_PRIVACY); } err = send_settings_rsp(sk, MGMT_OP_SET_PRIVACY, hdev); if (err < 0) goto unlock; if (changed) err = new_settings(hdev, sk); unlock: hci_dev_unlock(hdev); return err; } static bool irk_is_valid(struct mgmt_irk_info *irk) { switch (irk->addr.type) { case BDADDR_LE_PUBLIC: return true; case BDADDR_LE_RANDOM: /* Two most significant bits shall be set */ if ((irk->addr.bdaddr.b[5] & 0xc0) != 0xc0) return false; return true; } return false; } static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, u16 len) { struct mgmt_cp_load_irks *cp = cp_data; const u16 max_irk_count = ((U16_MAX - sizeof(*cp)) / sizeof(struct mgmt_irk_info)); u16 irk_count, expected_len; int i, err; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, MGMT_STATUS_NOT_SUPPORTED); irk_count = __le16_to_cpu(cp->irk_count); if (irk_count > max_irk_count) { bt_dev_err(hdev, "load_irks: too big irk_count value %u", irk_count); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, MGMT_STATUS_INVALID_PARAMS); } expected_len = struct_size(cp, irks, irk_count); if (expected_len != len) { bt_dev_err(hdev, "load_irks: expected %u bytes, got %u bytes", expected_len, len); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, MGMT_STATUS_INVALID_PARAMS); } bt_dev_dbg(hdev, "irk_count %u", irk_count); for (i = 0; i < irk_count; i++) { struct mgmt_irk_info *key = &cp->irks[i]; if (!irk_is_valid(key)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, MGMT_STATUS_INVALID_PARAMS); } hci_dev_lock(hdev); hci_smp_irks_clear(hdev); for (i = 0; i < irk_count; i++) { struct mgmt_irk_info *irk = &cp->irks[i]; if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK, irk->val)) { bt_dev_warn(hdev, "Skipping blocked IRK for %pMR", &irk->addr.bdaddr); continue; } hci_add_irk(hdev, &irk->addr.bdaddr, le_addr_type(irk->addr.type), irk->val, BDADDR_ANY); } hci_dev_set_flag(hdev, HCI_RPA_RESOLVING); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_IRKS, 0, NULL, 0); hci_dev_unlock(hdev); return err; } static bool ltk_is_valid(struct mgmt_ltk_info *key) { if (key->initiator != 0x00 && key->initiator != 0x01) return false; switch (key->addr.type) { case BDADDR_LE_PUBLIC: return true; case BDADDR_LE_RANDOM: /* Two most significant bits shall be set */ if ((key->addr.bdaddr.b[5] & 0xc0) != 0xc0) return false; return true; } return false; } static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, void *cp_data, u16 len) { struct mgmt_cp_load_long_term_keys *cp = cp_data; const u16 max_key_count = ((U16_MAX - sizeof(*cp)) / sizeof(struct mgmt_ltk_info)); u16 key_count, expected_len; int i, err; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, MGMT_STATUS_NOT_SUPPORTED); key_count = __le16_to_cpu(cp->key_count); if (key_count > max_key_count) { bt_dev_err(hdev, "load_ltks: too big key_count value %u", key_count); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, MGMT_STATUS_INVALID_PARAMS); } expected_len = struct_size(cp, keys, key_count); if (expected_len != len) { bt_dev_err(hdev, "load_keys: expected %u bytes, got %u bytes", expected_len, len); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, MGMT_STATUS_INVALID_PARAMS); } bt_dev_dbg(hdev, "key_count %u", key_count); hci_dev_lock(hdev); hci_smp_ltks_clear(hdev); for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; u8 type, authenticated; if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LTK, key->val)) { bt_dev_warn(hdev, "Skipping blocked LTK for %pMR", &key->addr.bdaddr); continue; } if (!ltk_is_valid(key)) { bt_dev_warn(hdev, "Invalid LTK for %pMR", &key->addr.bdaddr); continue; } switch (key->type) { case MGMT_LTK_UNAUTHENTICATED: authenticated = 0x00; type = key->initiator ? SMP_LTK : SMP_LTK_RESPONDER; break; case MGMT_LTK_AUTHENTICATED: authenticated = 0x01; type = key->initiator ? SMP_LTK : SMP_LTK_RESPONDER; break; case MGMT_LTK_P256_UNAUTH: authenticated = 0x00; type = SMP_LTK_P256; break; case MGMT_LTK_P256_AUTH: authenticated = 0x01; type = SMP_LTK_P256; break; case MGMT_LTK_P256_DEBUG: authenticated = 0x00; type = SMP_LTK_P256_DEBUG; fallthrough; default: continue; } hci_add_ltk(hdev, &key->addr.bdaddr, le_addr_type(key->addr.type), type, authenticated, key->val, key->enc_size, key->ediv, key->rand); } err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0, NULL, 0); hci_dev_unlock(hdev); return err; } static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; struct hci_conn *conn = cmd->user_data; struct mgmt_cp_get_conn_info *cp = cmd->param; struct mgmt_rp_get_conn_info rp; u8 status; bt_dev_dbg(hdev, "err %d", err); memcpy(&rp.addr, &cp->addr, sizeof(rp.addr)); status = mgmt_status(err); if (status == MGMT_STATUS_SUCCESS) { rp.rssi = conn->rssi; rp.tx_power = conn->tx_power; rp.max_tx_power = conn->max_tx_power; } else { rp.rssi = HCI_RSSI_INVALID; rp.tx_power = HCI_TX_POWER_INVALID; rp.max_tx_power = HCI_TX_POWER_INVALID; } mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, &rp, sizeof(rp)); mgmt_pending_free(cmd); } static int get_conn_info_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_get_conn_info *cp = cmd->param; struct hci_conn *conn; int err; __le16 handle; /* Make sure we are still connected */ if (cp->addr.type == BDADDR_BREDR) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); else conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); if (!conn || conn->state != BT_CONNECTED) return MGMT_STATUS_NOT_CONNECTED; cmd->user_data = conn; handle = cpu_to_le16(conn->handle); /* Refresh RSSI each time */ err = hci_read_rssi_sync(hdev, handle); /* For LE links TX power does not change thus we don't need to * query for it once value is known. */ if (!err && (!bdaddr_type_is_le(cp->addr.type) || conn->tx_power == HCI_TX_POWER_INVALID)) err = hci_read_tx_power_sync(hdev, handle, 0x00); /* Max TX power needs to be read only once per connection */ if (!err && conn->max_tx_power == HCI_TX_POWER_INVALID) err = hci_read_tx_power_sync(hdev, handle, 0x01); return err; } static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_get_conn_info *cp = data; struct mgmt_rp_get_conn_info rp; struct hci_conn *conn; unsigned long conn_info_age; int err = 0; bt_dev_dbg(hdev, "sock %p", sk); memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); rp.addr.type = cp->addr.type; if (!bdaddr_type_is_valid(cp->addr.type)) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, MGMT_STATUS_INVALID_PARAMS, &rp, sizeof(rp)); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); goto unlock; } if (cp->addr.type == BDADDR_BREDR) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); else conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); if (!conn || conn->state != BT_CONNECTED) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp)); goto unlock; } /* To avoid client trying to guess when to poll again for information we * calculate conn info age as random value between min/max set in hdev. */ conn_info_age = get_random_u32_inclusive(hdev->conn_info_min_age, hdev->conn_info_max_age - 1); /* Query controller to refresh cached values if they are too old or were * never read. */ if (time_after(jiffies, conn->conn_info_timestamp + msecs_to_jiffies(conn_info_age)) || !conn->conn_info_timestamp) { struct mgmt_pending_cmd *cmd; cmd = mgmt_pending_new(sk, MGMT_OP_GET_CONN_INFO, hdev, data, len); if (!cmd) { err = -ENOMEM; } else { err = hci_cmd_sync_queue(hdev, get_conn_info_sync, cmd, get_conn_info_complete); } if (err < 0) { mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, MGMT_STATUS_FAILED, &rp, sizeof(rp)); if (cmd) mgmt_pending_free(cmd); goto unlock; } conn->conn_info_timestamp = jiffies; } else { /* Cache is valid, just reply with values cached in hci_conn */ rp.rssi = conn->rssi; rp.tx_power = conn->tx_power; rp.max_tx_power = conn->max_tx_power; err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); } unlock: hci_dev_unlock(hdev); return err; } static void get_clock_info_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_get_clock_info *cp = cmd->param; struct mgmt_rp_get_clock_info rp; struct hci_conn *conn = cmd->user_data; u8 status = mgmt_status(err); bt_dev_dbg(hdev, "err %d", err); memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); rp.addr.type = cp->addr.type; if (err) goto complete; rp.local_clock = cpu_to_le32(hdev->clock); if (conn) { rp.piconet_clock = cpu_to_le32(conn->clock); rp.accuracy = cpu_to_le16(conn->clock_accuracy); } complete: mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, sizeof(rp)); mgmt_pending_free(cmd); } static int get_clock_info_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_get_clock_info *cp = cmd->param; struct hci_cp_read_clock hci_cp; struct hci_conn *conn; memset(&hci_cp, 0, sizeof(hci_cp)); hci_read_clock_sync(hdev, &hci_cp); /* Make sure connection still exists */ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); if (!conn || conn->state != BT_CONNECTED) return MGMT_STATUS_NOT_CONNECTED; cmd->user_data = conn; hci_cp.handle = cpu_to_le16(conn->handle); hci_cp.which = 0x01; /* Piconet clock */ return hci_read_clock_sync(hdev, &hci_cp); } static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_get_clock_info *cp = data; struct mgmt_rp_get_clock_info rp; struct mgmt_pending_cmd *cmd; struct hci_conn *conn; int err; bt_dev_dbg(hdev, "sock %p", sk); memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); rp.addr.type = cp->addr.type; if (cp->addr.type != BDADDR_BREDR) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, MGMT_STATUS_INVALID_PARAMS, &rp, sizeof(rp)); hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); goto unlock; } if (bacmp(&cp->addr.bdaddr, BDADDR_ANY)) { conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); if (!conn || conn->state != BT_CONNECTED) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp)); goto unlock; } } else { conn = NULL; } cmd = mgmt_pending_new(sk, MGMT_OP_GET_CLOCK_INFO, hdev, data, len); if (!cmd) err = -ENOMEM; else err = hci_cmd_sync_queue(hdev, get_clock_info_sync, cmd, get_clock_info_complete); if (err < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, MGMT_STATUS_FAILED, &rp, sizeof(rp)); if (cmd) mgmt_pending_free(cmd); } unlock: hci_dev_unlock(hdev); return err; } static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) { struct hci_conn *conn; conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); if (!conn) return false; if (conn->dst_type != type) return false; if (conn->state != BT_CONNECTED) return false; return true; } /* This function requires the caller holds hdev->lock */ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, u8 auto_connect) { struct hci_conn_params *params; params = hci_conn_params_add(hdev, addr, addr_type); if (!params) return -EIO; if (params->auto_connect == auto_connect) return 0; hci_pend_le_list_del_init(params); switch (auto_connect) { case HCI_AUTO_CONN_DISABLED: case HCI_AUTO_CONN_LINK_LOSS: /* If auto connect is being disabled when we're trying to * connect to device, keep connecting. */ if (params->explicit_connect) hci_pend_le_list_add(params, &hdev->pend_le_conns); break; case HCI_AUTO_CONN_REPORT: if (params->explicit_connect) hci_pend_le_list_add(params, &hdev->pend_le_conns); else hci_pend_le_list_add(params, &hdev->pend_le_reports); break; case HCI_AUTO_CONN_DIRECT: case HCI_AUTO_CONN_ALWAYS: if (!is_connected(hdev, addr, addr_type)) hci_pend_le_list_add(params, &hdev->pend_le_conns); break; } params->auto_connect = auto_connect; bt_dev_dbg(hdev, "addr %pMR (type %u) auto_connect %u", addr, addr_type, auto_connect); return 0; } static void device_added(struct sock *sk, struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type, u8 action) { struct mgmt_ev_device_added ev; bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = type; ev.action = action; mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk); } static int add_device_sync(struct hci_dev *hdev, void *data) { return hci_update_passive_scan_sync(hdev); } static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_device *cp = data; u8 auto_conn, addr_type; struct hci_conn_params *params; int err; u32 current_flags = 0; u32 supported_flags; bt_dev_dbg(hdev, "sock %p", sk); if (!bdaddr_type_is_valid(cp->addr.type) || !bacmp(&cp->addr.bdaddr, BDADDR_ANY)) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); if (cp->action != 0x00 && cp->action != 0x01 && cp->action != 0x02) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); hci_dev_lock(hdev); if (cp->addr.type == BDADDR_BREDR) { /* Only incoming connections action is supported for now */ if (cp->action != 0x01) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); goto unlock; } err = hci_bdaddr_list_add_with_flags(&hdev->accept_list, &cp->addr.bdaddr, cp->addr.type, 0); if (err) goto unlock; hci_update_scan(hdev); goto added; } addr_type = le_addr_type(cp->addr.type); if (cp->action == 0x02) auto_conn = HCI_AUTO_CONN_ALWAYS; else if (cp->action == 0x01) auto_conn = HCI_AUTO_CONN_DIRECT; else auto_conn = HCI_AUTO_CONN_REPORT; /* Kernel internally uses conn_params with resolvable private * address, but Add Device allows only identity addresses. * Make sure it is enforced before calling * hci_conn_params_lookup. */ if (!hci_is_identity_address(&cp->addr.bdaddr, addr_type)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); goto unlock; } /* If the connection parameters don't exist for this device, * they will be created and configured with defaults. */ if (hci_conn_params_set(hdev, &cp->addr.bdaddr, addr_type, auto_conn) < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, MGMT_STATUS_FAILED, &cp->addr, sizeof(cp->addr)); goto unlock; } else { params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type); if (params) current_flags = params->flags; } err = hci_cmd_sync_queue(hdev, add_device_sync, NULL, NULL); if (err < 0) goto unlock; added: device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); supported_flags = hdev->conn_flags; device_flags_changed(NULL, hdev, &cp->addr.bdaddr, cp->addr.type, supported_flags, current_flags); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr)); unlock: hci_dev_unlock(hdev); return err; } static void device_removed(struct sock *sk, struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) { struct mgmt_ev_device_removed ev; bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = type; mgmt_event(MGMT_EV_DEVICE_REMOVED, hdev, &ev, sizeof(ev), sk); } static int remove_device_sync(struct hci_dev *hdev, void *data) { return hci_update_passive_scan_sync(hdev); } static int remove_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_device *cp = data; int err; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); if (bacmp(&cp->addr.bdaddr, BDADDR_ANY)) { struct hci_conn_params *params; u8 addr_type; if (!bdaddr_type_is_valid(cp->addr.type)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); goto unlock; } if (cp->addr.type == BDADDR_BREDR) { err = hci_bdaddr_list_del(&hdev->accept_list, &cp->addr.bdaddr, cp->addr.type); if (err) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); goto unlock; } hci_update_scan(hdev); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); goto complete; } addr_type = le_addr_type(cp->addr.type); /* Kernel internally uses conn_params with resolvable private * address, but Remove Device allows only identity addresses. * Make sure it is enforced before calling * hci_conn_params_lookup. */ if (!hci_is_identity_address(&cp->addr.bdaddr, addr_type)) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); goto unlock; } params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type); if (!params) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); goto unlock; } if (params->auto_connect == HCI_AUTO_CONN_DISABLED || params->auto_connect == HCI_AUTO_CONN_EXPLICIT) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); goto unlock; } hci_conn_params_free(params); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); } else { struct hci_conn_params *p, *tmp; struct bdaddr_list *b, *btmp; if (cp->addr.type) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); goto unlock; } list_for_each_entry_safe(b, btmp, &hdev->accept_list, list) { device_removed(sk, hdev, &b->bdaddr, b->bdaddr_type); list_del(&b->list); kfree(b); } hci_update_scan(hdev); list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { if (p->auto_connect == HCI_AUTO_CONN_DISABLED) continue; device_removed(sk, hdev, &p->addr, p->addr_type); if (p->explicit_connect) { p->auto_connect = HCI_AUTO_CONN_EXPLICIT; continue; } hci_conn_params_free(p); } bt_dev_dbg(hdev, "All LE connection parameters were removed"); } hci_cmd_sync_queue(hdev, remove_device_sync, NULL, NULL); complete: err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr)); unlock: hci_dev_unlock(hdev); return err; } static int conn_update_sync(struct hci_dev *hdev, void *data) { struct hci_conn_params *params = data; struct hci_conn *conn; conn = hci_conn_hash_lookup_le(hdev, ¶ms->addr, params->addr_type); if (!conn) return -ECANCELED; return hci_le_conn_update_sync(hdev, conn, params); } static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_load_conn_param *cp = data; const u16 max_param_count = ((U16_MAX - sizeof(*cp)) / sizeof(struct mgmt_conn_param)); u16 param_count, expected_len; int i; if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, MGMT_STATUS_NOT_SUPPORTED); param_count = __le16_to_cpu(cp->param_count); if (param_count > max_param_count) { bt_dev_err(hdev, "load_conn_param: too big param_count value %u", param_count); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, MGMT_STATUS_INVALID_PARAMS); } expected_len = struct_size(cp, params, param_count); if (expected_len != len) { bt_dev_err(hdev, "load_conn_param: expected %u bytes, got %u bytes", expected_len, len); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, MGMT_STATUS_INVALID_PARAMS); } bt_dev_dbg(hdev, "param_count %u", param_count); hci_dev_lock(hdev); if (param_count > 1) hci_conn_params_clear_disabled(hdev); for (i = 0; i < param_count; i++) { struct mgmt_conn_param *param = &cp->params[i]; struct hci_conn_params *hci_param; u16 min, max, latency, timeout; bool update = false; u8 addr_type; bt_dev_dbg(hdev, "Adding %pMR (type %u)", ¶m->addr.bdaddr, param->addr.type); if (param->addr.type == BDADDR_LE_PUBLIC) { addr_type = ADDR_LE_DEV_PUBLIC; } else if (param->addr.type == BDADDR_LE_RANDOM) { addr_type = ADDR_LE_DEV_RANDOM; } else { bt_dev_err(hdev, "ignoring invalid connection parameters"); continue; } min = le16_to_cpu(param->min_interval); max = le16_to_cpu(param->max_interval); latency = le16_to_cpu(param->latency); timeout = le16_to_cpu(param->timeout); bt_dev_dbg(hdev, "min 0x%04x max 0x%04x latency 0x%04x timeout 0x%04x", min, max, latency, timeout); if (hci_check_conn_params(min, max, latency, timeout) < 0) { bt_dev_err(hdev, "ignoring invalid connection parameters"); continue; } /* Detect when the loading is for an existing parameter then * attempt to trigger the connection update procedure. */ if (!i && param_count == 1) { hci_param = hci_conn_params_lookup(hdev, ¶m->addr.bdaddr, addr_type); if (hci_param) update = true; else hci_conn_params_clear_disabled(hdev); } hci_param = hci_conn_params_add(hdev, ¶m->addr.bdaddr, addr_type); if (!hci_param) { bt_dev_err(hdev, "failed to add connection parameters"); continue; } hci_param->conn_min_interval = min; hci_param->conn_max_interval = max; hci_param->conn_latency = latency; hci_param->supervision_timeout = timeout; /* Check if we need to trigger a connection update */ if (update) { struct hci_conn *conn; /* Lookup for existing connection as central and check * if parameters match and if they don't then trigger * a connection update. */ conn = hci_conn_hash_lookup_le(hdev, &hci_param->addr, addr_type); if (conn && conn->role == HCI_ROLE_MASTER && (conn->le_conn_min_interval != min || conn->le_conn_max_interval != max || conn->le_conn_latency != latency || conn->le_supv_timeout != timeout)) hci_cmd_sync_queue(hdev, conn_update_sync, hci_param, NULL); } } hci_dev_unlock(hdev); return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, 0, NULL, 0); } static int set_external_config(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_external_config *cp = data; bool changed; int err; bt_dev_dbg(hdev, "sock %p", sk); if (hdev_is_powered(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, MGMT_STATUS_REJECTED); if (cp->config != 0x00 && cp->config != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, MGMT_STATUS_INVALID_PARAMS); if (!test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, MGMT_STATUS_NOT_SUPPORTED); hci_dev_lock(hdev); if (cp->config) changed = !hci_dev_test_and_set_flag(hdev, HCI_EXT_CONFIGURED); else changed = hci_dev_test_and_clear_flag(hdev, HCI_EXT_CONFIGURED); err = send_options_rsp(sk, MGMT_OP_SET_EXTERNAL_CONFIG, hdev); if (err < 0) goto unlock; if (!changed) goto unlock; err = new_options(hdev, sk); if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) == is_configured(hdev)) { mgmt_index_removed(hdev); if (hci_dev_test_and_change_flag(hdev, HCI_UNCONFIGURED)) { hci_dev_set_flag(hdev, HCI_CONFIG); hci_dev_set_flag(hdev, HCI_AUTO_OFF); queue_work(hdev->req_workqueue, &hdev->power_on); } else { set_bit(HCI_RAW, &hdev->flags); mgmt_index_added(hdev); } } unlock: hci_dev_unlock(hdev); return err; } static int set_public_address(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_public_address *cp = data; bool changed; int err; bt_dev_dbg(hdev, "sock %p", sk); if (hdev_is_powered(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, MGMT_STATUS_REJECTED); if (!bacmp(&cp->bdaddr, BDADDR_ANY)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, MGMT_STATUS_INVALID_PARAMS); if (!hdev->set_bdaddr) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, MGMT_STATUS_NOT_SUPPORTED); hci_dev_lock(hdev); changed = !!bacmp(&hdev->public_addr, &cp->bdaddr); bacpy(&hdev->public_addr, &cp->bdaddr); err = send_options_rsp(sk, MGMT_OP_SET_PUBLIC_ADDRESS, hdev); if (err < 0) goto unlock; if (!changed) goto unlock; if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) err = new_options(hdev, sk); if (is_configured(hdev)) { mgmt_index_removed(hdev); hci_dev_clear_flag(hdev, HCI_UNCONFIGURED); hci_dev_set_flag(hdev, HCI_CONFIG); hci_dev_set_flag(hdev, HCI_AUTO_OFF); queue_work(hdev->req_workqueue, &hdev->power_on); } unlock: hci_dev_unlock(hdev); return err; } static void read_local_oob_ext_data_complete(struct hci_dev *hdev, void *data, int err) { const struct mgmt_cp_read_local_oob_ext_data *mgmt_cp; struct mgmt_rp_read_local_oob_ext_data *mgmt_rp; u8 *h192, *r192, *h256, *r256; struct mgmt_pending_cmd *cmd = data; struct sk_buff *skb = cmd->skb; u8 status = mgmt_status(err); u16 eir_len; if (cmd != pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev)) return; if (!status) { if (!skb) status = MGMT_STATUS_FAILED; else if (IS_ERR(skb)) status = mgmt_status(PTR_ERR(skb)); else status = mgmt_status(skb->data[0]); } bt_dev_dbg(hdev, "status %u", status); mgmt_cp = cmd->param; if (status) { status = mgmt_status(status); eir_len = 0; h192 = NULL; r192 = NULL; h256 = NULL; r256 = NULL; } else if (!bredr_sc_enabled(hdev)) { struct hci_rp_read_local_oob_data *rp; if (skb->len != sizeof(*rp)) { status = MGMT_STATUS_FAILED; eir_len = 0; } else { status = MGMT_STATUS_SUCCESS; rp = (void *)skb->data; eir_len = 5 + 18 + 18; h192 = rp->hash; r192 = rp->rand; h256 = NULL; r256 = NULL; } } else { struct hci_rp_read_local_oob_ext_data *rp; if (skb->len != sizeof(*rp)) { status = MGMT_STATUS_FAILED; eir_len = 0; } else { status = MGMT_STATUS_SUCCESS; rp = (void *)skb->data; if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { eir_len = 5 + 18 + 18; h192 = NULL; r192 = NULL; } else { eir_len = 5 + 18 + 18 + 18 + 18; h192 = rp->hash192; r192 = rp->rand192; } h256 = rp->hash256; r256 = rp->rand256; } } mgmt_rp = kmalloc(sizeof(*mgmt_rp) + eir_len, GFP_KERNEL); if (!mgmt_rp) goto done; if (eir_len == 0) goto send_rsp; eir_len = eir_append_data(mgmt_rp->eir, 0, EIR_CLASS_OF_DEV, hdev->dev_class, 3); if (h192 && r192) { eir_len = eir_append_data(mgmt_rp->eir, eir_len, EIR_SSP_HASH_C192, h192, 16); eir_len = eir_append_data(mgmt_rp->eir, eir_len, EIR_SSP_RAND_R192, r192, 16); } if (h256 && r256) { eir_len = eir_append_data(mgmt_rp->eir, eir_len, EIR_SSP_HASH_C256, h256, 16); eir_len = eir_append_data(mgmt_rp->eir, eir_len, EIR_SSP_RAND_R256, r256, 16); } send_rsp: mgmt_rp->type = mgmt_cp->type; mgmt_rp->eir_len = cpu_to_le16(eir_len); err = mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_EXT_DATA, status, mgmt_rp, sizeof(*mgmt_rp) + eir_len); if (err < 0 || status) goto done; hci_sock_set_flag(cmd->sk, HCI_MGMT_OOB_DATA_EVENTS); err = mgmt_limited_event(MGMT_EV_LOCAL_OOB_DATA_UPDATED, hdev, mgmt_rp, sizeof(*mgmt_rp) + eir_len, HCI_MGMT_OOB_DATA_EVENTS, cmd->sk); done: if (skb && !IS_ERR(skb)) kfree_skb(skb); kfree(mgmt_rp); mgmt_pending_remove(cmd); } static int read_local_ssp_oob_req(struct hci_dev *hdev, struct sock *sk, struct mgmt_cp_read_local_oob_ext_data *cp) { struct mgmt_pending_cmd *cmd; int err; cmd = mgmt_pending_add(sk, MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev, cp, sizeof(*cp)); if (!cmd) return -ENOMEM; err = hci_cmd_sync_queue(hdev, read_local_oob_data_sync, cmd, read_local_oob_ext_data_complete); if (err < 0) { mgmt_pending_remove(cmd); return err; } return 0; } static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_cp_read_local_oob_ext_data *cp = data; struct mgmt_rp_read_local_oob_ext_data *rp; size_t rp_len; u16 eir_len; u8 status, flags, role, addr[7], hash[16], rand[16]; int err; bt_dev_dbg(hdev, "sock %p", sk); if (hdev_is_powered(hdev)) { switch (cp->type) { case BIT(BDADDR_BREDR): status = mgmt_bredr_support(hdev); if (status) eir_len = 0; else eir_len = 5; break; case (BIT(BDADDR_LE_PUBLIC) | BIT(BDADDR_LE_RANDOM)): status = mgmt_le_support(hdev); if (status) eir_len = 0; else eir_len = 9 + 3 + 18 + 18 + 3; break; default: status = MGMT_STATUS_INVALID_PARAMS; eir_len = 0; break; } } else { status = MGMT_STATUS_NOT_POWERED; eir_len = 0; } rp_len = sizeof(*rp) + eir_len; rp = kmalloc(rp_len, GFP_ATOMIC); if (!rp) return -ENOMEM; if (!status && !lmp_ssp_capable(hdev)) { status = MGMT_STATUS_NOT_SUPPORTED; eir_len = 0; } if (status) goto complete; hci_dev_lock(hdev); eir_len = 0; switch (cp->type) { case BIT(BDADDR_BREDR): if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { err = read_local_ssp_oob_req(hdev, sk, cp); hci_dev_unlock(hdev); if (!err) goto done; status = MGMT_STATUS_FAILED; goto complete; } else { eir_len = eir_append_data(rp->eir, eir_len, EIR_CLASS_OF_DEV, hdev->dev_class, 3); } break; case (BIT(BDADDR_LE_PUBLIC) | BIT(BDADDR_LE_RANDOM)): if (hci_dev_test_flag(hdev, HCI_SC_ENABLED) && smp_generate_oob(hdev, hash, rand) < 0) { hci_dev_unlock(hdev); status = MGMT_STATUS_FAILED; goto complete; } /* This should return the active RPA, but since the RPA * is only programmed on demand, it is really hard to fill * this in at the moment. For now disallow retrieving * local out-of-band data when privacy is in use. * * Returning the identity address will not help here since * pairing happens before the identity resolving key is * known and thus the connection establishment happens * based on the RPA and not the identity address. */ if (hci_dev_test_flag(hdev, HCI_PRIVACY)) { hci_dev_unlock(hdev); status = MGMT_STATUS_REJECTED; goto complete; } if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || !bacmp(&hdev->bdaddr, BDADDR_ANY) || (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && bacmp(&hdev->static_addr, BDADDR_ANY))) { memcpy(addr, &hdev->static_addr, 6); addr[6] = 0x01; } else { memcpy(addr, &hdev->bdaddr, 6); addr[6] = 0x00; } eir_len = eir_append_data(rp->eir, eir_len, EIR_LE_BDADDR, addr, sizeof(addr)); if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) role = 0x02; else role = 0x01; eir_len = eir_append_data(rp->eir, eir_len, EIR_LE_ROLE, &role, sizeof(role)); if (hci_dev_test_flag(hdev, HCI_SC_ENABLED)) { eir_len = eir_append_data(rp->eir, eir_len, EIR_LE_SC_CONFIRM, hash, sizeof(hash)); eir_len = eir_append_data(rp->eir, eir_len, EIR_LE_SC_RANDOM, rand, sizeof(rand)); } flags = mgmt_get_adv_discov_flags(hdev); if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) flags |= LE_AD_NO_BREDR; eir_len = eir_append_data(rp->eir, eir_len, EIR_FLAGS, &flags, sizeof(flags)); break; } hci_dev_unlock(hdev); hci_sock_set_flag(sk, HCI_MGMT_OOB_DATA_EVENTS); status = MGMT_STATUS_SUCCESS; complete: rp->type = cp->type; rp->eir_len = cpu_to_le16(eir_len); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_EXT_DATA, status, rp, sizeof(*rp) + eir_len); if (err < 0 || status) goto done; err = mgmt_limited_event(MGMT_EV_LOCAL_OOB_DATA_UPDATED, hdev, rp, sizeof(*rp) + eir_len, HCI_MGMT_OOB_DATA_EVENTS, sk); done: kfree(rp); return err; } static u32 get_supported_adv_flags(struct hci_dev *hdev) { u32 flags = 0; flags |= MGMT_ADV_FLAG_CONNECTABLE; flags |= MGMT_ADV_FLAG_DISCOV; flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; flags |= MGMT_ADV_FLAG_MANAGED_FLAGS; flags |= MGMT_ADV_FLAG_APPEARANCE; flags |= MGMT_ADV_FLAG_LOCAL_NAME; flags |= MGMT_ADV_PARAM_DURATION; flags |= MGMT_ADV_PARAM_TIMEOUT; flags |= MGMT_ADV_PARAM_INTERVALS; flags |= MGMT_ADV_PARAM_TX_POWER; flags |= MGMT_ADV_PARAM_SCAN_RSP; /* In extended adv TX_POWER returned from Set Adv Param * will be always valid. */ if (hdev->adv_tx_power != HCI_TX_POWER_INVALID || ext_adv_capable(hdev)) flags |= MGMT_ADV_FLAG_TX_POWER; if (ext_adv_capable(hdev)) { flags |= MGMT_ADV_FLAG_SEC_1M; flags |= MGMT_ADV_FLAG_HW_OFFLOAD; flags |= MGMT_ADV_FLAG_CAN_SET_TX_POWER; if (le_2m_capable(hdev)) flags |= MGMT_ADV_FLAG_SEC_2M; if (le_coded_capable(hdev)) flags |= MGMT_ADV_FLAG_SEC_CODED; } return flags; } static int read_adv_features(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_adv_features *rp; size_t rp_len; int err; struct adv_info *adv_instance; u32 supported_flags; u8 *instance; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES, MGMT_STATUS_REJECTED); hci_dev_lock(hdev); rp_len = sizeof(*rp) + hdev->adv_instance_cnt; rp = kmalloc(rp_len, GFP_ATOMIC); if (!rp) { hci_dev_unlock(hdev); return -ENOMEM; } supported_flags = get_supported_adv_flags(hdev); rp->supported_flags = cpu_to_le32(supported_flags); rp->max_adv_data_len = max_adv_len(hdev); rp->max_scan_rsp_len = max_adv_len(hdev); rp->max_instances = hdev->le_num_of_adv_sets; rp->num_instances = hdev->adv_instance_cnt; instance = rp->instance; list_for_each_entry(adv_instance, &hdev->adv_instances, list) { /* Only instances 1-le_num_of_adv_sets are externally visible */ if (adv_instance->instance <= hdev->adv_instance_cnt) { *instance = adv_instance->instance; instance++; } else { rp->num_instances--; rp_len--; } } hci_dev_unlock(hdev); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES, MGMT_STATUS_SUCCESS, rp, rp_len); kfree(rp); return err; } static u8 calculate_name_len(struct hci_dev *hdev) { u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 2]; /* len + type + name */ return eir_append_local_name(hdev, buf, 0); } static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags, bool is_adv_data) { u8 max_len = max_adv_len(hdev); if (is_adv_data) { if (adv_flags & (MGMT_ADV_FLAG_DISCOV | MGMT_ADV_FLAG_LIMITED_DISCOV | MGMT_ADV_FLAG_MANAGED_FLAGS)) max_len -= 3; if (adv_flags & MGMT_ADV_FLAG_TX_POWER) max_len -= 3; } else { if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) max_len -= calculate_name_len(hdev); if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) max_len -= 4; } return max_len; } static bool flags_managed(u32 adv_flags) { return adv_flags & (MGMT_ADV_FLAG_DISCOV | MGMT_ADV_FLAG_LIMITED_DISCOV | MGMT_ADV_FLAG_MANAGED_FLAGS); } static bool tx_power_managed(u32 adv_flags) { return adv_flags & MGMT_ADV_FLAG_TX_POWER; } static bool name_managed(u32 adv_flags) { return adv_flags & MGMT_ADV_FLAG_LOCAL_NAME; } static bool appearance_managed(u32 adv_flags) { return adv_flags & MGMT_ADV_FLAG_APPEARANCE; } static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, u8 len, bool is_adv_data) { int i, cur_len; u8 max_len; max_len = tlv_data_max_len(hdev, adv_flags, is_adv_data); if (len > max_len) return false; /* Make sure that the data is correctly formatted. */ for (i = 0; i < len; i += (cur_len + 1)) { cur_len = data[i]; if (!cur_len) continue; if (data[i + 1] == EIR_FLAGS && (!is_adv_data || flags_managed(adv_flags))) return false; if (data[i + 1] == EIR_TX_POWER && tx_power_managed(adv_flags)) return false; if (data[i + 1] == EIR_NAME_COMPLETE && name_managed(adv_flags)) return false; if (data[i + 1] == EIR_NAME_SHORT && name_managed(adv_flags)) return false; if (data[i + 1] == EIR_APPEARANCE && appearance_managed(adv_flags)) return false; /* If the current field length would exceed the total data * length, then it's invalid. */ if (i + cur_len >= len) return false; } return true; } static bool requested_adv_flags_are_valid(struct hci_dev *hdev, u32 adv_flags) { u32 supported_flags, phy_flags; /* The current implementation only supports a subset of the specified * flags. Also need to check mutual exclusiveness of sec flags. */ supported_flags = get_supported_adv_flags(hdev); phy_flags = adv_flags & MGMT_ADV_FLAG_SEC_MASK; if (adv_flags & ~supported_flags || ((phy_flags && (phy_flags ^ (phy_flags & -phy_flags))))) return false; return true; } static bool adv_busy(struct hci_dev *hdev) { return pending_find(MGMT_OP_SET_LE, hdev); } static void add_adv_complete(struct hci_dev *hdev, struct sock *sk, u8 instance, int err) { struct adv_info *adv, *n; bt_dev_dbg(hdev, "err %d", err); hci_dev_lock(hdev); list_for_each_entry_safe(adv, n, &hdev->adv_instances, list) { u8 instance; if (!adv->pending) continue; if (!err) { adv->pending = false; continue; } instance = adv->instance; if (hdev->cur_adv_instance == instance) cancel_adv_timeout(hdev); hci_remove_adv_instance(hdev, instance); mgmt_advertising_removed(sk, hdev, instance); } hci_dev_unlock(hdev); } static void add_advertising_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_add_advertising *cp = cmd->param; struct mgmt_rp_add_advertising rp; memset(&rp, 0, sizeof(rp)); rp.instance = cp->instance; if (err) mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err)); else mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); add_adv_complete(hdev, cmd->sk, cp->instance, err); mgmt_pending_free(cmd); } static int add_advertising_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_add_advertising *cp = cmd->param; return hci_schedule_adv_instance_sync(hdev, cp->instance, true); } static int add_advertising(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_cp_add_advertising *cp = data; struct mgmt_rp_add_advertising rp; u32 flags; u8 status; u16 timeout, duration; unsigned int prev_instance_cnt; u8 schedule_instance = 0; struct adv_info *adv, *next_instance; int err; struct mgmt_pending_cmd *cmd; bt_dev_dbg(hdev, "sock %p", sk); status = mgmt_le_support(hdev); if (status) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, status); if (cp->instance < 1 || cp->instance > hdev->le_num_of_adv_sets) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); if (data_len != sizeof(*cp) + cp->adv_data_len + cp->scan_rsp_len) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); flags = __le32_to_cpu(cp->flags); timeout = __le16_to_cpu(cp->timeout); duration = __le16_to_cpu(cp->duration); if (!requested_adv_flags_are_valid(hdev, flags)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (timeout && !hdev_is_powered(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_REJECTED); goto unlock; } if (adv_busy(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_BUSY); goto unlock; } if (!tlv_data_is_valid(hdev, flags, cp->data, cp->adv_data_len, true) || !tlv_data_is_valid(hdev, flags, cp->data + cp->adv_data_len, cp->scan_rsp_len, false)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); goto unlock; } prev_instance_cnt = hdev->adv_instance_cnt; adv = hci_add_adv_instance(hdev, cp->instance, flags, cp->adv_data_len, cp->data, cp->scan_rsp_len, cp->data + cp->adv_data_len, timeout, duration, HCI_ADV_TX_POWER_NO_PREFERENCE, hdev->le_adv_min_interval, hdev->le_adv_max_interval, 0); if (IS_ERR(adv)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_FAILED); goto unlock; } /* Only trigger an advertising added event if a new instance was * actually added. */ if (hdev->adv_instance_cnt > prev_instance_cnt) mgmt_advertising_added(sk, hdev, cp->instance); if (hdev->cur_adv_instance == cp->instance) { /* If the currently advertised instance is being changed then * cancel the current advertising and schedule the next * instance. If there is only one instance then the overridden * advertising data will be visible right away. */ cancel_adv_timeout(hdev); next_instance = hci_get_next_instance(hdev, cp->instance); if (next_instance) schedule_instance = next_instance->instance; } else if (!hdev->adv_instance_timeout) { /* Immediately advertise the new instance if no other * instance is currently being advertised. */ schedule_instance = cp->instance; } /* If the HCI_ADVERTISING flag is set or the device isn't powered or * there is no instance to be advertised then we have no HCI * communication to make. Simply return. */ if (!hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING) || !schedule_instance) { rp.instance = cp->instance; err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); goto unlock; } /* We're good to go, update advertising data, parameters, and start * advertising. */ cmd = mgmt_pending_new(sk, MGMT_OP_ADD_ADVERTISING, hdev, data, data_len); if (!cmd) { err = -ENOMEM; goto unlock; } cp->instance = schedule_instance; err = hci_cmd_sync_queue(hdev, add_advertising_sync, cmd, add_advertising_complete); if (err < 0) mgmt_pending_free(cmd); unlock: hci_dev_unlock(hdev); return err; } static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_add_ext_adv_params *cp = cmd->param; struct mgmt_rp_add_ext_adv_params rp; struct adv_info *adv; u32 flags; BT_DBG("%s", hdev->name); hci_dev_lock(hdev); adv = hci_find_adv_instance(hdev, cp->instance); if (!adv) goto unlock; rp.instance = cp->instance; rp.tx_power = adv->tx_power; /* While we're at it, inform userspace of the available space for this * advertisement, given the flags that will be used. */ flags = __le32_to_cpu(cp->flags); rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true); rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false); if (err) { /* If this advertisement was previously advertising and we * failed to update it, we signal that it has been removed and * delete its structure */ if (!adv->pending) mgmt_advertising_removed(cmd->sk, hdev, cp->instance); hci_remove_adv_instance(hdev, cp->instance); mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err)); } else { mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); } unlock: mgmt_pending_free(cmd); hci_dev_unlock(hdev); } static int add_ext_adv_params_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_add_ext_adv_params *cp = cmd->param; return hci_setup_ext_adv_instance_sync(hdev, cp->instance); } static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_cp_add_ext_adv_params *cp = data; struct mgmt_rp_add_ext_adv_params rp; struct mgmt_pending_cmd *cmd = NULL; struct adv_info *adv; u32 flags, min_interval, max_interval; u16 timeout, duration; u8 status; s8 tx_power; int err; BT_DBG("%s", hdev->name); status = mgmt_le_support(hdev); if (status) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, status); if (cp->instance < 1 || cp->instance > hdev->le_num_of_adv_sets) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_STATUS_INVALID_PARAMS); /* The purpose of breaking add_advertising into two separate MGMT calls * for params and data is to allow more parameters to be added to this * structure in the future. For this reason, we verify that we have the * bare minimum structure we know of when the interface was defined. Any * extra parameters we don't know about will be ignored in this request. */ if (data_len < MGMT_ADD_EXT_ADV_PARAMS_MIN_SIZE) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_STATUS_INVALID_PARAMS); flags = __le32_to_cpu(cp->flags); if (!requested_adv_flags_are_valid(hdev, flags)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); /* In new interface, we require that we are powered to register */ if (!hdev_is_powered(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_STATUS_REJECTED); goto unlock; } if (adv_busy(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_STATUS_BUSY); goto unlock; } /* Parse defined parameters from request, use defaults otherwise */ timeout = (flags & MGMT_ADV_PARAM_TIMEOUT) ? __le16_to_cpu(cp->timeout) : 0; duration = (flags & MGMT_ADV_PARAM_DURATION) ? __le16_to_cpu(cp->duration) : hdev->def_multi_adv_rotation_duration; min_interval = (flags & MGMT_ADV_PARAM_INTERVALS) ? __le32_to_cpu(cp->min_interval) : hdev->le_adv_min_interval; max_interval = (flags & MGMT_ADV_PARAM_INTERVALS) ? __le32_to_cpu(cp->max_interval) : hdev->le_adv_max_interval; tx_power = (flags & MGMT_ADV_PARAM_TX_POWER) ? cp->tx_power : HCI_ADV_TX_POWER_NO_PREFERENCE; /* Create advertising instance with no advertising or response data */ adv = hci_add_adv_instance(hdev, cp->instance, flags, 0, NULL, 0, NULL, timeout, duration, tx_power, min_interval, max_interval, 0); if (IS_ERR(adv)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_STATUS_FAILED); goto unlock; } /* Submit request for advertising params if ext adv available */ if (ext_adv_capable(hdev)) { cmd = mgmt_pending_new(sk, MGMT_OP_ADD_EXT_ADV_PARAMS, hdev, data, data_len); if (!cmd) { err = -ENOMEM; hci_remove_adv_instance(hdev, cp->instance); goto unlock; } err = hci_cmd_sync_queue(hdev, add_ext_adv_params_sync, cmd, add_ext_adv_params_complete); if (err < 0) mgmt_pending_free(cmd); } else { rp.instance = cp->instance; rp.tx_power = HCI_ADV_TX_POWER_NO_PREFERENCE; rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true); rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); } unlock: hci_dev_unlock(hdev); return err; } static void add_ext_adv_data_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_add_ext_adv_data *cp = cmd->param; struct mgmt_rp_add_advertising rp; add_adv_complete(hdev, cmd->sk, cp->instance, err); memset(&rp, 0, sizeof(rp)); rp.instance = cp->instance; if (err) mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err)); else mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); mgmt_pending_free(cmd); } static int add_ext_adv_data_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_add_ext_adv_data *cp = cmd->param; int err; if (ext_adv_capable(hdev)) { err = hci_update_adv_data_sync(hdev, cp->instance); if (err) return err; err = hci_update_scan_rsp_data_sync(hdev, cp->instance); if (err) return err; return hci_enable_ext_advertising_sync(hdev, cp->instance); } return hci_schedule_adv_instance_sync(hdev, cp->instance, true); } static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_cp_add_ext_adv_data *cp = data; struct mgmt_rp_add_ext_adv_data rp; u8 schedule_instance = 0; struct adv_info *next_instance; struct adv_info *adv_instance; int err = 0; struct mgmt_pending_cmd *cmd; BT_DBG("%s", hdev->name); hci_dev_lock(hdev); adv_instance = hci_find_adv_instance(hdev, cp->instance); if (!adv_instance) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, MGMT_STATUS_INVALID_PARAMS); goto unlock; } /* In new interface, we require that we are powered to register */ if (!hdev_is_powered(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, MGMT_STATUS_REJECTED); goto clear_new_instance; } if (adv_busy(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, MGMT_STATUS_BUSY); goto clear_new_instance; } /* Validate new data */ if (!tlv_data_is_valid(hdev, adv_instance->flags, cp->data, cp->adv_data_len, true) || !tlv_data_is_valid(hdev, adv_instance->flags, cp->data + cp->adv_data_len, cp->scan_rsp_len, false)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, MGMT_STATUS_INVALID_PARAMS); goto clear_new_instance; } /* Set the data in the advertising instance */ hci_set_adv_instance_data(hdev, cp->instance, cp->adv_data_len, cp->data, cp->scan_rsp_len, cp->data + cp->adv_data_len); /* If using software rotation, determine next instance to use */ if (hdev->cur_adv_instance == cp->instance) { /* If the currently advertised instance is being changed * then cancel the current advertising and schedule the * next instance. If there is only one instance then the * overridden advertising data will be visible right * away */ cancel_adv_timeout(hdev); next_instance = hci_get_next_instance(hdev, cp->instance); if (next_instance) schedule_instance = next_instance->instance; } else if (!hdev->adv_instance_timeout) { /* Immediately advertise the new instance if no other * instance is currently being advertised. */ schedule_instance = cp->instance; } /* If the HCI_ADVERTISING flag is set or there is no instance to * be advertised then we have no HCI communication to make. * Simply return. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || !schedule_instance) { if (adv_instance->pending) { mgmt_advertising_added(sk, hdev, cp->instance); adv_instance->pending = false; } rp.instance = cp->instance; err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); goto unlock; } cmd = mgmt_pending_new(sk, MGMT_OP_ADD_EXT_ADV_DATA, hdev, data, data_len); if (!cmd) { err = -ENOMEM; goto clear_new_instance; } err = hci_cmd_sync_queue(hdev, add_ext_adv_data_sync, cmd, add_ext_adv_data_complete); if (err < 0) { mgmt_pending_free(cmd); goto clear_new_instance; } /* We were successful in updating data, so trigger advertising_added * event if this is an instance that wasn't previously advertising. If * a failure occurs in the requests we initiated, we will remove the * instance again in add_advertising_complete */ if (adv_instance->pending) mgmt_advertising_added(sk, hdev, cp->instance); goto unlock; clear_new_instance: hci_remove_adv_instance(hdev, cp->instance); unlock: hci_dev_unlock(hdev); return err; } static void remove_advertising_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_remove_advertising *cp = cmd->param; struct mgmt_rp_remove_advertising rp; bt_dev_dbg(hdev, "err %d", err); memset(&rp, 0, sizeof(rp)); rp.instance = cp->instance; if (err) mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err)); else mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); mgmt_pending_free(cmd); } static int remove_advertising_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_remove_advertising *cp = cmd->param; int err; err = hci_remove_advertising_sync(hdev, cmd->sk, cp->instance, true); if (err) return err; if (list_empty(&hdev->adv_instances)) err = hci_disable_advertising_sync(hdev); return err; } static int remove_advertising(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_cp_remove_advertising *cp = data; struct mgmt_pending_cmd *cmd; int err; bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); if (cp->instance && !hci_find_adv_instance(hdev, cp->instance)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); goto unlock; } if (pending_find(MGMT_OP_SET_LE, hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, MGMT_STATUS_BUSY); goto unlock; } if (list_empty(&hdev->adv_instances)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); goto unlock; } cmd = mgmt_pending_new(sk, MGMT_OP_REMOVE_ADVERTISING, hdev, data, data_len); if (!cmd) { err = -ENOMEM; goto unlock; } err = hci_cmd_sync_queue(hdev, remove_advertising_sync, cmd, remove_advertising_complete); if (err < 0) mgmt_pending_free(cmd); unlock: hci_dev_unlock(hdev); return err; } static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_cp_get_adv_size_info *cp = data; struct mgmt_rp_get_adv_size_info rp; u32 flags, supported_flags; bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, MGMT_STATUS_REJECTED); if (cp->instance < 1 || cp->instance > hdev->le_num_of_adv_sets) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, MGMT_STATUS_INVALID_PARAMS); flags = __le32_to_cpu(cp->flags); /* The current implementation only supports a subset of the specified * flags. */ supported_flags = get_supported_adv_flags(hdev); if (flags & ~supported_flags) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, MGMT_STATUS_INVALID_PARAMS); rp.instance = cp->instance; rp.flags = cp->flags; rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true); rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false); return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); } static const struct hci_mgmt_handler mgmt_handlers[] = { { NULL }, /* 0x0000 (no command) */ { read_version, MGMT_READ_VERSION_SIZE, HCI_MGMT_NO_HDEV | HCI_MGMT_UNTRUSTED }, { read_commands, MGMT_READ_COMMANDS_SIZE, HCI_MGMT_NO_HDEV | HCI_MGMT_UNTRUSTED }, { read_index_list, MGMT_READ_INDEX_LIST_SIZE, HCI_MGMT_NO_HDEV | HCI_MGMT_UNTRUSTED }, { read_controller_info, MGMT_READ_INFO_SIZE, HCI_MGMT_UNTRUSTED }, { set_powered, MGMT_SETTING_SIZE }, { set_discoverable, MGMT_SET_DISCOVERABLE_SIZE }, { set_connectable, MGMT_SETTING_SIZE }, { set_fast_connectable, MGMT_SETTING_SIZE }, { set_bondable, MGMT_SETTING_SIZE }, { set_link_security, MGMT_SETTING_SIZE }, { set_ssp, MGMT_SETTING_SIZE }, { set_hs, MGMT_SETTING_SIZE }, { set_le, MGMT_SETTING_SIZE }, { set_dev_class, MGMT_SET_DEV_CLASS_SIZE }, { set_local_name, MGMT_SET_LOCAL_NAME_SIZE }, { add_uuid, MGMT_ADD_UUID_SIZE }, { remove_uuid, MGMT_REMOVE_UUID_SIZE }, { load_link_keys, MGMT_LOAD_LINK_KEYS_SIZE, HCI_MGMT_VAR_LEN }, { load_long_term_keys, MGMT_LOAD_LONG_TERM_KEYS_SIZE, HCI_MGMT_VAR_LEN }, { disconnect, MGMT_DISCONNECT_SIZE }, { get_connections, MGMT_GET_CONNECTIONS_SIZE }, { pin_code_reply, MGMT_PIN_CODE_REPLY_SIZE }, { pin_code_neg_reply, MGMT_PIN_CODE_NEG_REPLY_SIZE }, { set_io_capability, MGMT_SET_IO_CAPABILITY_SIZE }, { pair_device, MGMT_PAIR_DEVICE_SIZE }, { cancel_pair_device, MGMT_CANCEL_PAIR_DEVICE_SIZE }, { unpair_device, MGMT_UNPAIR_DEVICE_SIZE }, { user_confirm_reply, MGMT_USER_CONFIRM_REPLY_SIZE }, { user_confirm_neg_reply, MGMT_USER_CONFIRM_NEG_REPLY_SIZE }, { user_passkey_reply, MGMT_USER_PASSKEY_REPLY_SIZE }, { user_passkey_neg_reply, MGMT_USER_PASSKEY_NEG_REPLY_SIZE }, { read_local_oob_data, MGMT_READ_LOCAL_OOB_DATA_SIZE }, { add_remote_oob_data, MGMT_ADD_REMOTE_OOB_DATA_SIZE, HCI_MGMT_VAR_LEN }, { remove_remote_oob_data, MGMT_REMOVE_REMOTE_OOB_DATA_SIZE }, { start_discovery, MGMT_START_DISCOVERY_SIZE }, { stop_discovery, MGMT_STOP_DISCOVERY_SIZE }, { confirm_name, MGMT_CONFIRM_NAME_SIZE }, { block_device, MGMT_BLOCK_DEVICE_SIZE }, { unblock_device, MGMT_UNBLOCK_DEVICE_SIZE }, { set_device_id, MGMT_SET_DEVICE_ID_SIZE }, { set_advertising, MGMT_SETTING_SIZE }, { set_bredr, MGMT_SETTING_SIZE }, { set_static_address, MGMT_SET_STATIC_ADDRESS_SIZE }, { set_scan_params, MGMT_SET_SCAN_PARAMS_SIZE }, { set_secure_conn, MGMT_SETTING_SIZE }, { set_debug_keys, MGMT_SETTING_SIZE }, { set_privacy, MGMT_SET_PRIVACY_SIZE }, { load_irks, MGMT_LOAD_IRKS_SIZE, HCI_MGMT_VAR_LEN }, { get_conn_info, MGMT_GET_CONN_INFO_SIZE }, { get_clock_info, MGMT_GET_CLOCK_INFO_SIZE }, { add_device, MGMT_ADD_DEVICE_SIZE }, { remove_device, MGMT_REMOVE_DEVICE_SIZE }, { load_conn_param, MGMT_LOAD_CONN_PARAM_SIZE, HCI_MGMT_VAR_LEN }, { read_unconf_index_list, MGMT_READ_UNCONF_INDEX_LIST_SIZE, HCI_MGMT_NO_HDEV | HCI_MGMT_UNTRUSTED }, { read_config_info, MGMT_READ_CONFIG_INFO_SIZE, HCI_MGMT_UNCONFIGURED | HCI_MGMT_UNTRUSTED }, { set_external_config, MGMT_SET_EXTERNAL_CONFIG_SIZE, HCI_MGMT_UNCONFIGURED }, { set_public_address, MGMT_SET_PUBLIC_ADDRESS_SIZE, HCI_MGMT_UNCONFIGURED }, { start_service_discovery, MGMT_START_SERVICE_DISCOVERY_SIZE, HCI_MGMT_VAR_LEN }, { read_local_oob_ext_data, MGMT_READ_LOCAL_OOB_EXT_DATA_SIZE }, { read_ext_index_list, MGMT_READ_EXT_INDEX_LIST_SIZE, HCI_MGMT_NO_HDEV | HCI_MGMT_UNTRUSTED }, { read_adv_features, MGMT_READ_ADV_FEATURES_SIZE }, { add_advertising, MGMT_ADD_ADVERTISING_SIZE, HCI_MGMT_VAR_LEN }, { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, { get_adv_size_info, MGMT_GET_ADV_SIZE_INFO_SIZE }, { start_limited_discovery, MGMT_START_DISCOVERY_SIZE }, { read_ext_controller_info,MGMT_READ_EXT_INFO_SIZE, HCI_MGMT_UNTRUSTED }, { set_appearance, MGMT_SET_APPEARANCE_SIZE }, { get_phy_configuration, MGMT_GET_PHY_CONFIGURATION_SIZE }, { set_phy_configuration, MGMT_SET_PHY_CONFIGURATION_SIZE }, { set_blocked_keys, MGMT_OP_SET_BLOCKED_KEYS_SIZE, HCI_MGMT_VAR_LEN }, { set_wideband_speech, MGMT_SETTING_SIZE }, { read_controller_cap, MGMT_READ_CONTROLLER_CAP_SIZE, HCI_MGMT_UNTRUSTED }, { read_exp_features_info, MGMT_READ_EXP_FEATURES_INFO_SIZE, HCI_MGMT_UNTRUSTED | HCI_MGMT_HDEV_OPTIONAL }, { set_exp_feature, MGMT_SET_EXP_FEATURE_SIZE, HCI_MGMT_VAR_LEN | HCI_MGMT_HDEV_OPTIONAL }, { read_def_system_config, MGMT_READ_DEF_SYSTEM_CONFIG_SIZE, HCI_MGMT_UNTRUSTED }, { set_def_system_config, MGMT_SET_DEF_SYSTEM_CONFIG_SIZE, HCI_MGMT_VAR_LEN }, { read_def_runtime_config, MGMT_READ_DEF_RUNTIME_CONFIG_SIZE, HCI_MGMT_UNTRUSTED }, { set_def_runtime_config, MGMT_SET_DEF_RUNTIME_CONFIG_SIZE, HCI_MGMT_VAR_LEN }, { get_device_flags, MGMT_GET_DEVICE_FLAGS_SIZE }, { set_device_flags, MGMT_SET_DEVICE_FLAGS_SIZE }, { read_adv_mon_features, MGMT_READ_ADV_MONITOR_FEATURES_SIZE }, { add_adv_patterns_monitor,MGMT_ADD_ADV_PATTERNS_MONITOR_SIZE, HCI_MGMT_VAR_LEN }, { remove_adv_monitor, MGMT_REMOVE_ADV_MONITOR_SIZE }, { add_ext_adv_params, MGMT_ADD_EXT_ADV_PARAMS_MIN_SIZE, HCI_MGMT_VAR_LEN }, { add_ext_adv_data, MGMT_ADD_EXT_ADV_DATA_SIZE, HCI_MGMT_VAR_LEN }, { add_adv_patterns_monitor_rssi, MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE, HCI_MGMT_VAR_LEN }, { set_mesh, MGMT_SET_MESH_RECEIVER_SIZE, HCI_MGMT_VAR_LEN }, { mesh_features, MGMT_MESH_READ_FEATURES_SIZE }, { mesh_send, MGMT_MESH_SEND_SIZE, HCI_MGMT_VAR_LEN }, { mesh_send_cancel, MGMT_MESH_SEND_CANCEL_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) { struct mgmt_ev_ext_index ev; if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS); ev.type = 0x01; } else { mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, HCI_MGMT_INDEX_EVENTS); ev.type = 0x00; } ev.bus = hdev->bus; mgmt_index_event(MGMT_EV_EXT_INDEX_ADDED, hdev, &ev, sizeof(ev), HCI_MGMT_EXT_INDEX_EVENTS); } void mgmt_index_removed(struct hci_dev *hdev) { struct mgmt_ev_ext_index ev; u8 status = MGMT_STATUS_INVALID_INDEX; if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS); ev.type = 0x01; } else { mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, HCI_MGMT_INDEX_EVENTS); ev.type = 0x00; } ev.bus = hdev->bus; mgmt_index_event(MGMT_EV_EXT_INDEX_REMOVED, hdev, &ev, sizeof(ev), HCI_MGMT_EXT_INDEX_EVENTS); /* Cancel any remaining timed work */ if (!hci_dev_test_flag(hdev, HCI_MGMT)) return; cancel_delayed_work_sync(&hdev->discov_off); cancel_delayed_work_sync(&hdev->service_cache); cancel_delayed_work_sync(&hdev->rpa_expired); } void mgmt_power_on(struct hci_dev *hdev, int err) { struct cmd_lookup match = { NULL, hdev }; bt_dev_dbg(hdev, "err %d", err); hci_dev_lock(hdev); if (!err) { restart_le_actions(hdev); hci_update_passive_scan(hdev); } mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); new_settings(hdev, match.sk); if (match.sk) sock_put(match.sk); hci_dev_unlock(hdev); } void __mgmt_power_off(struct hci_dev *hdev) { struct cmd_lookup match = { NULL, hdev }; u8 status, zero_cod[] = { 0, 0, 0 }; mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); /* If the power off is because of hdev unregistration let * use the appropriate INVALID_INDEX status. Otherwise use * NOT_POWERED. We cover both scenarios here since later in * mgmt_index_removed() any hci_conn callbacks will have already * been triggered, potentially causing misleading DISCONNECTED * status responses. */ if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) status = MGMT_STATUS_INVALID_INDEX; else status = MGMT_STATUS_NOT_POWERED; mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, zero_cod, sizeof(zero_cod), HCI_MGMT_DEV_CLASS_EVENTS, NULL); ext_info_changed(hdev, NULL); } new_settings(hdev, match.sk); if (match.sk) sock_put(match.sk); } void mgmt_set_powered_failed(struct hci_dev *hdev, int err) { struct mgmt_pending_cmd *cmd; u8 status; cmd = pending_find(MGMT_OP_SET_POWERED, hdev); if (!cmd) return; if (err == -ERFKILL) status = MGMT_STATUS_RFKILLED; else status = MGMT_STATUS_FAILED; mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_POWERED, status); mgmt_pending_remove(cmd); } void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent) { struct mgmt_ev_new_link_key ev; memset(&ev, 0, sizeof(ev)); ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); ev.key.addr.type = BDADDR_BREDR; ev.key.type = key->type; memcpy(ev.key.val, key->val, HCI_LINK_KEY_SIZE); ev.key.pin_len = key->pin_len; mgmt_event(MGMT_EV_NEW_LINK_KEY, hdev, &ev, sizeof(ev), NULL); } static u8 mgmt_ltk_type(struct smp_ltk *ltk) { switch (ltk->type) { case SMP_LTK: case SMP_LTK_RESPONDER: if (ltk->authenticated) return MGMT_LTK_AUTHENTICATED; return MGMT_LTK_UNAUTHENTICATED; case SMP_LTK_P256: if (ltk->authenticated) return MGMT_LTK_P256_AUTH; return MGMT_LTK_P256_UNAUTH; case SMP_LTK_P256_DEBUG: return MGMT_LTK_P256_DEBUG; } return MGMT_LTK_UNAUTHENTICATED; } void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) { struct mgmt_ev_new_long_term_key ev; memset(&ev, 0, sizeof(ev)); /* Devices using resolvable or non-resolvable random addresses * without providing an identity resolving key don't require * to store long term keys. Their addresses will change the * next time around. * * Only when a remote device provides an identity address * make sure the long term key is stored. If the remote * identity is known, the long term keys are internally * mapped to the identity address. So allow static random * and public addresses here. */ if (key->bdaddr_type == ADDR_LE_DEV_RANDOM && (key->bdaddr.b[5] & 0xc0) != 0xc0) ev.store_hint = 0x00; else ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type); ev.key.type = mgmt_ltk_type(key); ev.key.enc_size = key->enc_size; ev.key.ediv = key->ediv; ev.key.rand = key->rand; if (key->type == SMP_LTK) ev.key.initiator = 1; /* Make sure we copy only the significant bytes based on the * encryption key size, and set the rest of the value to zeroes. */ memcpy(ev.key.val, key->val, key->enc_size); memset(ev.key.val + key->enc_size, 0, sizeof(ev.key.val) - key->enc_size); mgmt_event(MGMT_EV_NEW_LONG_TERM_KEY, hdev, &ev, sizeof(ev), NULL); } void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent) { struct mgmt_ev_new_irk ev; memset(&ev, 0, sizeof(ev)); ev.store_hint = persistent; bacpy(&ev.rpa, &irk->rpa); bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr); ev.irk.addr.type = link_to_bdaddr(LE_LINK, irk->addr_type); memcpy(ev.irk.val, irk->val, sizeof(irk->val)); mgmt_event(MGMT_EV_NEW_IRK, hdev, &ev, sizeof(ev), NULL); } void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, bool persistent) { struct mgmt_ev_new_csrk ev; memset(&ev, 0, sizeof(ev)); /* Devices using resolvable or non-resolvable random addresses * without providing an identity resolving key don't require * to store signature resolving keys. Their addresses will change * the next time around. * * Only when a remote device provides an identity address * make sure the signature resolving key is stored. So allow * static random and public addresses here. */ if (csrk->bdaddr_type == ADDR_LE_DEV_RANDOM && (csrk->bdaddr.b[5] & 0xc0) != 0xc0) ev.store_hint = 0x00; else ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr); ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type); ev.key.type = csrk->type; memcpy(ev.key.val, csrk->val, sizeof(csrk->val)); mgmt_event(MGMT_EV_NEW_CSRK, hdev, &ev, sizeof(ev), NULL); } void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type, u8 store_hint, u16 min_interval, u16 max_interval, u16 latency, u16 timeout) { struct mgmt_ev_new_conn_param ev; if (!hci_is_identity_address(bdaddr, bdaddr_type)) return; memset(&ev, 0, sizeof(ev)); bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(LE_LINK, bdaddr_type); ev.store_hint = store_hint; ev.min_interval = cpu_to_le16(min_interval); ev.max_interval = cpu_to_le16(max_interval); ev.latency = cpu_to_le16(latency); ev.timeout = cpu_to_le16(timeout); mgmt_event(MGMT_EV_NEW_CONN_PARAM, hdev, &ev, sizeof(ev), NULL); } void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, u8 *name, u8 name_len) { struct sk_buff *skb; struct mgmt_ev_device_connected *ev; u16 eir_len = 0; u32 flags = 0; if (test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) return; /* allocate buff for LE or BR/EDR adv */ if (conn->le_adv_data_len > 0) skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED, sizeof(*ev) + conn->le_adv_data_len); else skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED, sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0) + eir_precalc_len(sizeof(conn->dev_class))); ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, &conn->dst); ev->addr.type = link_to_bdaddr(conn->type, conn->dst_type); if (conn->out) flags |= MGMT_DEV_FOUND_INITIATED_CONN; ev->flags = __cpu_to_le32(flags); /* We must ensure that the EIR Data fields are ordered and * unique. Keep it simple for now and avoid the problem by not * adding any BR/EDR data to the LE adv. */ if (conn->le_adv_data_len > 0) { skb_put_data(skb, conn->le_adv_data, conn->le_adv_data_len); eir_len = conn->le_adv_data_len; } else { if (name) eir_len += eir_skb_put_data(skb, EIR_NAME_COMPLETE, name, name_len); if (memcmp(conn->dev_class, "\0\0\0", sizeof(conn->dev_class))) eir_len += eir_skb_put_data(skb, EIR_CLASS_OF_DEV, conn->dev_class, sizeof(conn->dev_class)); } ev->eir_len = cpu_to_le16(eir_len); mgmt_event_skb(skb, NULL); } static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data) { struct hci_dev *hdev = data; struct mgmt_cp_unpair_device *cp = cmd->param; device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, cmd->sk); cmd->cmd_complete(cmd, 0); mgmt_pending_remove(cmd); } bool mgmt_powering_down(struct hci_dev *hdev) { struct mgmt_pending_cmd *cmd; struct mgmt_mode *cp; if (hci_dev_test_flag(hdev, HCI_POWERING_DOWN)) return true; cmd = pending_find(MGMT_OP_SET_POWERED, hdev); if (!cmd) return false; cp = cmd->param; if (!cp->val) return true; return false; } void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 reason, bool mgmt_connected) { struct mgmt_ev_device_disconnected ev; struct sock *sk = NULL; if (!mgmt_connected) return; if (link_type != ACL_LINK && link_type != LE_LINK) return; bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(link_type, addr_type); ev.reason = reason; /* Report disconnects due to suspend */ if (hdev->suspended) ev.reason = MGMT_DEV_DISCONN_LOCAL_HOST_SUSPEND; mgmt_event(MGMT_EV_DEVICE_DISCONNECTED, hdev, &ev, sizeof(ev), sk); if (sk) sock_put(sk); } void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status) { u8 bdaddr_type = link_to_bdaddr(link_type, addr_type); struct mgmt_cp_disconnect *cp; struct mgmt_pending_cmd *cmd; mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp, hdev); cmd = pending_find(MGMT_OP_DISCONNECT, hdev); if (!cmd) return; cp = cmd->param; if (bacmp(bdaddr, &cp->addr.bdaddr)) return; if (cp->addr.type != bdaddr_type) return; cmd->cmd_complete(cmd, mgmt_status(status)); mgmt_pending_remove(cmd); } void mgmt_connect_failed(struct hci_dev *hdev, struct hci_conn *conn, u8 status) { struct mgmt_ev_connect_failed ev; if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) { mgmt_device_disconnected(hdev, &conn->dst, conn->type, conn->dst_type, status, true); return; } bacpy(&ev.addr.bdaddr, &conn->dst); ev.addr.type = link_to_bdaddr(conn->type, conn->dst_type); ev.status = mgmt_status(status); mgmt_event(MGMT_EV_CONNECT_FAILED, hdev, &ev, sizeof(ev), NULL); } void mgmt_pin_code_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 secure) { struct mgmt_ev_pin_code_request ev; bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = BDADDR_BREDR; ev.secure = secure; mgmt_event(MGMT_EV_PIN_CODE_REQUEST, hdev, &ev, sizeof(ev), NULL); } void mgmt_pin_code_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status) { struct mgmt_pending_cmd *cmd; cmd = pending_find(MGMT_OP_PIN_CODE_REPLY, hdev); if (!cmd) return; cmd->cmd_complete(cmd, mgmt_status(status)); mgmt_pending_remove(cmd); } void mgmt_pin_code_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status) { struct mgmt_pending_cmd *cmd; cmd = pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, hdev); if (!cmd) return; cmd->cmd_complete(cmd, mgmt_status(status)); mgmt_pending_remove(cmd); } int mgmt_user_confirm_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u32 value, u8 confirm_hint) { struct mgmt_ev_user_confirm_request ev; bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr); bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(link_type, addr_type); ev.confirm_hint = confirm_hint; ev.value = cpu_to_le32(value); return mgmt_event(MGMT_EV_USER_CONFIRM_REQUEST, hdev, &ev, sizeof(ev), NULL); } int mgmt_user_passkey_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type) { struct mgmt_ev_user_passkey_request ev; bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr); bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(link_type, addr_type); return mgmt_event(MGMT_EV_USER_PASSKEY_REQUEST, hdev, &ev, sizeof(ev), NULL); } static int user_pairing_resp_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status, u8 opcode) { struct mgmt_pending_cmd *cmd; cmd = pending_find(opcode, hdev); if (!cmd) return -ENOENT; cmd->cmd_complete(cmd, mgmt_status(status)); mgmt_pending_remove(cmd); return 0; } int mgmt_user_confirm_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status) { return user_pairing_resp_complete(hdev, bdaddr, link_type, addr_type, status, MGMT_OP_USER_CONFIRM_REPLY); } int mgmt_user_confirm_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status) { return user_pairing_resp_complete(hdev, bdaddr, link_type, addr_type, status, MGMT_OP_USER_CONFIRM_NEG_REPLY); } int mgmt_user_passkey_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status) { return user_pairing_resp_complete(hdev, bdaddr, link_type, addr_type, status, MGMT_OP_USER_PASSKEY_REPLY); } int mgmt_user_passkey_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status) { return user_pairing_resp_complete(hdev, bdaddr, link_type, addr_type, status, MGMT_OP_USER_PASSKEY_NEG_REPLY); } int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u32 passkey, u8 entered) { struct mgmt_ev_passkey_notify ev; bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr); bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(link_type, addr_type); ev.passkey = __cpu_to_le32(passkey); ev.entered = entered; return mgmt_event(MGMT_EV_PASSKEY_NOTIFY, hdev, &ev, sizeof(ev), NULL); } void mgmt_auth_failed(struct hci_conn *conn, u8 hci_status) { struct mgmt_ev_auth_failed ev; struct mgmt_pending_cmd *cmd; u8 status = mgmt_status(hci_status); bacpy(&ev.addr.bdaddr, &conn->dst); ev.addr.type = link_to_bdaddr(conn->type, conn->dst_type); ev.status = status; cmd = find_pairing(conn); mgmt_event(MGMT_EV_AUTH_FAILED, conn->hdev, &ev, sizeof(ev), cmd ? cmd->sk : NULL); if (cmd) { cmd->cmd_complete(cmd, status); mgmt_pending_remove(cmd); } } void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) { struct cmd_lookup match = { NULL, hdev }; bool changed; if (status) { u8 mgmt_err = mgmt_status(status); mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, cmd_status_rsp, &mgmt_err); return; } if (test_bit(HCI_AUTH, &hdev->flags)) changed = !hci_dev_test_and_set_flag(hdev, HCI_LINK_SECURITY); else changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY); mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, settings_rsp, &match); if (changed) new_settings(hdev, match.sk); if (match.sk) sock_put(match.sk); } static void sk_lookup(struct mgmt_pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; if (match->sk == NULL) { match->sk = cmd->sk; sock_hold(match->sk); } } void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status) { struct cmd_lookup match = { NULL, hdev, mgmt_status(status) }; mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match); mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); if (!status) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, 3, HCI_MGMT_DEV_CLASS_EVENTS, NULL); ext_info_changed(hdev, NULL); } if (match.sk) sock_put(match.sk); } void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) { struct mgmt_cp_set_local_name ev; struct mgmt_pending_cmd *cmd; if (status) return; memset(&ev, 0, sizeof(ev)); memcpy(ev.name, name, HCI_MAX_NAME_LENGTH); memcpy(ev.short_name, hdev->short_name, HCI_MAX_SHORT_NAME_LENGTH); cmd = pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); if (!cmd) { memcpy(hdev->dev_name, name, sizeof(hdev->dev_name)); /* If this is a HCI command related to powering on the * HCI dev don't send any mgmt signals. */ if (hci_dev_test_flag(hdev, HCI_POWERING_DOWN)) return; if (pending_find(MGMT_OP_SET_POWERED, hdev)) return; } mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), HCI_MGMT_LOCAL_NAME_EVENTS, cmd ? cmd->sk : NULL); ext_info_changed(hdev, cmd ? cmd->sk : NULL); } static inline bool has_uuid(u8 *uuid, u16 uuid_count, u8 (*uuids)[16]) { int i; for (i = 0; i < uuid_count; i++) { if (!memcmp(uuid, uuids[i], 16)) return true; } return false; } static bool eir_has_uuids(u8 *eir, u16 eir_len, u16 uuid_count, u8 (*uuids)[16]) { u16 parsed = 0; while (parsed < eir_len) { u8 field_len = eir[0]; u8 uuid[16]; int i; if (field_len == 0) break; if (eir_len - parsed < field_len + 1) break; switch (eir[1]) { case EIR_UUID16_ALL: case EIR_UUID16_SOME: for (i = 0; i + 3 <= field_len; i += 2) { memcpy(uuid, bluetooth_base_uuid, 16); uuid[13] = eir[i + 3]; uuid[12] = eir[i + 2]; if (has_uuid(uuid, uuid_count, uuids)) return true; } break; case EIR_UUID32_ALL: case EIR_UUID32_SOME: for (i = 0; i + 5 <= field_len; i += 4) { memcpy(uuid, bluetooth_base_uuid, 16); uuid[15] = eir[i + 5]; uuid[14] = eir[i + 4]; uuid[13] = eir[i + 3]; uuid[12] = eir[i + 2]; if (has_uuid(uuid, uuid_count, uuids)) return true; } break; case EIR_UUID128_ALL: case EIR_UUID128_SOME: for (i = 0; i + 17 <= field_len; i += 16) { memcpy(uuid, eir + i + 2, 16); if (has_uuid(uuid, uuid_count, uuids)) return true; } break; } parsed += field_len + 1; eir += field_len + 1; } return false; } static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) { /* If a RSSI threshold has been specified, and * HCI_QUIRK_STRICT_DUPLICATE_FILTER is not set, then all results with * a RSSI smaller than the RSSI threshold will be dropped. If the quirk * is set, let it through for further processing, as we might need to * restart the scan. * * For BR/EDR devices (pre 1.2) providing no RSSI during inquiry, * the results are also dropped. */ if (hdev->discovery.rssi != HCI_RSSI_INVALID && (rssi == HCI_RSSI_INVALID || (rssi < hdev->discovery.rssi && !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)))) return false; if (hdev->discovery.uuid_count != 0) { /* If a list of UUIDs is provided in filter, results with no * matching UUID should be dropped. */ if (!eir_has_uuids(eir, eir_len, hdev->discovery.uuid_count, hdev->discovery.uuids) && !eir_has_uuids(scan_rsp, scan_rsp_len, hdev->discovery.uuid_count, hdev->discovery.uuids)) return false; } /* If duplicate filtering does not report RSSI changes, then restart * scanning to ensure updated result with updated RSSI values. */ if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)) { /* Validate RSSI value against the RSSI threshold once more. */ if (hdev->discovery.rssi != HCI_RSSI_INVALID && rssi < hdev->discovery.rssi) return false; } return true; } void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type) { struct mgmt_ev_adv_monitor_device_lost ev; ev.monitor_handle = cpu_to_le16(handle); bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = addr_type; mgmt_event(MGMT_EV_ADV_MONITOR_DEVICE_LOST, hdev, &ev, sizeof(ev), NULL); } static void mgmt_send_adv_monitor_device_found(struct hci_dev *hdev, struct sk_buff *skb, struct sock *skip_sk, u16 handle) { struct sk_buff *advmon_skb; size_t advmon_skb_len; __le16 *monitor_handle; if (!skb) return; advmon_skb_len = (sizeof(struct mgmt_ev_adv_monitor_device_found) - sizeof(struct mgmt_ev_device_found)) + skb->len; advmon_skb = mgmt_alloc_skb(hdev, MGMT_EV_ADV_MONITOR_DEVICE_FOUND, advmon_skb_len); if (!advmon_skb) return; /* ADV_MONITOR_DEVICE_FOUND is similar to DEVICE_FOUND event except * that it also has 'monitor_handle'. Make a copy of DEVICE_FOUND and * store monitor_handle of the matched monitor. */ monitor_handle = skb_put(advmon_skb, sizeof(*monitor_handle)); *monitor_handle = cpu_to_le16(handle); skb_put_data(advmon_skb, skb->data, skb->len); mgmt_event_skb(advmon_skb, skip_sk); } static void mgmt_adv_monitor_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, bool report_device, struct sk_buff *skb, struct sock *skip_sk) { struct monitored_device *dev, *tmp; bool matched = false; bool notified = false; /* We have received the Advertisement Report because: * 1. the kernel has initiated active discovery * 2. if not, we have pend_le_reports > 0 in which case we are doing * passive scanning * 3. if none of the above is true, we have one or more active * Advertisement Monitor * * For case 1 and 2, report all advertisements via MGMT_EV_DEVICE_FOUND * and report ONLY one advertisement per device for the matched Monitor * via MGMT_EV_ADV_MONITOR_DEVICE_FOUND event. * * For case 3, since we are not active scanning and all advertisements * received are due to a matched Advertisement Monitor, report all * advertisements ONLY via MGMT_EV_ADV_MONITOR_DEVICE_FOUND event. */ if (report_device && !hdev->advmon_pend_notify) { mgmt_event_skb(skb, skip_sk); return; } hdev->advmon_pend_notify = false; list_for_each_entry_safe(dev, tmp, &hdev->monitored_devices, list) { if (!bacmp(&dev->bdaddr, bdaddr)) { matched = true; if (!dev->notified) { mgmt_send_adv_monitor_device_found(hdev, skb, skip_sk, dev->handle); notified = true; dev->notified = true; } } if (!dev->notified) hdev->advmon_pend_notify = true; } if (!report_device && ((matched && !notified) || !msft_monitor_supported(hdev))) { /* Handle 0 indicates that we are not active scanning and this * is a subsequent advertisement report for an already matched * Advertisement Monitor or the controller offloading support * is not available. */ mgmt_send_adv_monitor_device_found(hdev, skb, skip_sk, 0); } if (report_device) mgmt_event_skb(skb, skip_sk); else kfree_skb(skb); } static void mesh_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len, u64 instant) { struct sk_buff *skb; struct mgmt_ev_mesh_device_found *ev; int i, j; if (!hdev->mesh_ad_types[0]) goto accepted; /* Scan for requested AD types */ if (eir_len > 0) { for (i = 0; i + 1 < eir_len; i += eir[i] + 1) { for (j = 0; j < sizeof(hdev->mesh_ad_types); j++) { if (!hdev->mesh_ad_types[j]) break; if (hdev->mesh_ad_types[j] == eir[i + 1]) goto accepted; } } } if (scan_rsp_len > 0) { for (i = 0; i + 1 < scan_rsp_len; i += scan_rsp[i] + 1) { for (j = 0; j < sizeof(hdev->mesh_ad_types); j++) { if (!hdev->mesh_ad_types[j]) break; if (hdev->mesh_ad_types[j] == scan_rsp[i + 1]) goto accepted; } } } return; accepted: skb = mgmt_alloc_skb(hdev, MGMT_EV_MESH_DEVICE_FOUND, sizeof(*ev) + eir_len + scan_rsp_len); if (!skb) return; ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, bdaddr); ev->addr.type = link_to_bdaddr(LE_LINK, addr_type); ev->rssi = rssi; ev->flags = cpu_to_le32(flags); ev->instant = cpu_to_le64(instant); if (eir_len > 0) /* Copy EIR or advertising data into event */ skb_put_data(skb, eir, eir_len); if (scan_rsp_len > 0) /* Append scan response data to event */ skb_put_data(skb, scan_rsp, scan_rsp_len); ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); mgmt_event_skb(skb, NULL); } void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len, u64 instant) { struct sk_buff *skb; struct mgmt_ev_device_found *ev; bool report_device = hci_discovery_active(hdev); if (hci_dev_test_flag(hdev, HCI_MESH) && link_type == LE_LINK) mesh_device_found(hdev, bdaddr, addr_type, rssi, flags, eir, eir_len, scan_rsp, scan_rsp_len, instant); /* Don't send events for a non-kernel initiated discovery. With * LE one exception is if we have pend_le_reports > 0 in which * case we're doing passive scanning and want these events. */ if (!hci_discovery_active(hdev)) { if (link_type == ACL_LINK) return; if (link_type == LE_LINK && !list_empty(&hdev->pend_le_reports)) report_device = true; else if (!hci_is_adv_monitoring(hdev)) return; } if (hdev->discovery.result_filtering) { /* We are using service discovery */ if (!is_filter_match(hdev, rssi, eir, eir_len, scan_rsp, scan_rsp_len)) return; } if (hdev->discovery.limited) { /* Check for limited discoverable bit */ if (dev_class) { if (!(dev_class[1] & 0x20)) return; } else { u8 *flags = eir_get_data(eir, eir_len, EIR_FLAGS, NULL); if (!flags || !(flags[0] & LE_AD_LIMITED)) return; } } /* Allocate skb. The 5 extra bytes are for the potential CoD field */ skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, sizeof(*ev) + eir_len + scan_rsp_len + 5); if (!skb) return; ev = skb_put(skb, sizeof(*ev)); /* In case of device discovery with BR/EDR devices (pre 1.2), the * RSSI value was reported as 0 when not available. This behavior * is kept when using device discovery. This is required for full * backwards compatibility with the API. * * However when using service discovery, the value 127 will be * returned when the RSSI is not available. */ if (rssi == HCI_RSSI_INVALID && !hdev->discovery.report_invalid_rssi && link_type == ACL_LINK) rssi = 0; bacpy(&ev->addr.bdaddr, bdaddr); ev->addr.type = link_to_bdaddr(link_type, addr_type); ev->rssi = rssi; ev->flags = cpu_to_le32(flags); if (eir_len > 0) /* Copy EIR or advertising data into event */ skb_put_data(skb, eir, eir_len); if (dev_class && !eir_get_data(eir, eir_len, EIR_CLASS_OF_DEV, NULL)) { u8 eir_cod[5]; eir_len += eir_append_data(eir_cod, 0, EIR_CLASS_OF_DEV, dev_class, 3); skb_put_data(skb, eir_cod, sizeof(eir_cod)); } if (scan_rsp_len > 0) /* Append scan response data to event */ skb_put_data(skb, scan_rsp, scan_rsp_len); ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); mgmt_adv_monitor_device_found(hdev, bdaddr, report_device, skb, NULL); } void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, s8 rssi, u8 *name, u8 name_len) { struct sk_buff *skb; struct mgmt_ev_device_found *ev; u16 eir_len = 0; u32 flags = 0; skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0)); ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, bdaddr); ev->addr.type = link_to_bdaddr(link_type, addr_type); ev->rssi = rssi; if (name) eir_len += eir_skb_put_data(skb, EIR_NAME_COMPLETE, name, name_len); else flags = MGMT_DEV_FOUND_NAME_REQUEST_FAILED; ev->eir_len = cpu_to_le16(eir_len); ev->flags = cpu_to_le32(flags); mgmt_event_skb(skb, NULL); } void mgmt_discovering(struct hci_dev *hdev, u8 discovering) { struct mgmt_ev_discovering ev; bt_dev_dbg(hdev, "discovering %u", discovering); memset(&ev, 0, sizeof(ev)); ev.type = hdev->discovery.type; ev.discovering = discovering; mgmt_event(MGMT_EV_DISCOVERING, hdev, &ev, sizeof(ev), NULL); } void mgmt_suspending(struct hci_dev *hdev, u8 state) { struct mgmt_ev_controller_suspend ev; ev.suspend_state = state; mgmt_event(MGMT_EV_CONTROLLER_SUSPEND, hdev, &ev, sizeof(ev), NULL); } void mgmt_resuming(struct hci_dev *hdev, u8 reason, bdaddr_t *bdaddr, u8 addr_type) { struct mgmt_ev_controller_resume ev; ev.wake_reason = reason; if (bdaddr) { bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = addr_type; } else { memset(&ev.addr, 0, sizeof(ev.addr)); } mgmt_event(MGMT_EV_CONTROLLER_RESUME, hdev, &ev, sizeof(ev), NULL); } static struct hci_mgmt_chan chan = { .channel = HCI_CHANNEL_CONTROL, .handler_count = ARRAY_SIZE(mgmt_handlers), .handlers = mgmt_handlers, .hdev_init = mgmt_init_hdev, }; int mgmt_init(void) { return hci_mgmt_chan_register(&chan); } void mgmt_exit(void) { hci_mgmt_chan_unregister(&chan); } void mgmt_cleanup(struct sock *sk) { struct mgmt_mesh_tx *mesh_tx; struct hci_dev *hdev; read_lock(&hci_dev_list_lock); list_for_each_entry(hdev, &hci_dev_list, list) { do { mesh_tx = mgmt_mesh_next(hdev, sk); if (mesh_tx) mesh_send_complete(hdev, mesh_tx, true); } while (mesh_tx); } read_unlock(&hci_dev_list_lock); } |
1 1 1 1 1 1 1 1 1 1 1 2 2 2 8 8 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 | // SPDX-License-Identifier: GPL-2.0-or-later /* incoming call handling * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/net.h> #include <linux/skbuff.h> #include <linux/errqueue.h> #include <linux/udp.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/icmp.h> #include <linux/gfp.h> #include <linux/circ_buf.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include <net/ip.h> #include "ar-internal.h" static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call, unsigned long user_call_ID) { } /* * Preallocate a single service call, connection and peer and, if possible, * give them a user ID and attach the user's side of the ID to them. */ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, struct rxrpc_backlog *b, rxrpc_notify_rx_t notify_rx, rxrpc_user_attach_call_t user_attach_call, unsigned long user_call_ID, gfp_t gfp, unsigned int debug_id) { struct rxrpc_call *call, *xcall; struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); struct rb_node *parent, **pp; int max, tmp; unsigned int size = RXRPC_BACKLOG_MAX; unsigned int head, tail, call_head, call_tail; max = rx->sk.sk_max_ack_backlog; tmp = rx->sk.sk_ack_backlog; if (tmp >= max) { _leave(" = -ENOBUFS [full %u]", max); return -ENOBUFS; } max -= tmp; /* We don't need more conns and peers than we have calls, but on the * other hand, we shouldn't ever use more peers than conns or conns * than calls. */ call_head = b->call_backlog_head; call_tail = READ_ONCE(b->call_backlog_tail); tmp = CIRC_CNT(call_head, call_tail, size); if (tmp >= max) { _leave(" = -ENOBUFS [enough %u]", tmp); return -ENOBUFS; } max = tmp + 1; head = b->peer_backlog_head; tail = READ_ONCE(b->peer_backlog_tail); if (CIRC_CNT(head, tail, size) < max) { struct rxrpc_peer *peer; peer = rxrpc_alloc_peer(rx->local, gfp, rxrpc_peer_new_prealloc); if (!peer) return -ENOMEM; b->peer_backlog[head] = peer; smp_store_release(&b->peer_backlog_head, (head + 1) & (size - 1)); } head = b->conn_backlog_head; tail = READ_ONCE(b->conn_backlog_tail); if (CIRC_CNT(head, tail, size) < max) { struct rxrpc_connection *conn; conn = rxrpc_prealloc_service_connection(rxnet, gfp); if (!conn) return -ENOMEM; b->conn_backlog[head] = conn; smp_store_release(&b->conn_backlog_head, (head + 1) & (size - 1)); } /* Now it gets complicated, because calls get registered with the * socket here, with a user ID preassigned by the user. */ call = rxrpc_alloc_call(rx, gfp, debug_id); if (!call) return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); rxrpc_set_call_state(call, RXRPC_CALL_SERVER_PREALLOC); __set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), user_call_ID, rxrpc_call_new_prealloc_service); write_lock(&rx->call_lock); /* Check the user ID isn't already in use */ pp = &rx->calls.rb_node; parent = NULL; while (*pp) { parent = *pp; xcall = rb_entry(parent, struct rxrpc_call, sock_node); if (user_call_ID < xcall->user_call_ID) pp = &(*pp)->rb_left; else if (user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else goto id_in_use; } call->user_call_ID = user_call_ID; call->notify_rx = notify_rx; if (user_attach_call) { rxrpc_get_call(call, rxrpc_call_get_kernel_service); user_attach_call(call, user_call_ID); } rxrpc_get_call(call, rxrpc_call_get_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); set_bit(RXRPC_CALL_HAS_USERID, &call->flags); list_add(&call->sock_link, &rx->sock_calls); write_unlock(&rx->call_lock); rxnet = call->rxnet; spin_lock(&rxnet->call_lock); list_add_tail_rcu(&call->link, &rxnet->calls); spin_unlock(&rxnet->call_lock); b->call_backlog[call_head] = call; smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); _leave(" = 0 [%d -> %lx]", call->debug_id, user_call_ID); return 0; id_in_use: write_unlock(&rx->call_lock); rxrpc_cleanup_call(call); _leave(" = -EBADSLT"); return -EBADSLT; } /* * Allocate the preallocation buffers for incoming service calls. These must * be charged manually. */ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) { struct rxrpc_backlog *b = rx->backlog; if (!b) { b = kzalloc(sizeof(struct rxrpc_backlog), gfp); if (!b) return -ENOMEM; rx->backlog = b; } return 0; } /* * Discard the preallocation on a service. */ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) { struct rxrpc_backlog *b = rx->backlog; struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); unsigned int size = RXRPC_BACKLOG_MAX, head, tail; if (!b) return; rx->backlog = NULL; /* Make sure that there aren't any incoming calls in progress before we * clear the preallocation buffers. */ spin_lock(&rx->incoming_lock); spin_unlock(&rx->incoming_lock); head = b->peer_backlog_head; tail = b->peer_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_peer *peer = b->peer_backlog[tail]; rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_peer); kfree(peer); tail = (tail + 1) & (size - 1); } head = b->conn_backlog_head; tail = b->conn_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_connection *conn = b->conn_backlog[tail]; write_lock(&rxnet->conn_lock); list_del(&conn->link); list_del(&conn->proc_link); write_unlock(&rxnet->conn_lock); kfree(conn); if (atomic_dec_and_test(&rxnet->nr_conns)) wake_up_var(&rxnet->nr_conns); tail = (tail + 1) & (size - 1); } head = b->call_backlog_head; tail = b->call_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_call *call = b->call_backlog[tail]; rcu_assign_pointer(call->socket, rx); if (rx->discard_new_call) { _debug("discard %lx", call->user_call_ID); rx->discard_new_call(call, call->user_call_ID); if (call->notify_rx) call->notify_rx = rxrpc_dummy_notify; rxrpc_put_call(call, rxrpc_call_put_kernel); } rxrpc_call_completed(call); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put_discard_prealloc); tail = (tail + 1) & (size - 1); } kfree(b); } /* * Allocate a new incoming call from the prealloc pool, along with a connection * and a peer as necessary. */ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_local *local, struct rxrpc_peer *peer, struct rxrpc_connection *conn, const struct rxrpc_security *sec, struct sockaddr_rxrpc *peer_srx, struct sk_buff *skb) { struct rxrpc_backlog *b = rx->backlog; struct rxrpc_call *call; unsigned short call_head, conn_head, peer_head; unsigned short call_tail, conn_tail, peer_tail; unsigned short call_count, conn_count; /* #calls >= #conns >= #peers must hold true. */ call_head = smp_load_acquire(&b->call_backlog_head); call_tail = b->call_backlog_tail; call_count = CIRC_CNT(call_head, call_tail, RXRPC_BACKLOG_MAX); conn_head = smp_load_acquire(&b->conn_backlog_head); conn_tail = b->conn_backlog_tail; conn_count = CIRC_CNT(conn_head, conn_tail, RXRPC_BACKLOG_MAX); ASSERTCMP(conn_count, >=, call_count); peer_head = smp_load_acquire(&b->peer_backlog_head); peer_tail = b->peer_backlog_tail; ASSERTCMP(CIRC_CNT(peer_head, peer_tail, RXRPC_BACKLOG_MAX), >=, conn_count); if (call_count == 0) return NULL; if (!conn) { if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_service_conn)) peer = NULL; if (!peer) { peer = b->peer_backlog[peer_tail]; peer->srx = *peer_srx; b->peer_backlog[peer_tail] = NULL; smp_store_release(&b->peer_backlog_tail, (peer_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); rxrpc_new_incoming_peer(local, peer); } /* Now allocate and set up the connection */ conn = b->conn_backlog[conn_tail]; b->conn_backlog[conn_tail] = NULL; smp_store_release(&b->conn_backlog_tail, (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); conn->local = rxrpc_get_local(local, rxrpc_local_get_prealloc_conn); conn->peer = peer; rxrpc_see_connection(conn, rxrpc_conn_see_new_service_conn); rxrpc_new_incoming_connection(rx, conn, sec, skb); } else { rxrpc_get_connection(conn, rxrpc_conn_get_service_conn); atomic_inc(&conn->active); } /* And now we can allocate and set up a new call */ call = b->call_backlog[call_tail]; b->call_backlog[call_tail] = NULL; smp_store_release(&b->call_backlog_tail, (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); rxrpc_see_call(call, rxrpc_call_see_accept); call->local = rxrpc_get_local(conn->local, rxrpc_local_get_call); call->conn = conn; call->security = conn->security; call->security_ix = conn->security_ix; call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_accept); call->dest_srx = peer->srx; call->cong_ssthresh = call->peer->cong_ssthresh; call->tx_last_sent = ktime_get_real(); return call; } /* * Set up a new incoming call. Called from the I/O thread. * * If this is for a kernel service, when we allocate the call, it will have * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the * retainer ref obtained from the backlog buffer. Prealloc calls for userspace * services only have the ref from the backlog buffer. * * If we want to report an error, we mark the skb with the packet type and * abort code and return false. */ bool rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_peer *peer, struct rxrpc_connection *conn, struct sockaddr_rxrpc *peer_srx, struct sk_buff *skb) { const struct rxrpc_security *sec = NULL; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_call *call = NULL; struct rxrpc_sock *rx; _enter(""); /* Don't set up a call for anything other than a DATA packet. */ if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA) return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call); read_lock(&local->services_lock); /* Weed out packets to services we're not offering. Packets that would * begin a call are explicitly rejected and the rest are just * discarded. */ rx = local->service; if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && sp->hdr.serviceId != rx->second_service) ) { if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.seq == 1) goto unsupported_service; goto discard; } if (!conn) { sec = rxrpc_get_incoming_security(rx, skb); if (!sec) goto unsupported_security; } spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { rxrpc_direct_abort(skb, rxrpc_abort_shut_down, RX_INVALID_OPERATION, -ESHUTDOWN); goto no_call; } call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, peer_srx, skb); if (!call) { skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; goto no_call; } trace_rxrpc_receive(call, rxrpc_receive_incoming, sp->hdr.serial, sp->hdr.seq); /* Make the call live. */ rxrpc_incoming_call(rx, call, skb); conn = call->conn; if (rx->notify_new_call) rx->notify_new_call(&rx->sk, call, call->user_call_ID); spin_lock(&conn->state_lock); if (conn->state == RXRPC_CONN_SERVICE_UNSECURED) { conn->state = RXRPC_CONN_SERVICE_CHALLENGING; set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); rxrpc_queue_conn(call->conn, rxrpc_conn_queue_challenge); } spin_unlock(&conn->state_lock); spin_unlock(&rx->incoming_lock); read_unlock(&local->services_lock); if (hlist_unhashed(&call->error_link)) { spin_lock(&call->peer->lock); hlist_add_head(&call->error_link, &call->peer->error_targets); spin_unlock(&call->peer->lock); } _leave(" = %p{%d}", call, call->debug_id); rxrpc_input_call_event(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); return true; unsupported_service: read_unlock(&local->services_lock); return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, RX_INVALID_OPERATION, -EOPNOTSUPP); unsupported_security: read_unlock(&local->services_lock); return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, RX_INVALID_OPERATION, -EKEYREJECTED); no_call: spin_unlock(&rx->incoming_lock); read_unlock(&local->services_lock); _leave(" = f [%u]", skb->mark); return false; discard: read_unlock(&local->services_lock); return true; } /* * Charge up socket with preallocated calls, attaching user call IDs. */ int rxrpc_user_charge_accept(struct rxrpc_sock *rx, unsigned long user_call_ID) { struct rxrpc_backlog *b = rx->backlog; if (rx->sk.sk_state == RXRPC_CLOSE) return -ESHUTDOWN; return rxrpc_service_prealloc_one(rx, b, NULL, NULL, user_call_ID, GFP_KERNEL, atomic_inc_return(&rxrpc_debug_id)); } /* * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls * @sock: The socket on which to preallocate * @notify_rx: Event notification function for the call * @user_attach_call: Func to attach call to user_call_ID * @user_call_ID: The tag to attach to the preallocated call * @gfp: The allocation conditions. * @debug_id: The tracing debug ID. * * Charge up the socket with preallocated calls, each with a user ID. A * function should be provided to effect the attachment from the user's side. * The user is given a ref to hold on the call. * * Note that the call may be come connected before this function returns. */ int rxrpc_kernel_charge_accept(struct socket *sock, rxrpc_notify_rx_t notify_rx, rxrpc_user_attach_call_t user_attach_call, unsigned long user_call_ID, gfp_t gfp, unsigned int debug_id) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); struct rxrpc_backlog *b = rx->backlog; if (sock->sk->sk_state == RXRPC_CLOSE) return -ESHUTDOWN; return rxrpc_service_prealloc_one(rx, b, notify_rx, user_attach_call, user_call_ID, gfp, debug_id); } EXPORT_SYMBOL(rxrpc_kernel_charge_accept); |
4 4 4 4 4 4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM block #if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_BLOCK_H #include <linux/blktrace_api.h> #include <linux/blkdev.h> #include <linux/buffer_head.h> #include <linux/tracepoint.h> #include <uapi/linux/ioprio.h> #define RWBS_LEN 8 #define IOPRIO_CLASS_STRINGS \ { IOPRIO_CLASS_NONE, "none" }, \ { IOPRIO_CLASS_RT, "rt" }, \ { IOPRIO_CLASS_BE, "be" }, \ { IOPRIO_CLASS_IDLE, "idle" }, \ { IOPRIO_CLASS_INVALID, "invalid"} #ifdef CONFIG_BUFFER_HEAD DECLARE_EVENT_CLASS(block_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh), TP_STRUCT__entry ( __field( dev_t, dev ) __field( sector_t, sector ) __field( size_t, size ) ), TP_fast_assign( __entry->dev = bh->b_bdev->bd_dev; __entry->sector = bh->b_blocknr; __entry->size = bh->b_size; ), TP_printk("%d,%d sector=%llu size=%zu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->sector, __entry->size ) ); /** * block_touch_buffer - mark a buffer accessed * @bh: buffer_head being touched * * Called from touch_buffer(). */ DEFINE_EVENT(block_buffer, block_touch_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh) ); /** * block_dirty_buffer - mark a buffer dirty * @bh: buffer_head being dirtied * * Called from mark_buffer_dirty(). */ DEFINE_EVENT(block_buffer, block_dirty_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh) ); #endif /* CONFIG_BUFFER_HEAD */ /** * block_rq_requeue - place block IO request back on a queue * @rq: block IO operation request * * The block operation request @rq is being placed back into queue * @q. For some reason the request was not completed and needs to be * put back in the queue. */ TRACE_EVENT(block_rq_requeue, TP_PROTO(struct request *rq), TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( unsigned short, ioprio ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); __entry->ioprio = rq->ioprio; blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; ), TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio), IOPRIO_CLASS_STRINGS), IOPRIO_PRIO_HINT(__entry->ioprio), IOPRIO_PRIO_LEVEL(__entry->ioprio), 0) ); DECLARE_EVENT_CLASS(block_rq_completion, TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), TP_ARGS(rq, error, nr_bytes), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( int , error ) __field( unsigned short, ioprio ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_pos(rq); __entry->nr_sector = nr_bytes >> 9; __entry->error = blk_status_to_errno(error); __entry->ioprio = rq->ioprio; blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; ), TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio), IOPRIO_CLASS_STRINGS), IOPRIO_PRIO_HINT(__entry->ioprio), IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->error) ); /** * block_rq_complete - block IO operation completed by device driver * @rq: block operations request * @error: status code * @nr_bytes: number of completed bytes * * The block_rq_complete tracepoint event indicates that some portion * of operation request has been completed by the device driver. If * the @rq->bio is %NULL, then there is absolutely no additional work to * do for the request. If @rq->bio is non-NULL then there is * additional work required to complete the request. */ DEFINE_EVENT(block_rq_completion, block_rq_complete, TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), TP_ARGS(rq, error, nr_bytes) ); /** * block_rq_error - block IO operation error reported by device driver * @rq: block operations request * @error: status code * @nr_bytes: number of completed bytes * * The block_rq_error tracepoint event indicates that some portion * of operation request has failed as reported by the device driver. */ DEFINE_EVENT(block_rq_completion, block_rq_error, TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), TP_ARGS(rq, error, nr_bytes) ); DECLARE_EVENT_CLASS(block_rq, TP_PROTO(struct request *rq), TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) __field( unsigned short, ioprio ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); __entry->bytes = blk_rq_bytes(rq); __entry->ioprio = rq->ioprio; blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %u (%s) %llu + %u %s,%u,%u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __entry->bytes, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio), IOPRIO_CLASS_STRINGS), IOPRIO_PRIO_HINT(__entry->ioprio), IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->comm) ); /** * block_rq_insert - insert block operation request into queue * @rq: block IO operation request * * Called immediately before block operation request @rq is inserted * into queue @q. The fields in the operation request @rq struct can * be examined to determine which device and sectors the pending * operation would access. */ DEFINE_EVENT(block_rq, block_rq_insert, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_rq_issue - issue pending block IO request operation to device driver * @rq: block IO operation request * * Called when block operation request @rq from queue @q is sent to a * device driver for processing. */ DEFINE_EVENT(block_rq, block_rq_issue, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_rq_merge - merge request with another one in the elevator * @rq: block IO operation request * * Called when block operation request @rq from queue @q is merged to another * request queued in the elevator. */ DEFINE_EVENT(block_rq, block_rq_merge, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_io_start - insert a request for execution * @rq: block IO operation request * * Called when block operation request @rq is queued for execution */ DEFINE_EVENT(block_rq, block_io_start, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_io_done - block IO operation request completed * @rq: block IO operation request * * Called when block operation request @rq is completed */ DEFINE_EVENT(block_rq, block_io_done, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_bio_complete - completed all work on the block operation * @q: queue holding the block operation * @bio: block operation completed * * This tracepoint indicates there is no further work to do on this * block IO operation @bio. */ TRACE_EVENT(block_bio_complete, TP_PROTO(struct request_queue *q, struct bio *bio), TP_ARGS(q, bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned, nr_sector ) __field( int, error ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->error = blk_status_to_errno(bio->bi_status); blk_fill_rwbs(__entry->rwbs, bio->bi_opf); ), TP_printk("%d,%d %s %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->error) ); DECLARE_EVENT_CLASS(block_bio, TP_PROTO(struct bio *bio), TP_ARGS(bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->comm) ); /** * block_bio_bounce - used bounce buffer when processing block operation * @bio: block operation * * A bounce buffer was used to handle the block operation @bio in @q. * This occurs when hardware limitations prevent a direct transfer of * data between the @bio data memory area and the IO device. Use of a * bounce buffer requires extra copying of data and decreases * performance. */ DEFINE_EVENT(block_bio, block_bio_bounce, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /** * block_bio_backmerge - merging block operation to the end of an existing operation * @bio: new block operation to merge * * Merging block request @bio to the end of an existing block request. */ DEFINE_EVENT(block_bio, block_bio_backmerge, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /** * block_bio_frontmerge - merging block operation to the beginning of an existing operation * @bio: new block operation to merge * * Merging block IO operation @bio to the beginning of an existing block request. */ DEFINE_EVENT(block_bio, block_bio_frontmerge, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /** * block_bio_queue - putting new block IO operation in queue * @bio: new block operation * * About to place the block IO operation @bio into queue @q. */ DEFINE_EVENT(block_bio, block_bio_queue, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /** * block_getrq - get a free request entry in queue for block IO operations * @bio: pending block IO operation (can be %NULL) * * A request struct has been allocated to handle the block IO operation @bio. */ DEFINE_EVENT(block_bio, block_getrq, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /** * block_plug - keep operations requests in request queue * @q: request queue to plug * * Plug the request queue @q. Do not allow block operation requests * to be sent to the device driver. Instead, accumulate requests in * the queue to improve throughput performance of the block device. */ TRACE_EVENT(block_plug, TP_PROTO(struct request_queue *q), TP_ARGS(q), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("[%s]", __entry->comm) ); DECLARE_EVENT_CLASS(block_unplug, TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), TP_ARGS(q, depth, explicit), TP_STRUCT__entry( __field( int, nr_rq ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->nr_rq = depth; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); /** * block_unplug - release of operations requests in request queue * @q: request queue to unplug * @depth: number of requests just added to the queue * @explicit: whether this was an explicit unplug, or one from schedule() * * Unplug request queue @q because device driver is scheduled to work * on elements in the request queue. */ DEFINE_EVENT(block_unplug, block_unplug, TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), TP_ARGS(q, depth, explicit) ); /** * block_split - split a single bio struct into two bio structs * @bio: block operation being split * @new_sector: The starting sector for the new bio * * The bio request @bio needs to be split into two bio requests. The newly * created @bio request starts at @new_sector. This split may be required due to * hardware limitations such as operation crossing device boundaries in a RAID * system. */ TRACE_EVENT(block_split, TP_PROTO(struct bio *bio, unsigned int new_sector), TP_ARGS(bio, new_sector), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( sector_t, new_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->new_sector = new_sector; blk_fill_rwbs(__entry->rwbs, bio->bi_opf); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu / %llu [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, (unsigned long long)__entry->new_sector, __entry->comm) ); /** * block_bio_remap - map request for a logical device to the raw device * @bio: revised operation * @dev: original device for the operation * @from: original sector for the operation * * An operation for a logical device has been mapped to the * raw block device. */ TRACE_EVENT(block_bio_remap, TP_PROTO(struct bio *bio, dev_t dev, sector_t from), TP_ARGS(bio, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->old_dev = dev; __entry->old_sector = from; blk_fill_rwbs(__entry->rwbs, bio->bi_opf); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, MAJOR(__entry->old_dev), MINOR(__entry->old_dev), (unsigned long long)__entry->old_sector) ); /** * block_rq_remap - map request for a block operation request * @rq: block IO operation request * @dev: device for the operation * @from: original sector for the operation * * The block operation request @rq in @q has been remapped. The block * operation request @rq holds the current information and @from hold * the original sector. */ TRACE_EVENT(block_rq_remap, TP_PROTO(struct request *rq, dev_t dev, sector_t from), TP_ARGS(rq, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) __field( unsigned int, nr_bios ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = disk_devt(rq->q->disk); __entry->sector = blk_rq_pos(rq); __entry->nr_sector = blk_rq_sectors(rq); __entry->old_dev = dev; __entry->old_sector = from; __entry->nr_bios = blk_rq_count_bios(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, MAJOR(__entry->old_dev), MINOR(__entry->old_dev), (unsigned long long)__entry->old_sector, __entry->nr_bios) ); #endif /* _TRACE_BLOCK_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
27 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM mmap_lock #if !defined(_TRACE_MMAP_LOCK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MMAP_LOCK_H #include <linux/tracepoint.h> #include <linux/types.h> struct mm_struct; extern int trace_mmap_lock_reg(void); extern void trace_mmap_lock_unreg(void); DECLARE_EVENT_CLASS(mmap_lock, TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write), TP_ARGS(mm, memcg_path, write), TP_STRUCT__entry( __field(struct mm_struct *, mm) __string(memcg_path, memcg_path) __field(bool, write) ), TP_fast_assign( __entry->mm = mm; __assign_str(memcg_path); __entry->write = write; ), TP_printk( "mm=%p memcg_path=%s write=%s", __entry->mm, __get_str(memcg_path), __entry->write ? "true" : "false" ) ); #define DEFINE_MMAP_LOCK_EVENT(name) \ DEFINE_EVENT_FN(mmap_lock, name, \ TP_PROTO(struct mm_struct *mm, const char *memcg_path, \ bool write), \ TP_ARGS(mm, memcg_path, write), \ trace_mmap_lock_reg, trace_mmap_lock_unreg) DEFINE_MMAP_LOCK_EVENT(mmap_lock_start_locking); DEFINE_MMAP_LOCK_EVENT(mmap_lock_released); TRACE_EVENT_FN(mmap_lock_acquire_returned, TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write, bool success), TP_ARGS(mm, memcg_path, write, success), TP_STRUCT__entry( __field(struct mm_struct *, mm) __string(memcg_path, memcg_path) __field(bool, write) __field(bool, success) ), TP_fast_assign( __entry->mm = mm; __assign_str(memcg_path); __entry->write = write; __entry->success = success; ), TP_printk( "mm=%p memcg_path=%s write=%s success=%s", __entry->mm, __get_str(memcg_path), __entry->write ? "true" : "false", __entry->success ? "true" : "false" ), trace_mmap_lock_reg, trace_mmap_lock_unreg ); #endif /* _TRACE_MMAP_LOCK_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2014 Jozsef Kadlecsik <kadlec@netfilter.org> */ /* Kernel module implementing an IP set type: the hash:mac type */ #include <linux/jhash.h> #include <linux/module.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/if_ether.h> #include <net/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 #define IPSET_TYPE_REV_MAX 1 /* bucketsize, initval support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); IP_SET_MODULE_DESC("hash:mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:mac"); /* Type specific function prefix */ #define HTYPE hash_mac /* Member elements */ struct hash_mac4_elem { /* Zero valued IP addresses cannot be stored */ union { unsigned char ether[ETH_ALEN]; __be32 foo[2]; }; }; /* Common functions */ static bool hash_mac4_data_equal(const struct hash_mac4_elem *e1, const struct hash_mac4_elem *e2, u32 *multi) { return ether_addr_equal(e1->ether, e2->ether); } static bool hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e) { if (nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether)) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_mac4_data_next(struct hash_mac4_elem *next, const struct hash_mac4_elem *e) { } #define MTYPE hash_mac4 #define HOST_MASK 32 #define IP_SET_EMIT_CREATE #define IP_SET_PROTO_UNDEF #include "ip_set_hash_gen.h" static int hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (skb_mac_header(skb) < skb->head || (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; if (opt->flags & IPSET_DIM_ONE_SRC) ether_addr_copy(e.ether, eth_hdr(skb)->h_source); else ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); if (is_zero_ether_addr(e.ether)) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_ETHER] || nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN)) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; ether_addr_copy(e.ether, nla_data(tb[IPSET_ATTR_ETHER])); if (is_zero_ether_addr(e.ether)) return -IPSET_ERR_HASH_ELEM; return adtfn(set, &e, &ext, &ext, flags); } static struct ip_set_type hash_mac_type __read_mostly = { .name = "hash:mac", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_MAC, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_mac_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, .len = ETH_ALEN }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init hash_mac_init(void) { return ip_set_type_register(&hash_mac_type); } static void __exit hash_mac_fini(void) { rcu_barrier(); ip_set_type_unregister(&hash_mac_type); } module_init(hash_mac_init); module_exit(hash_mac_fini); |
34 6 9 12 8 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014 Patrick McHardy <kaber@trash.net> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_reject.h> #include <net/netfilter/ipv4/nf_reject.h> #include <net/netfilter/ipv6/nf_reject.h> static void nft_reject_inet_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); switch (nft_pf(pkt)) { case NFPROTO_IPV4: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nf_send_unreach(pkt->skb, priv->icmp_code, nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: nf_send_reset(nft_net(pkt), nft_sk(pkt), pkt->skb, nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach(pkt->skb, nft_reject_icmp_code(priv->icmp_code), nft_hook(pkt)); break; } break; case NFPROTO_IPV6: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nf_send_unreach6(nft_net(pkt), pkt->skb, priv->icmp_code, nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: nf_send_reset6(nft_net(pkt), nft_sk(pkt), pkt->skb, nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach6(nft_net(pkt), pkt->skb, nft_reject_icmpv6_code(priv->icmp_code), nft_hook(pkt)); break; } break; } regs->verdict.code = NF_DROP; } static int nft_reject_inet_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_INGRESS)); } static struct nft_expr_type nft_reject_inet_type; static const struct nft_expr_ops nft_reject_inet_ops = { .type = &nft_reject_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), .eval = nft_reject_inet_eval, .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_inet_validate, .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_inet_type __read_mostly = { .family = NFPROTO_INET, .name = "reject", .ops = &nft_reject_inet_ops, .policy = nft_reject_policy, .maxattr = NFTA_REJECT_MAX, .owner = THIS_MODULE, }; static int __init nft_reject_inet_module_init(void) { return nft_register_expr(&nft_reject_inet_type); } static void __exit nft_reject_inet_module_exit(void) { nft_unregister_expr(&nft_reject_inet_type); } module_init(nft_reject_inet_module_init); module_exit(nft_reject_inet_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_AF_EXPR(1, "reject"); MODULE_DESCRIPTION("Netfilter nftables reject inet support"); |
82 60 18 62 7 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 | /* SPDX-License-Identifier: GPL-2.0 */ /* * connection tracking event cache. */ #ifndef _NF_CONNTRACK_ECACHE_H #define _NF_CONNTRACK_ECACHE_H #include <net/netfilter/nf_conntrack.h> #include <net/net_namespace.h> #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_conntrack_tuple_common.h> #include <net/netfilter/nf_conntrack_extend.h> enum nf_ct_ecache_state { NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */ NFCT_ECACHE_DESTROY_SENT, /* sent destroy event after failure */ }; struct nf_conntrack_ecache { unsigned long cache; /* bitops want long */ u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ u32 missed; /* missed events */ u32 portid; /* netlink portid of destroyer */ }; static inline struct nf_conntrack_ecache * nf_ct_ecache_find(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE); #else return NULL; #endif } static inline bool nf_ct_ecache_exist(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS return nf_ct_ext_exist(ct, NF_CT_EXT_ECACHE); #else return false; #endif } #ifdef CONFIG_NF_CONNTRACK_EVENTS /* This structure is passed to event handler */ struct nf_ct_event { struct nf_conn *ct; u32 portid; int report; }; struct nf_exp_event { struct nf_conntrack_expect *exp; u32 portid; int report; }; struct nf_ct_event_notifier { int (*ct_event)(unsigned int events, const struct nf_ct_event *item); int (*exp_event)(unsigned int events, const struct nf_exp_event *item); }; void nf_conntrack_register_notifier(struct net *net, const struct nf_ct_event_notifier *nb); void nf_conntrack_unregister_notifier(struct net *net); void nf_ct_deliver_cached_events(struct nf_conn *ct); int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, u32 portid, int report); bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp); #else static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) { } static inline int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, u32 portid, int report) { return 0; } static inline bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) { return false; } #endif static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *e; if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) return; e = nf_ct_ecache_find(ct); if (e == NULL) return; set_bit(event, &e->cache); #endif } static inline int nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, u32 portid, int report) { #ifdef CONFIG_NF_CONNTRACK_EVENTS if (nf_ct_ecache_exist(ct)) return nf_conntrack_eventmask_report(1 << event, ct, portid, report); #endif return 0; } static inline int nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS if (nf_ct_ecache_exist(ct)) return nf_conntrack_eventmask_report(1 << event, ct, 0, 0); #endif return 0; } #ifdef CONFIG_NF_CONNTRACK_EVENTS void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp, u32 portid, int report); void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state); void nf_conntrack_ecache_pernet_init(struct net *net); void nf_conntrack_ecache_pernet_fini(struct net *net); struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net); static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return net->ct.ecache_dwork_pending; } #else /* CONFIG_NF_CONNTRACK_EVENTS */ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, struct nf_conntrack_expect *exp, u32 portid, int report) { } static inline void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state s) { } static inline void nf_conntrack_ecache_pernet_init(struct net *net) { } static inline void nf_conntrack_ecache_pernet_fini(struct net *net) { } static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return false; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ #endif /*_NF_CONNTRACK_ECACHE_H*/ |
2 2 2 2 2 7 9 1 1 19 1 1 1 3 10 11 1 10 2 11 1 10 2 9 1 8 1 6 4 2 4 4 6 1 9 2 7 7 2 2 7 4 4 4 4 2 2 5 5 1 3 1 4 3 1 3 1 4 4 4 5 5 5 5 7 7 5 2 2 7 5 2 2 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2016, Amir Vadai <amir@vadai.me> * Copyright (c) 2016, Mellanox Technologies. All rights reserved. */ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <net/geneve.h> #include <net/vxlan.h> #include <net/erspan.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/dst.h> #include <net/pkt_cls.h> #include <net/tc_wrapper.h> #include <linux/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_tunnel_key.h> static struct tc_action_ops act_tunnel_key_ops; TC_INDIRECT_SCOPE int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; int action; params = rcu_dereference_bh(t->params); tcf_lastuse_update(&t->tcf_tm); tcf_action_update_bstats(&t->common, skb); action = READ_ONCE(t->tcf_action); switch (params->tcft_action) { case TCA_TUNNEL_KEY_ACT_RELEASE: skb_dst_drop(skb); break; case TCA_TUNNEL_KEY_ACT_SET: skb_dst_drop(skb); skb_dst_set(skb, dst_clone(¶ms->tcft_enc_metadata->dst)); break; default: WARN_ONCE(1, "Bad tunnel_key action %d.\n", params->tcft_action); break; } return action; } static const struct nla_policy enc_opts_policy[TCA_TUNNEL_KEY_ENC_OPTS_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_OPTS_UNSPEC] = { .strict_start_type = TCA_TUNNEL_KEY_ENC_OPTS_VXLAN }, [TCA_TUNNEL_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED }, [TCA_TUNNEL_KEY_ENC_OPTS_VXLAN] = { .type = NLA_NESTED }, [TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN] = { .type = NLA_NESTED }, }; static const struct nla_policy geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 }, }; static const struct nla_policy vxlan_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP] = { .type = NLA_U32 }, }; static const struct nla_policy erspan_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER] = { .type = NLA_U8 }, [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX] = { .type = NLA_U32 }, [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR] = { .type = NLA_U8 }, [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID] = { .type = NLA_U8 }, }; static int tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1]; int err, data_len, opt_len; u8 *data; err = nla_parse_nested_deprecated(tb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX, nla, geneve_opt_policy, extack); if (err < 0) return err; if (!tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] || !tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] || !tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]) { NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data"); return -EINVAL; } data = nla_data(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]); data_len = nla_len(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]); if (data_len < 4) { NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long"); return -ERANGE; } if (data_len % 4) { NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long"); return -ERANGE; } opt_len = sizeof(struct geneve_opt) + data_len; if (dst) { struct geneve_opt *opt = dst; WARN_ON(dst_len < opt_len); opt->opt_class = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS]); opt->type = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE]); opt->length = data_len / 4; /* length is in units of 4 bytes */ opt->r1 = 0; opt->r2 = 0; opt->r3 = 0; memcpy(opt + 1, data, data_len); } return opt_len; } static int tunnel_key_copy_vxlan_opt(const struct nlattr *nla, void *dst, int dst_len, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_MAX + 1]; int err; err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_VXLAN_MAX, nla, vxlan_opt_policy, extack); if (err < 0) return err; if (!tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]) { NL_SET_ERR_MSG(extack, "Missing tunnel key vxlan option gbp"); return -EINVAL; } if (dst) { struct vxlan_metadata *md = dst; md->gbp = nla_get_u32(tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]); md->gbp &= VXLAN_GBP_MASK; } return sizeof(struct vxlan_metadata); } static int tunnel_key_copy_erspan_opt(const struct nlattr *nla, void *dst, int dst_len, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_MAX + 1]; int err; u8 ver; err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_MAX, nla, erspan_opt_policy, extack); if (err < 0) return err; if (!tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER]) { NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option ver"); return -EINVAL; } ver = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER]); if (ver == 1) { if (!tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX]) { NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option index"); return -EINVAL; } } else if (ver == 2) { if (!tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR] || !tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID]) { NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option dir or hwid"); return -EINVAL; } } else { NL_SET_ERR_MSG(extack, "Tunnel key erspan option ver is incorrect"); return -EINVAL; } if (dst) { struct erspan_metadata *md = dst; md->version = ver; if (ver == 1) { nla = tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX]; md->u.index = nla_get_be32(nla); } else { nla = tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR]; md->u.md2.dir = nla_get_u8(nla); nla = tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID]; set_hwid(&md->u.md2, nla_get_u8(nla)); } } return sizeof(struct erspan_metadata); } static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, int dst_len, struct netlink_ext_ack *extack) { int err, rem, opt_len, len = nla_len(nla), opts_len = 0, type = 0; const struct nlattr *attr, *head = nla_data(nla); err = nla_validate_deprecated(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX, enc_opts_policy, extack); if (err) return err; nla_for_each_attr(attr, head, len, rem) { switch (nla_type(attr)) { case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE: if (type && type != IP_TUNNEL_GENEVE_OPT_BIT) { NL_SET_ERR_MSG(extack, "Duplicate type for geneve options"); return -EINVAL; } opt_len = tunnel_key_copy_geneve_opt(attr, dst, dst_len, extack); if (opt_len < 0) return opt_len; opts_len += opt_len; if (opts_len > IP_TUNNEL_OPTS_MAX) { NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size"); return -EINVAL; } if (dst) { dst_len -= opt_len; dst += opt_len; } type = IP_TUNNEL_GENEVE_OPT_BIT; break; case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN: if (type) { NL_SET_ERR_MSG(extack, "Duplicate type for vxlan options"); return -EINVAL; } opt_len = tunnel_key_copy_vxlan_opt(attr, dst, dst_len, extack); if (opt_len < 0) return opt_len; opts_len += opt_len; type = IP_TUNNEL_VXLAN_OPT_BIT; break; case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN: if (type) { NL_SET_ERR_MSG(extack, "Duplicate type for erspan options"); return -EINVAL; } opt_len = tunnel_key_copy_erspan_opt(attr, dst, dst_len, extack); if (opt_len < 0) return opt_len; opts_len += opt_len; type = IP_TUNNEL_ERSPAN_OPT_BIT; break; } } if (!opts_len) { NL_SET_ERR_MSG(extack, "Empty list of tunnel options"); return -EINVAL; } if (rem > 0) { NL_SET_ERR_MSG(extack, "Trailing data after parsing tunnel key options attributes"); return -EINVAL; } return opts_len; } static int tunnel_key_get_opts_len(struct nlattr *nla, struct netlink_ext_ack *extack) { return tunnel_key_copy_opts(nla, NULL, 0, extack); } static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info, int opts_len, struct netlink_ext_ack *extack) { info->options_len = opts_len; switch (nla_type(nla_data(nla))) { case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE: #if IS_ENABLED(CONFIG_INET) __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags); return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), opts_len, extack); #else return -EAFNOSUPPORT; #endif case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN: #if IS_ENABLED(CONFIG_INET) __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags); return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), opts_len, extack); #else return -EAFNOSUPPORT; #endif case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN: #if IS_ENABLED(CONFIG_INET) __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags); return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), opts_len, extack); #else return -EAFNOSUPPORT; #endif default: NL_SET_ERR_MSG(extack, "Cannot set tunnel options for unknown tunnel type"); return -EINVAL; } } static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { [TCA_TUNNEL_KEY_PARMS] = { .len = sizeof(struct tc_tunnel_key) }, [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 }, [TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NLA_U32 }, [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, [TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16}, [TCA_TUNNEL_KEY_NO_CSUM] = { .type = NLA_U8 }, [TCA_TUNNEL_KEY_ENC_OPTS] = { .type = NLA_NESTED }, [TCA_TUNNEL_KEY_ENC_TOS] = { .type = NLA_U8 }, [TCA_TUNNEL_KEY_ENC_TTL] = { .type = NLA_U8 }, }; static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) { if (!p) return; if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) dst_release(&p->tcft_enc_metadata->dst); kfree_rcu(p, rcu); } static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_tunnel_key_ops.net_id); bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tcf_tunnel_key_params *params_new; IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct metadata_dst *metadata = NULL; struct tcf_chain *goto_ch = NULL; struct tc_tunnel_key *parm; struct tcf_tunnel_key *t; bool exists = false; __be16 dst_port = 0; __be64 key_id = 0; int opts_len = 0; u8 tos, ttl; int ret = 0; u32 index; int err; if (!nla) { NL_SET_ERR_MSG(extack, "Tunnel requires attributes to be passed"); return -EINVAL; } err = nla_parse_nested_deprecated(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Failed to parse nested tunnel key attributes"); return err; } if (!tb[TCA_TUNNEL_KEY_PARMS]) { NL_SET_ERR_MSG(extack, "Missing tunnel key parameters"); return -EINVAL; } parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; if (exists && bind) return ACT_P_BOUND; switch (parm->t_action) { case TCA_TUNNEL_KEY_ACT_RELEASE: break; case TCA_TUNNEL_KEY_ACT_SET: if (tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) { __be32 key32; key32 = nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]); key_id = key32_to_tunnel_id(key32); __set_bit(IP_TUNNEL_KEY_BIT, flags); } __set_bit(IP_TUNNEL_CSUM_BIT, flags); if (tb[TCA_TUNNEL_KEY_NO_CSUM] && nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM])) __clear_bit(IP_TUNNEL_CSUM_BIT, flags); if (nla_get_flag(tb[TCA_TUNNEL_KEY_NO_FRAG])) __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, flags); if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT]) dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]); if (tb[TCA_TUNNEL_KEY_ENC_OPTS]) { opts_len = tunnel_key_get_opts_len(tb[TCA_TUNNEL_KEY_ENC_OPTS], extack); if (opts_len < 0) { ret = opts_len; goto err_out; } } tos = 0; if (tb[TCA_TUNNEL_KEY_ENC_TOS]) tos = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TOS]); ttl = 0; if (tb[TCA_TUNNEL_KEY_ENC_TTL]) ttl = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TTL]); if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { __be32 saddr; __be32 daddr; saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]); daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); metadata = __ip_tun_set_dst(saddr, daddr, tos, ttl, dst_port, flags, key_id, opts_len); } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { struct in6_addr saddr; struct in6_addr daddr; saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]); daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port, 0, flags, key_id, opts_len); } else { NL_SET_ERR_MSG(extack, "Missing either ipv4 or ipv6 src and dst"); ret = -EINVAL; goto err_out; } if (!metadata) { NL_SET_ERR_MSG(extack, "Cannot allocate tunnel metadata dst"); ret = -ENOMEM; goto err_out; } #ifdef CONFIG_DST_CACHE ret = dst_cache_init(&metadata->u.tun_info.dst_cache, GFP_KERNEL); if (ret) goto release_tun_meta; #endif if (opts_len) { ret = tunnel_key_opts_set(tb[TCA_TUNNEL_KEY_ENC_OPTS], &metadata->u.tun_info, opts_len, extack); if (ret < 0) goto release_tun_meta; } metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; break; default: NL_SET_ERR_MSG(extack, "Unknown tunnel key action"); ret = -EINVAL; goto err_out; } if (!exists) { ret = tcf_idr_create_from_flags(tn, index, est, a, &act_tunnel_key_ops, bind, act_flags); if (ret) { NL_SET_ERR_MSG(extack, "Cannot create TC IDR"); goto release_tun_meta; } ret = ACT_P_CREATED; } else if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) { NL_SET_ERR_MSG(extack, "TC IDR already exists"); ret = -EEXIST; goto release_tun_meta; } err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) { ret = err; exists = true; goto release_tun_meta; } t = to_tunnel_key(*a); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters"); ret = -ENOMEM; exists = true; goto put_chain; } params_new->tcft_action = parm->t_action; params_new->tcft_enc_metadata = metadata; spin_lock_bh(&t->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); params_new = rcu_replace_pointer(t->params, params_new, lockdep_is_held(&t->tcf_lock)); spin_unlock_bh(&t->tcf_lock); tunnel_key_release_params(params_new); if (goto_ch) tcf_chain_put_by_act(goto_ch); return ret; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_tun_meta: if (metadata) dst_release(&metadata->dst); err_out: if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); return ret; } static void tunnel_key_release(struct tc_action *a) { struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; params = rcu_dereference_protected(t->params, 1); tunnel_key_release_params(params); } static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, const struct ip_tunnel_info *info) { int len = info->options_len; u8 *src = (u8 *)(info + 1); struct nlattr *start; start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE); if (!start) return -EMSGSIZE; while (len > 0) { struct geneve_opt *opt = (struct geneve_opt *)src; if (nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS, opt->opt_class) || nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE, opt->type) || nla_put(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA, opt->length * 4, opt + 1)) { nla_nest_cancel(skb, start); return -EMSGSIZE; } len -= sizeof(struct geneve_opt) + opt->length * 4; src += sizeof(struct geneve_opt) + opt->length * 4; } nla_nest_end(skb, start); return 0; } static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb, const struct ip_tunnel_info *info) { struct vxlan_metadata *md = (struct vxlan_metadata *)(info + 1); struct nlattr *start; start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_VXLAN); if (!start) return -EMSGSIZE; if (nla_put_u32(skb, TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP, md->gbp)) { nla_nest_cancel(skb, start); return -EMSGSIZE; } nla_nest_end(skb, start); return 0; } static int tunnel_key_erspan_opts_dump(struct sk_buff *skb, const struct ip_tunnel_info *info) { struct erspan_metadata *md = (struct erspan_metadata *)(info + 1); struct nlattr *start; start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN); if (!start) return -EMSGSIZE; if (nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER, md->version)) goto err; if (md->version == 1 && nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX, md->u.index)) goto err; if (md->version == 2 && (nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR, md->u.md2.dir) || nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID, get_hwid(&md->u.md2)))) goto err; nla_nest_end(skb, start); return 0; err: nla_nest_cancel(skb, start); return -EMSGSIZE; } static int tunnel_key_opts_dump(struct sk_buff *skb, const struct ip_tunnel_info *info) { struct nlattr *start; int err = -EINVAL; if (!info->options_len) return 0; start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS); if (!start) return -EMSGSIZE; if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) { err = tunnel_key_geneve_opts_dump(skb, info); if (err) goto err_out; } else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) { err = tunnel_key_vxlan_opts_dump(skb, info); if (err) goto err_out; } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) { err = tunnel_key_erspan_opts_dump(skb, info); if (err) goto err_out; } else { err_out: nla_nest_cancel(skb, start); return err; } nla_nest_end(skb, start); return 0; } static int tunnel_key_dump_addresses(struct sk_buff *skb, const struct ip_tunnel_info *info) { unsigned short family = ip_tunnel_info_af(info); if (family == AF_INET) { __be32 saddr = info->key.u.ipv4.src; __be32 daddr = info->key.u.ipv4.dst; if (!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_SRC, saddr) && !nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_DST, daddr)) return 0; } if (family == AF_INET6) { const struct in6_addr *saddr6 = &info->key.u.ipv6.src; const struct in6_addr *daddr6 = &info->key.u.ipv6.dst; if (!nla_put_in6_addr(skb, TCA_TUNNEL_KEY_ENC_IPV6_SRC, saddr6) && !nla_put_in6_addr(skb, TCA_TUNNEL_KEY_ENC_IPV6_DST, daddr6)) return 0; } return -EINVAL; } static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; struct tc_tunnel_key opt = { .index = t->tcf_index, .refcnt = refcount_read(&t->tcf_refcnt) - ref, .bindcnt = atomic_read(&t->tcf_bindcnt) - bind, }; struct tcf_t tm; spin_lock_bh(&t->tcf_lock); params = rcu_dereference_protected(t->params, lockdep_is_held(&t->tcf_lock)); opt.action = t->tcf_action; opt.t_action = params->tcft_action; if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) { struct ip_tunnel_info *info = ¶ms->tcft_enc_metadata->u.tun_info; struct ip_tunnel_key *key = &info->key; __be32 key_id = tunnel_id_to_key32(key->tun_id); if ((test_bit(IP_TUNNEL_KEY_BIT, key->tun_flags) && nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id)) || tunnel_key_dump_addresses(skb, ¶ms->tcft_enc_metadata->u.tun_info) || (key->tp_dst && nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst)) || nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM, !test_bit(IP_TUNNEL_CSUM_BIT, key->tun_flags)) || (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) && nla_put_flag(skb, TCA_TUNNEL_KEY_NO_FRAG)) || tunnel_key_opts_dump(skb, info)) goto nla_put_failure; if (key->tos && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TOS, key->tos)) goto nla_put_failure; if (key->ttl && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TTL, key->ttl)) goto nla_put_failure; } tcf_tm_dump(&tm, &t->tcf_tm); if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm), &tm, TCA_TUNNEL_KEY_PAD)) goto nla_put_failure; spin_unlock_bh(&t->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&t->tcf_lock); nlmsg_trim(skb, b); return -1; } static void tcf_tunnel_encap_put_tunnel(void *priv) { struct ip_tunnel_info *tunnel = priv; kfree(tunnel); } static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, const struct tc_action *act) { entry->tunnel = tcf_tunnel_info_copy(act); if (!entry->tunnel) return -ENOMEM; entry->destructor = tcf_tunnel_encap_put_tunnel; entry->destructor_priv = entry->tunnel; return 0; } static int tcf_tunnel_key_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, bool bind, struct netlink_ext_ack *extack) { int err; if (bind) { struct flow_action_entry *entry = entry_data; if (is_tcf_tunnel_set(act)) { entry->id = FLOW_ACTION_TUNNEL_ENCAP; err = tcf_tunnel_encap_get_tunnel(entry, act); if (err) return err; } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else { NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel key mode offload"); return -EOPNOTSUPP; } *index_inc = 1; } else { struct flow_offload_action *fl_action = entry_data; if (is_tcf_tunnel_set(act)) fl_action->id = FLOW_ACTION_TUNNEL_ENCAP; else if (is_tcf_tunnel_release(act)) fl_action->id = FLOW_ACTION_TUNNEL_DECAP; else return -EOPNOTSUPP; } return 0; } static struct tc_action_ops act_tunnel_key_ops = { .kind = "tunnel_key", .id = TCA_ID_TUNNEL_KEY, .owner = THIS_MODULE, .act = tunnel_key_act, .dump = tunnel_key_dump, .init = tunnel_key_init, .cleanup = tunnel_key_release, .offload_act_setup = tcf_tunnel_key_offload_act_setup, .size = sizeof(struct tcf_tunnel_key), }; MODULE_ALIAS_NET_ACT("tunnel_key"); static __net_init int tunnel_key_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_tunnel_key_ops.net_id); return tc_action_net_init(net, tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_tunnel_key_ops.net_id); } static struct pernet_operations tunnel_key_net_ops = { .init = tunnel_key_init_net, .exit_batch = tunnel_key_exit_net, .id = &act_tunnel_key_ops.net_id, .size = sizeof(struct tc_action_net), }; static int __init tunnel_key_init_module(void) { return tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops); } static void __exit tunnel_key_cleanup_module(void) { tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops); } module_init(tunnel_key_init_module); module_exit(tunnel_key_cleanup_module); MODULE_AUTHOR("Amir Vadai <amir@vadai.me>"); MODULE_DESCRIPTION("ip tunnel manipulation actions"); MODULE_LICENSE("GPL v2"); |
1011 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 | /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/pagevec.h * * In many places it is efficient to batch an operation up against multiple * folios. A folio_batch is a container which is used for that. */ #ifndef _LINUX_PAGEVEC_H #define _LINUX_PAGEVEC_H #include <linux/types.h> /* 31 pointers + header align the folio_batch structure to a power of two */ #define PAGEVEC_SIZE 31 struct folio; /** * struct folio_batch - A collection of folios. * * The folio_batch is used to amortise the cost of retrieving and * operating on a set of folios. The order of folios in the batch may be * significant (eg delete_from_page_cache_batch()). Some users of the * folio_batch store "exceptional" entries in it which can be removed * by calling folio_batch_remove_exceptionals(). */ struct folio_batch { unsigned char nr; unsigned char i; bool percpu_pvec_drained; struct folio *folios[PAGEVEC_SIZE]; }; /** * folio_batch_init() - Initialise a batch of folios * @fbatch: The folio batch. * * A freshly initialised folio_batch contains zero folios. */ static inline void folio_batch_init(struct folio_batch *fbatch) { fbatch->nr = 0; fbatch->i = 0; fbatch->percpu_pvec_drained = false; } static inline void folio_batch_reinit(struct folio_batch *fbatch) { fbatch->nr = 0; fbatch->i = 0; } static inline unsigned int folio_batch_count(struct folio_batch *fbatch) { return fbatch->nr; } static inline unsigned int folio_batch_space(struct folio_batch *fbatch) { return PAGEVEC_SIZE - fbatch->nr; } /** * folio_batch_add() - Add a folio to a batch. * @fbatch: The folio batch. * @folio: The folio to add. * * The folio is added to the end of the batch. * The batch must have previously been initialised using folio_batch_init(). * * Return: The number of slots still available. */ static inline unsigned folio_batch_add(struct folio_batch *fbatch, struct folio *folio) { fbatch->folios[fbatch->nr++] = folio; return folio_batch_space(fbatch); } /** * folio_batch_next - Return the next folio to process. * @fbatch: The folio batch being processed. * * Use this function to implement a queue of folios. * * Return: The next folio in the queue, or NULL if the queue is empty. */ static inline struct folio *folio_batch_next(struct folio_batch *fbatch) { if (fbatch->i == fbatch->nr) return NULL; return fbatch->folios[fbatch->i++]; } void __folio_batch_release(struct folio_batch *pvec); static inline void folio_batch_release(struct folio_batch *fbatch) { if (folio_batch_count(fbatch)) __folio_batch_release(fbatch); } void folio_batch_remove_exceptionals(struct folio_batch *fbatch); #endif /* _LINUX_PAGEVEC_H */ |
39 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Symmetric key ciphers. * * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_INTERNAL_SKCIPHER_H #define _CRYPTO_INTERNAL_SKCIPHER_H #include <crypto/algapi.h> #include <crypto/internal/cipher.h> #include <crypto/skcipher.h> #include <linux/list.h> #include <linux/types.h> /* * Set this if your algorithm is sync but needs a reqsize larger * than MAX_SYNC_SKCIPHER_REQSIZE. * * Reuse bit that is specific to hash algorithms. */ #define CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE CRYPTO_ALG_OPTIONAL_KEY struct aead_request; struct rtattr; struct skcipher_instance { void (*free)(struct skcipher_instance *inst); union { struct { char head[offsetof(struct skcipher_alg, base)]; struct crypto_instance base; } s; struct skcipher_alg alg; }; }; struct lskcipher_instance { void (*free)(struct lskcipher_instance *inst); union { struct { char head[offsetof(struct lskcipher_alg, co.base)]; struct crypto_instance base; } s; struct lskcipher_alg alg; }; }; struct crypto_skcipher_spawn { struct crypto_spawn base; }; struct crypto_lskcipher_spawn { struct crypto_spawn base; }; struct skcipher_walk { union { struct { struct page *page; unsigned long offset; } phys; struct { u8 *page; void *addr; } virt; } src, dst; struct scatter_walk in; unsigned int nbytes; struct scatter_walk out; unsigned int total; struct list_head buffers; u8 *page; u8 *buffer; u8 *oiv; void *iv; unsigned int ivsize; int flags; unsigned int blocksize; unsigned int stride; unsigned int alignmask; }; static inline struct crypto_instance *skcipher_crypto_instance( struct skcipher_instance *inst) { return &inst->s.base; } static inline struct crypto_instance *lskcipher_crypto_instance( struct lskcipher_instance *inst) { return &inst->s.base; } static inline struct skcipher_instance *skcipher_alg_instance( struct crypto_skcipher *skcipher) { return container_of(crypto_skcipher_alg(skcipher), struct skcipher_instance, alg); } static inline struct lskcipher_instance *lskcipher_alg_instance( struct crypto_lskcipher *lskcipher) { return container_of(crypto_lskcipher_alg(lskcipher), struct lskcipher_instance, alg); } static inline void *skcipher_instance_ctx(struct skcipher_instance *inst) { return crypto_instance_ctx(skcipher_crypto_instance(inst)); } static inline void *lskcipher_instance_ctx(struct lskcipher_instance *inst) { return crypto_instance_ctx(lskcipher_crypto_instance(inst)); } static inline void skcipher_request_complete(struct skcipher_request *req, int err) { crypto_request_complete(&req->base, err); } int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); int crypto_grab_lskcipher(struct crypto_lskcipher_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); static inline void crypto_drop_skcipher(struct crypto_skcipher_spawn *spawn) { crypto_drop_spawn(&spawn->base); } static inline void crypto_drop_lskcipher(struct crypto_lskcipher_spawn *spawn) { crypto_drop_spawn(&spawn->base); } static inline struct lskcipher_alg *crypto_lskcipher_spawn_alg( struct crypto_lskcipher_spawn *spawn) { return container_of(spawn->base.alg, struct lskcipher_alg, co.base); } static inline struct skcipher_alg_common *crypto_spawn_skcipher_alg_common( struct crypto_skcipher_spawn *spawn) { return container_of(spawn->base.alg, struct skcipher_alg_common, base); } static inline struct lskcipher_alg *crypto_spawn_lskcipher_alg( struct crypto_lskcipher_spawn *spawn) { return crypto_lskcipher_spawn_alg(spawn); } static inline struct crypto_skcipher *crypto_spawn_skcipher( struct crypto_skcipher_spawn *spawn) { return crypto_spawn_tfm2(&spawn->base); } static inline struct crypto_lskcipher *crypto_spawn_lskcipher( struct crypto_lskcipher_spawn *spawn) { return crypto_spawn_tfm2(&spawn->base); } static inline void crypto_skcipher_set_reqsize( struct crypto_skcipher *skcipher, unsigned int reqsize) { skcipher->reqsize = reqsize; } static inline void crypto_skcipher_set_reqsize_dma( struct crypto_skcipher *skcipher, unsigned int reqsize) { reqsize += crypto_dma_align() & ~(crypto_tfm_ctx_alignment() - 1); skcipher->reqsize = reqsize; } int crypto_register_skcipher(struct skcipher_alg *alg); void crypto_unregister_skcipher(struct skcipher_alg *alg); int crypto_register_skciphers(struct skcipher_alg *algs, int count); void crypto_unregister_skciphers(struct skcipher_alg *algs, int count); int skcipher_register_instance(struct crypto_template *tmpl, struct skcipher_instance *inst); int crypto_register_lskcipher(struct lskcipher_alg *alg); void crypto_unregister_lskcipher(struct lskcipher_alg *alg); int crypto_register_lskciphers(struct lskcipher_alg *algs, int count); void crypto_unregister_lskciphers(struct lskcipher_alg *algs, int count); int lskcipher_register_instance(struct crypto_template *tmpl, struct lskcipher_instance *inst); int skcipher_walk_done(struct skcipher_walk *walk, int err); int skcipher_walk_virt(struct skcipher_walk *walk, struct skcipher_request *req, bool atomic); int skcipher_walk_async(struct skcipher_walk *walk, struct skcipher_request *req); int skcipher_walk_aead_encrypt(struct skcipher_walk *walk, struct aead_request *req, bool atomic); int skcipher_walk_aead_decrypt(struct skcipher_walk *walk, struct aead_request *req, bool atomic); void skcipher_walk_complete(struct skcipher_walk *walk, int err); static inline void skcipher_walk_abort(struct skcipher_walk *walk) { skcipher_walk_done(walk, -ECANCELED); } static inline void *crypto_skcipher_ctx(struct crypto_skcipher *tfm) { return crypto_tfm_ctx(&tfm->base); } static inline void *crypto_lskcipher_ctx(struct crypto_lskcipher *tfm) { return crypto_tfm_ctx(&tfm->base); } static inline void *crypto_skcipher_ctx_dma(struct crypto_skcipher *tfm) { return crypto_tfm_ctx_dma(&tfm->base); } static inline void *skcipher_request_ctx(struct skcipher_request *req) { return req->__ctx; } static inline void *skcipher_request_ctx_dma(struct skcipher_request *req) { unsigned int align = crypto_dma_align(); if (align <= crypto_tfm_ctx_alignment()) align = 1; return PTR_ALIGN(skcipher_request_ctx(req), align); } static inline u32 skcipher_request_flags(struct skcipher_request *req) { return req->base.flags; } /* Helpers for simple block cipher modes of operation */ struct skcipher_ctx_simple { struct crypto_cipher *cipher; /* underlying block cipher */ }; static inline struct crypto_cipher * skcipher_cipher_simple(struct crypto_skcipher *tfm) { struct skcipher_ctx_simple *ctx = crypto_skcipher_ctx(tfm); return ctx->cipher; } struct skcipher_instance *skcipher_alloc_instance_simple( struct crypto_template *tmpl, struct rtattr **tb); static inline struct crypto_alg *skcipher_ialg_simple( struct skcipher_instance *inst) { struct crypto_cipher_spawn *spawn = skcipher_instance_ctx(inst); return crypto_spawn_cipher_alg(spawn); } static inline struct crypto_lskcipher *lskcipher_cipher_simple( struct crypto_lskcipher *tfm) { struct crypto_lskcipher **ctx = crypto_lskcipher_ctx(tfm); return *ctx; } struct lskcipher_instance *lskcipher_alloc_instance_simple( struct crypto_template *tmpl, struct rtattr **tb); static inline struct lskcipher_alg *lskcipher_ialg_simple( struct lskcipher_instance *inst) { struct crypto_lskcipher_spawn *spawn = lskcipher_instance_ctx(inst); return crypto_lskcipher_spawn_alg(spawn); } #endif /* _CRYPTO_INTERNAL_SKCIPHER_H */ |
37 37 37 37 37 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 | // SPDX-License-Identifier: GPL-2.0-only /* * HT handling * * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright 2017 Intel Deutschland GmbH * Copyright(c) 2020-2024 Intel Corporation */ #include <linux/ieee80211.h> #include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "rate.h" static void __check_htcap_disable(struct ieee80211_ht_cap *ht_capa, struct ieee80211_ht_cap *ht_capa_mask, struct ieee80211_sta_ht_cap *ht_cap, u16 flag) { __le16 le_flag = cpu_to_le16(flag); if (ht_capa_mask->cap_info & le_flag) { if (!(ht_capa->cap_info & le_flag)) ht_cap->cap &= ~flag; } } static void __check_htcap_enable(struct ieee80211_ht_cap *ht_capa, struct ieee80211_ht_cap *ht_capa_mask, struct ieee80211_sta_ht_cap *ht_cap, u16 flag) { __le16 le_flag = cpu_to_le16(flag); if ((ht_capa_mask->cap_info & le_flag) && (ht_capa->cap_info & le_flag)) ht_cap->cap |= flag; } void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap) { struct ieee80211_ht_cap *ht_capa, *ht_capa_mask; u8 *scaps, *smask; int i; if (!ht_cap->ht_supported) return; switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: ht_capa = &sdata->u.mgd.ht_capa; ht_capa_mask = &sdata->u.mgd.ht_capa_mask; break; case NL80211_IFTYPE_ADHOC: ht_capa = &sdata->u.ibss.ht_capa; ht_capa_mask = &sdata->u.ibss.ht_capa_mask; break; default: WARN_ON_ONCE(1); return; } scaps = (u8 *)(&ht_capa->mcs.rx_mask); smask = (u8 *)(&ht_capa_mask->mcs.rx_mask); /* NOTE: If you add more over-rides here, update register_hw * ht_capa_mod_mask logic in main.c as well. * And, if this method can ever change ht_cap.ht_supported, fix * the check in ieee80211_add_ht_ie. */ /* check for HT over-rides, MCS rates first. */ for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) { u8 m = smask[i]; ht_cap->mcs.rx_mask[i] &= ~m; /* turn off all masked bits */ /* Add back rates that are supported */ ht_cap->mcs.rx_mask[i] |= (m & scaps[i]); } /* Force removal of HT-40 capabilities? */ __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_SUP_WIDTH_20_40); __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_SGI_40); /* Allow user to disable SGI-20 (SGI-40 is handled above) */ __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_SGI_20); /* Allow user to disable the max-AMSDU bit. */ __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_MAX_AMSDU); /* Allow user to disable LDPC */ __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_LDPC_CODING); /* Allow user to enable 40 MHz intolerant bit. */ __check_htcap_enable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_40MHZ_INTOLERANT); /* Allow user to enable TX STBC bit */ __check_htcap_enable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_TX_STBC); /* Allow user to configure RX STBC bits */ if (ht_capa_mask->cap_info & cpu_to_le16(IEEE80211_HT_CAP_RX_STBC)) ht_cap->cap |= le16_to_cpu(ht_capa->cap_info) & IEEE80211_HT_CAP_RX_STBC; /* Allow user to decrease AMPDU factor */ if (ht_capa_mask->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_FACTOR) { u8 n = ht_capa->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_FACTOR; if (n < ht_cap->ampdu_factor) ht_cap->ampdu_factor = n; } /* Allow the user to increase AMPDU density. */ if (ht_capa_mask->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_DENSITY) { u8 n = (ht_capa->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT; if (n > ht_cap->ampdu_density) ht_cap->ampdu_density = n; } } bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const struct ieee80211_ht_cap *ht_cap_ie, struct link_sta_info *link_sta) { struct ieee80211_bss_conf *link_conf; struct sta_info *sta = link_sta->sta; struct ieee80211_sta_ht_cap ht_cap, own_cap; u8 ampdu_info, tx_mcs_set_cap; int i, max_tx_streams; bool changed; enum ieee80211_sta_rx_bandwidth bw; enum nl80211_chan_width width; memset(&ht_cap, 0, sizeof(ht_cap)); if (!ht_cap_ie || !sband->ht_cap.ht_supported) goto apply; ht_cap.ht_supported = true; own_cap = sband->ht_cap; /* * If user has specified capability over-rides, take care * of that if the station we're setting up is the AP or TDLS peer that * we advertised a restricted capability set to. Override * our own capabilities and then use those below. */ if (sdata->vif.type == NL80211_IFTYPE_STATION || sdata->vif.type == NL80211_IFTYPE_ADHOC) ieee80211_apply_htcap_overrides(sdata, &own_cap); /* * The bits listed in this expression should be * the same for the peer and us, if the station * advertises more then we can't use those thus * we mask them out. */ ht_cap.cap = le16_to_cpu(ht_cap_ie->cap_info) & (own_cap.cap | ~(IEEE80211_HT_CAP_LDPC_CODING | IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_GRN_FLD | IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_DSSSCCK40)); /* * The STBC bits are asymmetric -- if we don't have * TX then mask out the peer's RX and vice versa. */ if (!(own_cap.cap & IEEE80211_HT_CAP_TX_STBC)) ht_cap.cap &= ~IEEE80211_HT_CAP_RX_STBC; if (!(own_cap.cap & IEEE80211_HT_CAP_RX_STBC)) ht_cap.cap &= ~IEEE80211_HT_CAP_TX_STBC; ampdu_info = ht_cap_ie->ampdu_params_info; ht_cap.ampdu_factor = ampdu_info & IEEE80211_HT_AMPDU_PARM_FACTOR; ht_cap.ampdu_density = (ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2; /* own MCS TX capabilities */ tx_mcs_set_cap = own_cap.mcs.tx_params; /* Copy peer MCS TX capabilities, the driver might need them. */ ht_cap.mcs.tx_params = ht_cap_ie->mcs.tx_params; /* can we TX with MCS rates? */ if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED)) goto apply; /* Counting from 0, therefore +1 */ if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_RX_DIFF) max_tx_streams = ((tx_mcs_set_cap & IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK) >> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1; else max_tx_streams = IEEE80211_HT_MCS_TX_MAX_STREAMS; /* * 802.11n-2009 20.3.5 / 20.6 says: * - indices 0 to 7 and 32 are single spatial stream * - 8 to 31 are multiple spatial streams using equal modulation * [8..15 for two streams, 16..23 for three and 24..31 for four] * - remainder are multiple spatial streams using unequal modulation */ for (i = 0; i < max_tx_streams; i++) ht_cap.mcs.rx_mask[i] = own_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION) for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE; i < IEEE80211_HT_MCS_MASK_LEN; i++) ht_cap.mcs.rx_mask[i] = own_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; /* handle MCS rate 32 too */ if (own_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) ht_cap.mcs.rx_mask[32/8] |= 1; /* set Rx highest rate */ ht_cap.mcs.rx_highest = ht_cap_ie->mcs.rx_highest; if (ht_cap.cap & IEEE80211_HT_CAP_MAX_AMSDU) link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_7935; else link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839; ieee80211_sta_recalc_aggregates(&sta->sta); apply: changed = memcmp(&link_sta->pub->ht_cap, &ht_cap, sizeof(ht_cap)); memcpy(&link_sta->pub->ht_cap, &ht_cap, sizeof(ht_cap)); rcu_read_lock(); link_conf = rcu_dereference(sdata->vif.link_conf[link_sta->link_id]); if (WARN_ON(!link_conf)) width = NL80211_CHAN_WIDTH_20_NOHT; else width = link_conf->chanreq.oper.width; switch (width) { default: WARN_ON_ONCE(1); fallthrough; case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: bw = IEEE80211_STA_RX_BW_20; break; case NL80211_CHAN_WIDTH_40: case NL80211_CHAN_WIDTH_80: case NL80211_CHAN_WIDTH_80P80: case NL80211_CHAN_WIDTH_160: case NL80211_CHAN_WIDTH_320: bw = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; break; } rcu_read_unlock(); link_sta->pub->bandwidth = bw; link_sta->cur_max_bandwidth = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { enum ieee80211_smps_mode smps_mode; switch ((ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >> IEEE80211_HT_CAP_SM_PS_SHIFT) { case WLAN_HT_CAP_SM_PS_INVALID: case WLAN_HT_CAP_SM_PS_STATIC: smps_mode = IEEE80211_SMPS_STATIC; break; case WLAN_HT_CAP_SM_PS_DYNAMIC: smps_mode = IEEE80211_SMPS_DYNAMIC; break; case WLAN_HT_CAP_SM_PS_DISABLED: smps_mode = IEEE80211_SMPS_OFF; break; } if (smps_mode != link_sta->pub->smps_mode) changed = true; link_sta->pub->smps_mode = smps_mode; } else { link_sta->pub->smps_mode = IEEE80211_SMPS_OFF; } return changed; } void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, enum ieee80211_agg_stop_reason reason) { int i; lockdep_assert_wiphy(sta->local->hw.wiphy); for (i = 0; i < IEEE80211_NUM_TIDS; i++) __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_LEAVE_QBSS, reason != AGG_STOP_DESTROY_STA && reason != AGG_STOP_PEER_REQUEST); for (i = 0; i < IEEE80211_NUM_TIDS; i++) __ieee80211_stop_tx_ba_session(sta, i, reason); /* * In case the tear down is part of a reconfigure due to HW restart * request, it is possible that the low level driver requested to stop * the BA session, so handle it to properly clean tid_tx data. */ if(reason == AGG_STOP_DESTROY_STA) { wiphy_work_cancel(sta->local->hw.wiphy, &sta->ampdu_mlme.work); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { struct tid_ampdu_tx *tid_tx = rcu_dereference_protected_tid_tx(sta, i); if (!tid_tx) continue; if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state)) ieee80211_stop_tx_ba_cb(sta, i, tid_tx); } } } void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work) { struct sta_info *sta = container_of(work, struct sta_info, ampdu_mlme.work); struct tid_ampdu_tx *tid_tx; bool blocked; int tid; lockdep_assert_wiphy(sta->local->hw.wiphy); /* When this flag is set, new sessions should be blocked. */ blocked = test_sta_flag(sta, WLAN_STA_BLOCK_BA); for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) { if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired)) __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_TIMEOUT, true); if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_stop_requested)) __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_UNSPECIFIED, true); if (!blocked && test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl)) __ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, IEEE80211_MAX_AMPDU_BUF_HT, false, true, NULL); if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS, sta->ampdu_mlme.tid_rx_manage_offl)) __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, 0, false); spin_lock_bh(&sta->lock); tid_tx = sta->ampdu_mlme.tid_start_tx[tid]; if (!blocked && tid_tx) { struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); struct ieee80211_sub_if_data *sdata = vif_to_sdata(txqi->txq.vif); struct fq *fq = &sdata->local->fq; spin_lock_bh(&fq->lock); /* Allow only frags to be dequeued */ set_bit(IEEE80211_TXQ_STOP, &txqi->flags); if (!skb_queue_empty(&txqi->frags)) { /* Fragmented Tx is ongoing, wait for it to * finish. Reschedule worker to retry later. */ spin_unlock_bh(&fq->lock); spin_unlock_bh(&sta->lock); /* Give the task working on the txq a chance * to send out the queued frags */ synchronize_net(); wiphy_work_queue(sdata->local->hw.wiphy, work); return; } spin_unlock_bh(&fq->lock); /* * Assign it over to the normal tid_tx array * where it "goes live". */ sta->ampdu_mlme.tid_start_tx[tid] = NULL; /* could there be a race? */ if (sta->ampdu_mlme.tid_tx[tid]) kfree(tid_tx); else ieee80211_assign_tid_tx(sta, tid, tid_tx); spin_unlock_bh(&sta->lock); ieee80211_tx_ba_session_handle_start(sta, tid); continue; } spin_unlock_bh(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx) continue; if (!blocked && test_and_clear_bit(HT_AGG_STATE_START_CB, &tid_tx->state)) ieee80211_start_tx_ba_cb(sta, tid, tid_tx); if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state)) __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_LOCAL_REQUEST); if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state)) ieee80211_stop_tx_ba_cb(sta, tid, tid_tx); } } void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u16 params; skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = ieee80211_mgmt_ba(skb, da, sdata); skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba)); mgmt->u.action.category = WLAN_CATEGORY_BACK; mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA; params = (u16)(initiator << 11); /* bit 11 initiator */ params |= (u16)(tid << 12); /* bit 15:12 TID number */ mgmt->u.action.u.delba.params = cpu_to_le16(params); mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code); ieee80211_tx_skb(sdata, skb); } void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len) { u16 tid, params; u16 initiator; params = le16_to_cpu(mgmt->u.action.u.delba.params); tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12; initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11; ht_dbg_ratelimited(sdata, "delba from %pM (%s) tid %d reason code %d\n", mgmt->sa, initiator ? "initiator" : "recipient", tid, le16_to_cpu(mgmt->u.action.u.delba.reason_code)); if (initiator == WLAN_BACK_INITIATOR) __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0, true); else __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_PEER_REQUEST); } enum nl80211_smps_mode ieee80211_smps_mode_to_smps_mode(enum ieee80211_smps_mode smps) { switch (smps) { case IEEE80211_SMPS_OFF: return NL80211_SMPS_OFF; case IEEE80211_SMPS_STATIC: return NL80211_SMPS_STATIC; case IEEE80211_SMPS_DYNAMIC: return NL80211_SMPS_DYNAMIC; default: return NL80211_SMPS_OFF; } } int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid, int link_id) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *action_frame; struct ieee80211_tx_info *info; u8 status_link_id = link_id < 0 ? 0 : link_id; /* 27 = header + category + action + smps mode */ skb = dev_alloc_skb(27 + local->hw.extra_tx_headroom); if (!skb) return -ENOMEM; skb_reserve(skb, local->hw.extra_tx_headroom); action_frame = skb_put(skb, 27); memcpy(action_frame->da, da, ETH_ALEN); memcpy(action_frame->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(action_frame->bssid, bssid, ETH_ALEN); action_frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); action_frame->u.action.category = WLAN_CATEGORY_HT; action_frame->u.action.u.ht_smps.action = WLAN_HT_ACTION_SMPS; switch (smps) { case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_NUM_MODES: WARN_ON(1); smps = IEEE80211_SMPS_OFF; fallthrough; case IEEE80211_SMPS_OFF: action_frame->u.action.u.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_DISABLED; break; case IEEE80211_SMPS_STATIC: action_frame->u.action.u.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_STATIC; break; case IEEE80211_SMPS_DYNAMIC: action_frame->u.action.u.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_DYNAMIC; break; } /* we'll do more on status of this frame */ info = IEEE80211_SKB_CB(skb); info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; /* we have 13 bits, and need 6: link_id 4, smps 2 */ info->status_data = IEEE80211_STATUS_TYPE_SMPS | u16_encode_bits(status_link_id << 2 | smps, IEEE80211_STATUS_SUBDATA_MASK); ieee80211_tx_skb_tid(sdata, skb, 7, link_id); return 0; } void ieee80211_request_smps(struct ieee80211_vif *vif, unsigned int link_id, enum ieee80211_smps_mode smps_mode) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_link_data *link; if (WARN_ON_ONCE(vif->type != NL80211_IFTYPE_STATION)) return; rcu_read_lock(); link = rcu_dereference(sdata->link[link_id]); if (WARN_ON(!link)) goto out; trace_api_request_smps(sdata->local, sdata, link, smps_mode); if (link->u.mgd.driver_smps_mode == smps_mode) goto out; link->u.mgd.driver_smps_mode = smps_mode; wiphy_work_queue(sdata->local->hw.wiphy, &link->u.mgd.request_smps_work); out: rcu_read_unlock(); } /* this might change ... don't want non-open drivers using it */ EXPORT_SYMBOL_GPL(ieee80211_request_smps); |
10 14 14 13 2 14 3 9 9 1 9 9 20 20 20 6 1 5 6 4 3 9 9 1 6 2 6 6 3 3 6 14 14 1 1 1 4 4 1 4 4 2 2 2 3 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Spanning tree protocol; interface code * Linux ethernet bridge * * Authors: * Lennert Buytenhek <buytenh@gnu.org> */ #include <linux/kernel.h> #include <linux/kmod.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <net/switchdev.h> #include "br_private.h" #include "br_private_stp.h" /* Port id is composed of priority and port number. * NB: some bits of priority are dropped to * make room for more ports. */ static inline port_id br_make_port_id(__u8 priority, __u16 port_no) { return ((u16)priority << BR_PORT_BITS) | (port_no & ((1<<BR_PORT_BITS)-1)); } #define BR_MAX_PORT_PRIORITY ((u16)~0 >> BR_PORT_BITS) /* called under bridge lock */ void br_init_port(struct net_bridge_port *p) { int err; p->port_id = br_make_port_id(p->priority, p->port_no); br_become_designated_port(p); br_set_state(p, BR_STATE_BLOCKING); p->topology_change_ack = 0; p->config_pending = 0; err = __set_ageing_time(p->dev, p->br->ageing_time); if (err) netdev_err(p->dev, "failed to offload ageing time\n"); } /* NO locks held */ void br_stp_enable_bridge(struct net_bridge *br) { struct net_bridge_port *p; spin_lock_bh(&br->lock); if (br->stp_enabled == BR_KERNEL_STP) mod_timer(&br->hello_timer, jiffies + br->hello_time); mod_delayed_work(system_long_wq, &br->gc_work, HZ / 10); br_config_bpdu_generation(br); list_for_each_entry(p, &br->port_list, list) { if (netif_running(p->dev) && netif_oper_up(p->dev)) br_stp_enable_port(p); } spin_unlock_bh(&br->lock); } /* NO locks held */ void br_stp_disable_bridge(struct net_bridge *br) { struct net_bridge_port *p; spin_lock_bh(&br->lock); list_for_each_entry(p, &br->port_list, list) { if (p->state != BR_STATE_DISABLED) br_stp_disable_port(p); } __br_set_topology_change(br, 0); br->topology_change_detected = 0; spin_unlock_bh(&br->lock); del_timer_sync(&br->hello_timer); del_timer_sync(&br->topology_change_timer); del_timer_sync(&br->tcn_timer); cancel_delayed_work_sync(&br->gc_work); } /* called under bridge lock */ void br_stp_enable_port(struct net_bridge_port *p) { br_init_port(p); br_port_state_selection(p->br); br_ifinfo_notify(RTM_NEWLINK, NULL, p); } /* called under bridge lock */ void br_stp_disable_port(struct net_bridge_port *p) { struct net_bridge *br = p->br; int wasroot; wasroot = br_is_root_bridge(br); br_become_designated_port(p); br_set_state(p, BR_STATE_DISABLED); p->topology_change_ack = 0; p->config_pending = 0; br_ifinfo_notify(RTM_NEWLINK, NULL, p); del_timer(&p->message_age_timer); del_timer(&p->forward_delay_timer); del_timer(&p->hold_timer); if (!rcu_access_pointer(p->backup_port)) br_fdb_delete_by_port(br, p, 0, 0); br_multicast_disable_port(p); br_configuration_update(br); br_port_state_selection(br); if (br_is_root_bridge(br) && !wasroot) br_become_root_bridge(br); } static int br_stp_call_user(struct net_bridge *br, char *arg) { char *argv[] = { BR_STP_PROG, br->dev->name, arg, NULL }; char *envp[] = { NULL }; int rc; /* call userspace STP and report program errors */ rc = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); if (rc > 0) { if (rc & 0xff) br_debug(br, BR_STP_PROG " received signal %d\n", rc & 0x7f); else br_debug(br, BR_STP_PROG " exited with code %d\n", (rc >> 8) & 0xff); } return rc; } static void br_stp_start(struct net_bridge *br) { int err = -ENOENT; if (net_eq(dev_net(br->dev), &init_net)) err = br_stp_call_user(br, "start"); if (err && err != -ENOENT) br_err(br, "failed to start userspace STP (%d)\n", err); spin_lock_bh(&br->lock); if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY) __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY); else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); if (!err) { br->stp_enabled = BR_USER_STP; br_debug(br, "userspace STP started\n"); } else { br->stp_enabled = BR_KERNEL_STP; br_debug(br, "using kernel STP\n"); /* To start timers on any ports left in blocking */ if (br->dev->flags & IFF_UP) mod_timer(&br->hello_timer, jiffies + br->hello_time); br_port_state_selection(br); } spin_unlock_bh(&br->lock); } static void br_stp_stop(struct net_bridge *br) { int err; if (br->stp_enabled == BR_USER_STP) { err = br_stp_call_user(br, "stop"); if (err) br_err(br, "failed to stop userspace STP (%d)\n", err); /* To start timers on any ports left in blocking */ spin_lock_bh(&br->lock); br_port_state_selection(br); spin_unlock_bh(&br->lock); } br->stp_enabled = BR_NO_STP; } int br_stp_set_enabled(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { ASSERT_RTNL(); if (br_mrp_enabled(br)) { NL_SET_ERR_MSG_MOD(extack, "STP can't be enabled if MRP is already enabled"); return -EINVAL; } if (val) { if (br->stp_enabled == BR_NO_STP) br_stp_start(br); } else { if (br->stp_enabled != BR_NO_STP) br_stp_stop(br); } return 0; } /* called under bridge lock */ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr) { /* should be aligned on 2 bytes for ether_addr_equal() */ unsigned short oldaddr_aligned[ETH_ALEN >> 1]; unsigned char *oldaddr = (unsigned char *)oldaddr_aligned; struct net_bridge_port *p; int wasroot; wasroot = br_is_root_bridge(br); br_fdb_change_mac_address(br, addr); memcpy(oldaddr, br->bridge_id.addr, ETH_ALEN); memcpy(br->bridge_id.addr, addr, ETH_ALEN); eth_hw_addr_set(br->dev, addr); list_for_each_entry(p, &br->port_list, list) { if (ether_addr_equal(p->designated_bridge.addr, oldaddr)) memcpy(p->designated_bridge.addr, addr, ETH_ALEN); if (ether_addr_equal(p->designated_root.addr, oldaddr)) memcpy(p->designated_root.addr, addr, ETH_ALEN); } br_configuration_update(br); br_port_state_selection(br); if (br_is_root_bridge(br) && !wasroot) br_become_root_bridge(br); } /* should be aligned on 2 bytes for ether_addr_equal() */ static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1]; /* called under bridge lock */ bool br_stp_recalculate_bridge_id(struct net_bridge *br) { const unsigned char *br_mac_zero = (const unsigned char *)br_mac_zero_aligned; const unsigned char *addr = br_mac_zero; struct net_bridge_port *p; /* user has chosen a value so keep it */ if (br->dev->addr_assign_type == NET_ADDR_SET) return false; list_for_each_entry(p, &br->port_list, list) { if (addr == br_mac_zero || memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0) addr = p->dev->dev_addr; } if (ether_addr_equal(br->bridge_id.addr, addr)) return false; /* no change */ br_stp_change_bridge_id(br, addr); return true; } /* Acquires and releases bridge lock */ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) { struct net_bridge_port *p; int wasroot; spin_lock_bh(&br->lock); wasroot = br_is_root_bridge(br); list_for_each_entry(p, &br->port_list, list) { if (p->state != BR_STATE_DISABLED && br_is_designated_port(p)) { p->designated_bridge.prio[0] = (newprio >> 8) & 0xFF; p->designated_bridge.prio[1] = newprio & 0xFF; } } br->bridge_id.prio[0] = (newprio >> 8) & 0xFF; br->bridge_id.prio[1] = newprio & 0xFF; br_configuration_update(br); br_port_state_selection(br); if (br_is_root_bridge(br) && !wasroot) br_become_root_bridge(br); spin_unlock_bh(&br->lock); } /* called under bridge lock */ int br_stp_set_port_priority(struct net_bridge_port *p, unsigned long newprio) { port_id new_port_id; if (newprio > BR_MAX_PORT_PRIORITY) return -ERANGE; new_port_id = br_make_port_id(newprio, p->port_no); if (br_is_designated_port(p)) p->designated_port = new_port_id; p->port_id = new_port_id; p->priority = newprio; if (!memcmp(&p->br->bridge_id, &p->designated_bridge, 8) && p->port_id < p->designated_port) { br_become_designated_port(p); br_port_state_selection(p->br); } return 0; } /* called under bridge lock */ int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost) { if (path_cost < BR_MIN_PATH_COST || path_cost > BR_MAX_PATH_COST) return -ERANGE; p->flags |= BR_ADMIN_COST; p->path_cost = path_cost; br_configuration_update(p->br); br_port_state_selection(p->br); return 0; } ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id) { return sprintf(buf, "%.2x%.2x.%.2x%.2x%.2x%.2x%.2x%.2x\n", id->prio[0], id->prio[1], id->addr[0], id->addr[1], id->addr[2], id->addr[3], id->addr[4], id->addr[5]); } |
2 1 1 2 9 4 3 2 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 | // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2010-2011 EIA Electronics, // Kurt Van Dijck <kurt.van.dijck@eia.be> // Copyright (c) 2010-2011 EIA Electronics, // Pieter Beyens <pieter.beyens@eia.be> // Copyright (c) 2017-2019 Pengutronix, // Marc Kleine-Budde <kernel@pengutronix.de> // Copyright (c) 2017-2019 Pengutronix, // Oleksij Rempel <kernel@pengutronix.de> /* J1939 Address Claiming. * Address Claiming in the kernel * - keeps track of the AC states of ECU's, * - resolves NAME<=>SA taking into account the AC states of ECU's. * * All Address Claim msgs (including host-originated msg) are processed * at the receive path (a sent msg is always received again via CAN echo). * As such, the processing of AC msgs is done in the order on which msgs * are sent on the bus. * * This module doesn't send msgs itself (e.g. replies on Address Claims), * this is the responsibility of a user space application or daemon. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/netdevice.h> #include <linux/skbuff.h> #include "j1939-priv.h" static inline name_t j1939_skb_to_name(const struct sk_buff *skb) { return le64_to_cpup((__le64 *)skb->data); } static inline bool j1939_ac_msg_is_request(struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); int req_pgn; if (skb->len < 3 || skcb->addr.pgn != J1939_PGN_REQUEST) return false; req_pgn = skb->data[0] | (skb->data[1] << 8) | (skb->data[2] << 16); return req_pgn == J1939_PGN_ADDRESS_CLAIMED; } static int j1939_ac_verify_outgoing(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); if (skb->len != 8) { netdev_notice(priv->ndev, "tx address claim with dlc %i\n", skb->len); return -EPROTO; } if (skcb->addr.src_name != j1939_skb_to_name(skb)) { netdev_notice(priv->ndev, "tx address claim with different name\n"); return -EPROTO; } if (skcb->addr.sa == J1939_NO_ADDR) { netdev_notice(priv->ndev, "tx address claim with broadcast sa\n"); return -EPROTO; } /* ac must always be a broadcast */ if (skcb->addr.dst_name || skcb->addr.da != J1939_NO_ADDR) { netdev_notice(priv->ndev, "tx address claim with dest, not broadcast\n"); return -EPROTO; } return 0; } int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); int ret; u8 addr; /* network mgmt: address claiming msgs */ if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) { struct j1939_ecu *ecu; ret = j1939_ac_verify_outgoing(priv, skb); /* return both when failure & when successful */ if (ret < 0) return ret; ecu = j1939_ecu_get_by_name(priv, skcb->addr.src_name); if (!ecu) return -ENODEV; if (ecu->addr != skcb->addr.sa) /* hold further traffic for ecu, remove from parent */ j1939_ecu_unmap(ecu); j1939_ecu_put(ecu); } else if (skcb->addr.src_name) { /* assign source address */ addr = j1939_name_to_addr(priv, skcb->addr.src_name); if (!j1939_address_is_unicast(addr) && !j1939_ac_msg_is_request(skb)) { netdev_notice(priv->ndev, "tx drop: invalid sa for name 0x%016llx\n", skcb->addr.src_name); return -EADDRNOTAVAIL; } skcb->addr.sa = addr; } /* assign destination address */ if (skcb->addr.dst_name) { addr = j1939_name_to_addr(priv, skcb->addr.dst_name); if (!j1939_address_is_unicast(addr)) { netdev_notice(priv->ndev, "tx drop: invalid da for name 0x%016llx\n", skcb->addr.dst_name); return -EADDRNOTAVAIL; } skcb->addr.da = addr; } return 0; } static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_ecu *ecu, *prev; name_t name; if (skb->len != 8) { netdev_notice(priv->ndev, "rx address claim with wrong dlc %i\n", skb->len); return; } name = j1939_skb_to_name(skb); skcb->addr.src_name = name; if (!name) { netdev_notice(priv->ndev, "rx address claim without name\n"); return; } if (!j1939_address_is_valid(skcb->addr.sa)) { netdev_notice(priv->ndev, "rx address claim with broadcast sa\n"); return; } write_lock_bh(&priv->lock); /* Few words on the ECU ref counting: * * First we get an ECU handle, either with * j1939_ecu_get_by_name_locked() (increments the ref counter) * or j1939_ecu_create_locked() (initializes an ECU object * with a ref counter of 1). * * j1939_ecu_unmap_locked() will decrement the ref counter, * but only if the ECU was mapped before. So "ecu" still * belongs to us. * * j1939_ecu_timer_start() will increment the ref counter * before it starts the timer, so we can put the ecu when * leaving this function. */ ecu = j1939_ecu_get_by_name_locked(priv, name); if (ecu && ecu->addr == skcb->addr.sa) { /* The ISO 11783-5 standard, in "4.5.2 - Address claim * requirements", states: * d) No CF shall begin, or resume, transmission on the * network until 250 ms after it has successfully claimed * an address except when responding to a request for * address-claimed. * * But "Figure 6" and "Figure 7" in "4.5.4.2 - Address-claim * prioritization" show that the CF begins the transmission * after 250 ms from the first AC (address-claimed) message * even if it sends another AC message during that time window * to resolve the address contention with another CF. * * As stated in "4.4.2.3 - Address-claimed message": * In order to successfully claim an address, the CF sending * an address claimed message shall not receive a contending * claim from another CF for at least 250 ms. * * As stated in "4.4.3.2 - NAME management (NM) message": * 1) A commanding CF can * d) request that a CF with a specified NAME transmit * the address-claimed message with its current NAME. * 2) A target CF shall * d) send an address-claimed message in response to a * request for a matching NAME * * Taking the above arguments into account, the 250 ms wait is * requested only during network initialization. * * Do not restart the timer on AC message if both the NAME and * the address match and so if the address has already been * claimed (timer has expired) or the AC message has been sent * to resolve the contention with another CF (timer is still * running). */ goto out_ecu_put; } if (!ecu && j1939_address_is_unicast(skcb->addr.sa)) ecu = j1939_ecu_create_locked(priv, name); if (IS_ERR_OR_NULL(ecu)) goto out_unlock_bh; /* cancel pending (previous) address claim */ j1939_ecu_timer_cancel(ecu); if (j1939_address_is_idle(skcb->addr.sa)) { j1939_ecu_unmap_locked(ecu); goto out_ecu_put; } /* save new addr */ if (ecu->addr != skcb->addr.sa) j1939_ecu_unmap_locked(ecu); ecu->addr = skcb->addr.sa; prev = j1939_ecu_get_by_addr_locked(priv, skcb->addr.sa); if (prev) { if (ecu->name > prev->name) { j1939_ecu_unmap_locked(ecu); j1939_ecu_put(prev); goto out_ecu_put; } else { /* kick prev if less or equal */ j1939_ecu_unmap_locked(prev); j1939_ecu_put(prev); } } j1939_ecu_timer_start(ecu); out_ecu_put: j1939_ecu_put(ecu); out_unlock_bh: write_unlock_bh(&priv->lock); } void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_ecu *ecu; /* network mgmt */ if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) { j1939_ac_process(priv, skb); } else if (j1939_address_is_unicast(skcb->addr.sa)) { /* assign source name */ ecu = j1939_ecu_get_by_addr(priv, skcb->addr.sa); if (ecu) { skcb->addr.src_name = ecu->name; j1939_ecu_put(ecu); } } /* assign destination name */ ecu = j1939_ecu_get_by_addr(priv, skcb->addr.da); if (ecu) { skcb->addr.dst_name = ecu->name; j1939_ecu_put(ecu); } } |
3 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 | // SPDX-License-Identifier: GPL-2.0-only /* * iptables module for DCCP protocol header matching * * (C) 2005 by Harald Welte <laforge@netfilter.org> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <net/ip.h> #include <linux/dccp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_dccp.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: DCCP protocol packet match"); MODULE_ALIAS("ipt_dccp"); MODULE_ALIAS("ip6t_dccp"); #define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \ || (!!((invflag) & (option)) ^ (cond))) static unsigned char *dccp_optbuf; static DEFINE_SPINLOCK(dccp_buflock); static inline bool dccp_find_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, const struct dccp_hdr *dh, bool *hotdrop) { /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ const unsigned char *op; unsigned int optoff = __dccp_hdr_len(dh); unsigned int optlen = dh->dccph_doff*4 - __dccp_hdr_len(dh); unsigned int i; if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) goto invalid; if (!optlen) return false; spin_lock_bh(&dccp_buflock); op = skb_header_pointer(skb, protoff + optoff, optlen, dccp_optbuf); if (op == NULL) { /* If we don't have the whole header, drop packet. */ goto partial; } for (i = 0; i < optlen; ) { if (op[i] == option) { spin_unlock_bh(&dccp_buflock); return true; } if (op[i] < 2) i++; else i += op[i+1]?:1; } spin_unlock_bh(&dccp_buflock); return false; partial: spin_unlock_bh(&dccp_buflock); invalid: *hotdrop = true; return false; } static inline bool match_types(const struct dccp_hdr *dh, u_int16_t typemask) { return typemask & (1 << dh->dccph_type); } static inline bool match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, const struct dccp_hdr *dh, bool *hotdrop) { return dccp_find_option(option, skb, protoff, dh, hotdrop); } static bool dccp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_dccp_info *info = par->matchinfo; const struct dccp_hdr *dh; struct dccp_hdr _dh; if (par->fragoff != 0) return false; dh = skb_header_pointer(skb, par->thoff, sizeof(_dh), &_dh); if (dh == NULL) { par->hotdrop = true; return false; } return DCCHECK(ntohs(dh->dccph_sport) >= info->spts[0] && ntohs(dh->dccph_sport) <= info->spts[1], XT_DCCP_SRC_PORTS, info->flags, info->invflags) && DCCHECK(ntohs(dh->dccph_dport) >= info->dpts[0] && ntohs(dh->dccph_dport) <= info->dpts[1], XT_DCCP_DEST_PORTS, info->flags, info->invflags) && DCCHECK(match_types(dh, info->typemask), XT_DCCP_TYPE, info->flags, info->invflags) && DCCHECK(match_option(info->option, skb, par->thoff, dh, &par->hotdrop), XT_DCCP_OPTION, info->flags, info->invflags); } static int dccp_mt_check(const struct xt_mtchk_param *par) { const struct xt_dccp_info *info = par->matchinfo; if (info->flags & ~XT_DCCP_VALID_FLAGS) return -EINVAL; if (info->invflags & ~XT_DCCP_VALID_FLAGS) return -EINVAL; if (info->invflags & ~info->flags) return -EINVAL; return 0; } static struct xt_match dccp_mt_reg[] __read_mostly = { { .name = "dccp", .family = NFPROTO_IPV4, .checkentry = dccp_mt_check, .match = dccp_mt, .matchsize = sizeof(struct xt_dccp_info), .proto = IPPROTO_DCCP, .me = THIS_MODULE, }, { .name = "dccp", .family = NFPROTO_IPV6, .checkentry = dccp_mt_check, .match = dccp_mt, .matchsize = sizeof(struct xt_dccp_info), .proto = IPPROTO_DCCP, .me = THIS_MODULE, }, }; static int __init dccp_mt_init(void) { int ret; /* doff is 8 bits, so the maximum option size is (4*256). Don't put * this in BSS since DaveM is worried about locked TLB's for kernel * BSS. */ dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL); if (!dccp_optbuf) return -ENOMEM; ret = xt_register_matches(dccp_mt_reg, ARRAY_SIZE(dccp_mt_reg)); if (ret) goto out_kfree; return ret; out_kfree: kfree(dccp_optbuf); return ret; } static void __exit dccp_mt_exit(void) { xt_unregister_matches(dccp_mt_reg, ARRAY_SIZE(dccp_mt_reg)); kfree(dccp_optbuf); } module_init(dccp_mt_init); module_exit(dccp_mt_exit); |
4 15 1 5 5 5 5 4 5 6 1 1 4 6 1 5 5 174 2 174 236 234 233 179 4 152 176 1 4 31 234 234 152 15 1 2 9 1 10 14 142 1 1 74 63 1 1 2 7 7 172 1 4 2 1 1 142 129 4 22 36 17 13 12 17 12 17 10 6 41 28 58 10 4 4 31 9 2 10 13 9 26 1 25 25 22 6 23 4 25 9 20 17 1 12 19 11 17 8 23 5 22 6 26 26 26 25 26 13 22 6 23 4 25 25 25 1 2 41 12 1 32 32 17 45 44 39 4 39 4 38 5 38 5 38 5 41 26 1 6 9 6 9 18 3 8 1 7 5 7 5 37 38 29 8 34 3 3 34 31 5 5 23 13 19 2 17 10 6 7 7 8 7 15 12 3 11 4 2 3 3 4 1 2 1 3 9 31 13 1 13 13 56 56 29 13 12 1 2 12 13 7 1 2 5 4 8 1 2 5 4 4 1 1 2 5 8 8 6 5 8 1 2 9 8 4 10 10 12 1 12 4 1 1 6 4 1 1 1 1 1 2 3 2 19 1 19 18 3 5 8 8 8 21 1 12 1 19 47 47 47 47 47 47 46 5 43 44 12 4 9 12 7 4 4 44 1 1 42 16 3 1 1 11 10 1 11 7 4 10 1 9 10 1 11 6 7 3 1 3 2 1 3 2 1 2 2 1 1 4 13 10 4 1 1 3 15 3 1 5 6 6 4 2 6 2 4 1 5 3 1 1 1 4 1 1 2 2 5 5 5 5 7 6 3 6 178 166 163 13 2 1 1 7 5 7 7 7 5 5 1 1 1 7 7 7 5 5 1 1 1 1 44 44 26 14 14 5 53 52 44 43 1 7 20 19 19 1 19 20 10 7 5 20 20 20 20 10 7 5 19 32 16 3 3 9 1 1 1 1 15 5 1 2 179 1 1 179 17 179 143 1 47 6 1 2 1 5 1 5 5 5 1 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/key/af_key.c An implementation of PF_KEYv2 sockets. * * Authors: Maxim Giryaev <gem@asplinux.ru> * David S. Miller <davem@redhat.com> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * Kazunori MIYAZAWA / USAGI Project <miyazawa@linux-ipv6.org> * Derek Atkins <derek@ihtfp.com> */ #include <linux/capability.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/socket.h> #include <linux/pfkeyv2.h> #include <linux/ipsec.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/proc_fs.h> #include <linux/init.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/xfrm.h> #include <net/sock.h> #define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x)) #define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x)) static unsigned int pfkey_net_id __read_mostly; struct netns_pfkey { /* List of all pfkey sockets. */ struct hlist_head table; atomic_t socks_nr; }; static DEFINE_MUTEX(pfkey_mutex); #define DUMMY_MARK 0 static const struct xfrm_mark dummy_mark = {0, 0}; struct pfkey_sock { /* struct sock must be the first member of struct pfkey_sock */ struct sock sk; int registered; int promisc; struct { uint8_t msg_version; uint32_t msg_portid; int (*dump)(struct pfkey_sock *sk); void (*done)(struct pfkey_sock *sk); union { struct xfrm_policy_walk policy; struct xfrm_state_walk state; } u; struct sk_buff *skb; } dump; struct mutex dump_lock; }; static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, xfrm_address_t *saddr, xfrm_address_t *daddr, u16 *family); static inline struct pfkey_sock *pfkey_sk(struct sock *sk) { return (struct pfkey_sock *)sk; } static int pfkey_can_dump(const struct sock *sk) { if (3 * atomic_read(&sk->sk_rmem_alloc) <= 2 * sk->sk_rcvbuf) return 1; return 0; } static void pfkey_terminate_dump(struct pfkey_sock *pfk) { if (pfk->dump.dump) { if (pfk->dump.skb) { kfree_skb(pfk->dump.skb); pfk->dump.skb = NULL; } pfk->dump.done(pfk); pfk->dump.dump = NULL; pfk->dump.done = NULL; } } static void pfkey_sock_destruct(struct sock *sk) { struct net *net = sock_net(sk); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); pfkey_terminate_dump(pfkey_sk(sk)); skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Attempt to release alive pfkey socket: %p\n", sk); return; } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(refcount_read(&sk->sk_wmem_alloc)); atomic_dec(&net_pfkey->socks_nr); } static const struct proto_ops pfkey_ops; static void pfkey_insert(struct sock *sk) { struct net *net = sock_net(sk); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); mutex_lock(&pfkey_mutex); sk_add_node_rcu(sk, &net_pfkey->table); mutex_unlock(&pfkey_mutex); } static void pfkey_remove(struct sock *sk) { mutex_lock(&pfkey_mutex); sk_del_node_init_rcu(sk); mutex_unlock(&pfkey_mutex); } static struct proto key_proto = { .name = "KEY", .owner = THIS_MODULE, .obj_size = sizeof(struct pfkey_sock), }; static int pfkey_create(struct net *net, struct socket *sock, int protocol, int kern) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; struct pfkey_sock *pfk; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; if (protocol != PF_KEY_V2) return -EPROTONOSUPPORT; sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern); if (sk == NULL) return -ENOMEM; pfk = pfkey_sk(sk); mutex_init(&pfk->dump_lock); sock->ops = &pfkey_ops; sock_init_data(sock, sk); sk->sk_family = PF_KEY; sk->sk_destruct = pfkey_sock_destruct; atomic_inc(&net_pfkey->socks_nr); pfkey_insert(sk); return 0; } static int pfkey_release(struct socket *sock) { struct sock *sk = sock->sk; if (!sk) return 0; pfkey_remove(sk); sock_orphan(sk); sock->sk = NULL; skb_queue_purge(&sk->sk_write_queue); synchronize_rcu(); sock_put(sk); return 0; } static int pfkey_broadcast_one(struct sk_buff *skb, gfp_t allocation, struct sock *sk) { int err = -ENOBUFS; if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) return err; skb = skb_clone(skb, allocation); if (skb) { skb_set_owner_r(skb, sk); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); err = 0; } return err; } /* Send SKB to all pfkey sockets matching selected criteria. */ #define BROADCAST_ALL 0 #define BROADCAST_ONE 1 #define BROADCAST_REGISTERED 2 #define BROADCAST_PROMISC_ONLY 4 static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, int broadcast_flags, struct sock *one_sk, struct net *net) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; int err = -ESRCH; /* XXX Do we need something like netlink_overrun? I think * XXX PF_KEY socket apps will not mind current behavior. */ if (!skb) return -ENOMEM; rcu_read_lock(); sk_for_each_rcu(sk, &net_pfkey->table) { struct pfkey_sock *pfk = pfkey_sk(sk); int err2; /* Yes, it means that if you are meant to receive this * pfkey message you receive it twice as promiscuous * socket. */ if (pfk->promisc) pfkey_broadcast_one(skb, GFP_ATOMIC, sk); /* the exact target will be processed later */ if (sk == one_sk) continue; if (broadcast_flags != BROADCAST_ALL) { if (broadcast_flags & BROADCAST_PROMISC_ONLY) continue; if ((broadcast_flags & BROADCAST_REGISTERED) && !pfk->registered) continue; if (broadcast_flags & BROADCAST_ONE) continue; } err2 = pfkey_broadcast_one(skb, GFP_ATOMIC, sk); /* Error is cleared after successful sending to at least one * registered KM */ if ((broadcast_flags & BROADCAST_REGISTERED) && err) err = err2; } rcu_read_unlock(); if (one_sk != NULL) err = pfkey_broadcast_one(skb, allocation, one_sk); kfree_skb(skb); return err; } static int pfkey_do_dump(struct pfkey_sock *pfk) { struct sadb_msg *hdr; int rc; mutex_lock(&pfk->dump_lock); if (!pfk->dump.dump) { rc = 0; goto out; } rc = pfk->dump.dump(pfk); if (rc == -ENOBUFS) { rc = 0; goto out; } if (pfk->dump.skb) { if (!pfkey_can_dump(&pfk->sk)) { rc = 0; goto out; } hdr = (struct sadb_msg *) pfk->dump.skb->data; hdr->sadb_msg_seq = 0; hdr->sadb_msg_errno = rc; pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = NULL; } pfkey_terminate_dump(pfk); out: mutex_unlock(&pfk->dump_lock); return rc; } static inline void pfkey_hdr_dup(struct sadb_msg *new, const struct sadb_msg *orig) { *new = *orig; } static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk) { struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL); struct sadb_msg *hdr; if (!skb) return -ENOBUFS; /* Woe be to the platform trying to support PFKEY yet * having normal errnos outside the 1-255 range, inclusive. */ err = -err; if (err == ERESTARTSYS || err == ERESTARTNOHAND || err == ERESTARTNOINTR) err = EINTR; if (err >= 512) err = EINVAL; BUG_ON(err <= 0 || err >= 256); hdr = skb_put(skb, sizeof(struct sadb_msg)); pfkey_hdr_dup(hdr, orig); hdr->sadb_msg_errno = (uint8_t) err; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk)); return 0; } static const u8 sadb_ext_min_len[] = { [SADB_EXT_RESERVED] = (u8) 0, [SADB_EXT_SA] = (u8) sizeof(struct sadb_sa), [SADB_EXT_LIFETIME_CURRENT] = (u8) sizeof(struct sadb_lifetime), [SADB_EXT_LIFETIME_HARD] = (u8) sizeof(struct sadb_lifetime), [SADB_EXT_LIFETIME_SOFT] = (u8) sizeof(struct sadb_lifetime), [SADB_EXT_ADDRESS_SRC] = (u8) sizeof(struct sadb_address), [SADB_EXT_ADDRESS_DST] = (u8) sizeof(struct sadb_address), [SADB_EXT_ADDRESS_PROXY] = (u8) sizeof(struct sadb_address), [SADB_EXT_KEY_AUTH] = (u8) sizeof(struct sadb_key), [SADB_EXT_KEY_ENCRYPT] = (u8) sizeof(struct sadb_key), [SADB_EXT_IDENTITY_SRC] = (u8) sizeof(struct sadb_ident), [SADB_EXT_IDENTITY_DST] = (u8) sizeof(struct sadb_ident), [SADB_EXT_SENSITIVITY] = (u8) sizeof(struct sadb_sens), [SADB_EXT_PROPOSAL] = (u8) sizeof(struct sadb_prop), [SADB_EXT_SUPPORTED_AUTH] = (u8) sizeof(struct sadb_supported), [SADB_EXT_SUPPORTED_ENCRYPT] = (u8) sizeof(struct sadb_supported), [SADB_EXT_SPIRANGE] = (u8) sizeof(struct sadb_spirange), [SADB_X_EXT_KMPRIVATE] = (u8) sizeof(struct sadb_x_kmprivate), [SADB_X_EXT_POLICY] = (u8) sizeof(struct sadb_x_policy), [SADB_X_EXT_SA2] = (u8) sizeof(struct sadb_x_sa2), [SADB_X_EXT_NAT_T_TYPE] = (u8) sizeof(struct sadb_x_nat_t_type), [SADB_X_EXT_NAT_T_SPORT] = (u8) sizeof(struct sadb_x_nat_t_port), [SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port), [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address), [SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx), [SADB_X_EXT_KMADDRESS] = (u8) sizeof(struct sadb_x_kmaddress), [SADB_X_EXT_FILTER] = (u8) sizeof(struct sadb_x_filter), }; /* Verify sadb_address_{len,prefixlen} against sa_family. */ static int verify_address_len(const void *p) { const struct sadb_address *sp = p; const struct sockaddr *addr = (const struct sockaddr *)(sp + 1); const struct sockaddr_in *sin; #if IS_ENABLED(CONFIG_IPV6) const struct sockaddr_in6 *sin6; #endif int len; if (sp->sadb_address_len < DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family), sizeof(uint64_t))) return -EINVAL; switch (addr->sa_family) { case AF_INET: len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t)); if (sp->sadb_address_len != len || sp->sadb_address_prefixlen > 32) return -EINVAL; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin6), sizeof(uint64_t)); if (sp->sadb_address_len != len || sp->sadb_address_prefixlen > 128) return -EINVAL; break; #endif default: /* It is user using kernel to keep track of security * associations for another protocol, such as * OSPF/RSVP/RIPV2/MIP. It is user's job to verify * lengths. * * XXX Actually, association/policy database is not yet * XXX able to cope with arbitrary sockaddr families. * XXX When it can, remove this -EINVAL. -DaveM */ return -EINVAL; } return 0; } static inline int sadb_key_len(const struct sadb_key *key) { int key_bytes = DIV_ROUND_UP(key->sadb_key_bits, 8); return DIV_ROUND_UP(sizeof(struct sadb_key) + key_bytes, sizeof(uint64_t)); } static int verify_key_len(const void *p) { const struct sadb_key *key = p; if (sadb_key_len(key) > key->sadb_key_len) return -EINVAL; return 0; } static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx) { return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) + sec_ctx->sadb_x_ctx_len, sizeof(uint64_t)); } static inline int verify_sec_ctx_len(const void *p) { const struct sadb_x_sec_ctx *sec_ctx = p; int len = sec_ctx->sadb_x_ctx_len; if (len > PAGE_SIZE) return -EINVAL; len = pfkey_sec_ctx_len(sec_ctx); if (sec_ctx->sadb_x_sec_len != len) return -EINVAL; return 0; } static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx, gfp_t gfp) { struct xfrm_user_sec_ctx *uctx = NULL; int ctx_size = sec_ctx->sadb_x_ctx_len; uctx = kmalloc((sizeof(*uctx)+ctx_size), gfp); if (!uctx) return NULL; uctx->len = pfkey_sec_ctx_len(sec_ctx); uctx->exttype = sec_ctx->sadb_x_sec_exttype; uctx->ctx_doi = sec_ctx->sadb_x_ctx_doi; uctx->ctx_alg = sec_ctx->sadb_x_ctx_alg; uctx->ctx_len = sec_ctx->sadb_x_ctx_len; memcpy(uctx + 1, sec_ctx + 1, uctx->ctx_len); return uctx; } static int present_and_same_family(const struct sadb_address *src, const struct sadb_address *dst) { const struct sockaddr *s_addr, *d_addr; if (!src || !dst) return 0; s_addr = (const struct sockaddr *)(src + 1); d_addr = (const struct sockaddr *)(dst + 1); if (s_addr->sa_family != d_addr->sa_family) return 0; if (s_addr->sa_family != AF_INET #if IS_ENABLED(CONFIG_IPV6) && s_addr->sa_family != AF_INET6 #endif ) return 0; return 1; } static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void **ext_hdrs) { const char *p = (char *) hdr; int len = skb->len; len -= sizeof(*hdr); p += sizeof(*hdr); while (len > 0) { const struct sadb_ext *ehdr = (const struct sadb_ext *) p; uint16_t ext_type; int ext_len; if (len < sizeof(*ehdr)) return -EINVAL; ext_len = ehdr->sadb_ext_len; ext_len *= sizeof(uint64_t); ext_type = ehdr->sadb_ext_type; if (ext_len < sizeof(uint64_t) || ext_len > len || ext_type == SADB_EXT_RESERVED) return -EINVAL; if (ext_type <= SADB_EXT_MAX) { int min = (int) sadb_ext_min_len[ext_type]; if (ext_len < min) return -EINVAL; if (ext_hdrs[ext_type-1] != NULL) return -EINVAL; switch (ext_type) { case SADB_EXT_ADDRESS_SRC: case SADB_EXT_ADDRESS_DST: case SADB_EXT_ADDRESS_PROXY: case SADB_X_EXT_NAT_T_OA: if (verify_address_len(p)) return -EINVAL; break; case SADB_X_EXT_SEC_CTX: if (verify_sec_ctx_len(p)) return -EINVAL; break; case SADB_EXT_KEY_AUTH: case SADB_EXT_KEY_ENCRYPT: if (verify_key_len(p)) return -EINVAL; break; default: break; } ext_hdrs[ext_type-1] = (void *) p; } p += ext_len; len -= ext_len; } return 0; } static uint16_t pfkey_satype2proto(uint8_t satype) { switch (satype) { case SADB_SATYPE_UNSPEC: return IPSEC_PROTO_ANY; case SADB_SATYPE_AH: return IPPROTO_AH; case SADB_SATYPE_ESP: return IPPROTO_ESP; case SADB_X_SATYPE_IPCOMP: return IPPROTO_COMP; default: return 0; } /* NOTREACHED */ } static uint8_t pfkey_proto2satype(uint16_t proto) { switch (proto) { case IPPROTO_AH: return SADB_SATYPE_AH; case IPPROTO_ESP: return SADB_SATYPE_ESP; case IPPROTO_COMP: return SADB_X_SATYPE_IPCOMP; default: return 0; } /* NOTREACHED */ } /* BTW, this scheme means that there is no way with PFKEY2 sockets to * say specifically 'just raw sockets' as we encode them as 255. */ static uint8_t pfkey_proto_to_xfrm(uint8_t proto) { return proto == IPSEC_PROTO_ANY ? 0 : proto; } static uint8_t pfkey_proto_from_xfrm(uint8_t proto) { return proto ? proto : IPSEC_PROTO_ANY; } static inline int pfkey_sockaddr_len(sa_family_t family) { switch (family) { case AF_INET: return sizeof(struct sockaddr_in); #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: return sizeof(struct sockaddr_in6); #endif } return 0; } static int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr) { switch (sa->sa_family) { case AF_INET: xaddr->a4 = ((struct sockaddr_in *)sa)->sin_addr.s_addr; return AF_INET; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: memcpy(xaddr->a6, &((struct sockaddr_in6 *)sa)->sin6_addr, sizeof(struct in6_addr)); return AF_INET6; #endif } return 0; } static int pfkey_sadb_addr2xfrm_addr(const struct sadb_address *addr, xfrm_address_t *xaddr) { return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1), xaddr); } static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct sadb_msg *hdr, void * const *ext_hdrs) { const struct sadb_sa *sa; const struct sadb_address *addr; uint16_t proto; unsigned short family; xfrm_address_t *xaddr; sa = ext_hdrs[SADB_EXT_SA - 1]; if (sa == NULL) return NULL; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return NULL; /* sadb_address_len should be checked by caller */ addr = ext_hdrs[SADB_EXT_ADDRESS_DST - 1]; if (addr == NULL) return NULL; family = ((const struct sockaddr *)(addr + 1))->sa_family; switch (family) { case AF_INET: xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr; break; #endif default: xaddr = NULL; } if (!xaddr) return NULL; return xfrm_state_lookup(net, DUMMY_MARK, xaddr, sa->sadb_sa_spi, proto, family); } #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) static int pfkey_sockaddr_size(sa_family_t family) { return PFKEY_ALIGN8(pfkey_sockaddr_len(family)); } static inline int pfkey_mode_from_xfrm(int mode) { switch(mode) { case XFRM_MODE_TRANSPORT: return IPSEC_MODE_TRANSPORT; case XFRM_MODE_TUNNEL: return IPSEC_MODE_TUNNEL; case XFRM_MODE_BEET: return IPSEC_MODE_BEET; default: return -1; } } static inline int pfkey_mode_to_xfrm(int mode) { switch(mode) { case IPSEC_MODE_ANY: /*XXX*/ case IPSEC_MODE_TRANSPORT: return XFRM_MODE_TRANSPORT; case IPSEC_MODE_TUNNEL: return XFRM_MODE_TUNNEL; case IPSEC_MODE_BEET: return XFRM_MODE_BEET; default: return -1; } } static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port, struct sockaddr *sa, unsigned short family) { switch (family) { case AF_INET: { struct sockaddr_in *sin = (struct sockaddr_in *)sa; sin->sin_family = AF_INET; sin->sin_port = port; sin->sin_addr.s_addr = xaddr->a4; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); return 32; } #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; sin6->sin6_family = AF_INET6; sin6->sin6_port = port; sin6->sin6_flowinfo = 0; sin6->sin6_addr = xaddr->in6; sin6->sin6_scope_id = 0; return 128; } #endif } return 0; } static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, int add_keys, int hsc) { struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_sa *sa; struct sadb_lifetime *lifetime; struct sadb_address *addr; struct sadb_key *key; struct sadb_x_sa2 *sa2; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; int size; int auth_key_size = 0; int encrypt_key_size = 0; int sockaddr_size; struct xfrm_encap_tmpl *natt = NULL; int mode; /* address family check */ sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) return ERR_PTR(-EINVAL); /* base, SA, (lifetime (HSC),) address(SD), (address(P),) key(AE), (identity(SD),) (sensitivity)> */ size = sizeof(struct sadb_msg) +sizeof(struct sadb_sa) + sizeof(struct sadb_lifetime) + ((hsc & 1) ? sizeof(struct sadb_lifetime) : 0) + ((hsc & 2) ? sizeof(struct sadb_lifetime) : 0) + sizeof(struct sadb_address)*2 + sockaddr_size*2 + sizeof(struct sadb_x_sa2); if ((xfrm_ctx = x->security)) { ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); size += sizeof(struct sadb_x_sec_ctx) + ctx_size; } /* identity & sensitivity */ if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family)) size += sizeof(struct sadb_address) + sockaddr_size; if (add_keys) { if (x->aalg && x->aalg->alg_key_len) { auth_key_size = PFKEY_ALIGN8((x->aalg->alg_key_len + 7) / 8); size += sizeof(struct sadb_key) + auth_key_size; } if (x->ealg && x->ealg->alg_key_len) { encrypt_key_size = PFKEY_ALIGN8((x->ealg->alg_key_len+7) / 8); size += sizeof(struct sadb_key) + encrypt_key_size; } } if (x->encap) natt = x->encap; if (natt && natt->encap_type) { size += sizeof(struct sadb_x_nat_t_type); size += sizeof(struct sadb_x_nat_t_port); size += sizeof(struct sadb_x_nat_t_port); } skb = alloc_skb(size + 16, GFP_ATOMIC); if (skb == NULL) return ERR_PTR(-ENOBUFS); /* call should fill header later */ hdr = skb_put(skb, sizeof(struct sadb_msg)); memset(hdr, 0, size); /* XXX do we need this ? */ hdr->sadb_msg_len = size / sizeof(uint64_t); /* sa */ sa = skb_put(skb, sizeof(struct sadb_sa)); sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t); sa->sadb_sa_exttype = SADB_EXT_SA; sa->sadb_sa_spi = x->id.spi; sa->sadb_sa_replay = x->props.replay_window; switch (x->km.state) { case XFRM_STATE_VALID: sa->sadb_sa_state = x->km.dying ? SADB_SASTATE_DYING : SADB_SASTATE_MATURE; break; case XFRM_STATE_ACQ: sa->sadb_sa_state = SADB_SASTATE_LARVAL; break; default: sa->sadb_sa_state = SADB_SASTATE_DEAD; break; } sa->sadb_sa_auth = 0; if (x->aalg) { struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0); sa->sadb_sa_auth = (a && a->pfkey_supported) ? a->desc.sadb_alg_id : 0; } sa->sadb_sa_encrypt = 0; BUG_ON(x->ealg && x->calg); if (x->ealg) { struct xfrm_algo_desc *a = xfrm_ealg_get_byname(x->ealg->alg_name, 0); sa->sadb_sa_encrypt = (a && a->pfkey_supported) ? a->desc.sadb_alg_id : 0; } /* KAME compatible: sadb_sa_encrypt is overloaded with calg id */ if (x->calg) { struct xfrm_algo_desc *a = xfrm_calg_get_byname(x->calg->alg_name, 0); sa->sadb_sa_encrypt = (a && a->pfkey_supported) ? a->desc.sadb_alg_id : 0; } sa->sadb_sa_flags = 0; if (x->props.flags & XFRM_STATE_NOECN) sa->sadb_sa_flags |= SADB_SAFLAGS_NOECN; if (x->props.flags & XFRM_STATE_DECAP_DSCP) sa->sadb_sa_flags |= SADB_SAFLAGS_DECAP_DSCP; if (x->props.flags & XFRM_STATE_NOPMTUDISC) sa->sadb_sa_flags |= SADB_SAFLAGS_NOPMTUDISC; /* hard time */ if (hsc & 2) { lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; lifetime->sadb_lifetime_allocations = _X2KEY(x->lft.hard_packet_limit); lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.hard_byte_limit); lifetime->sadb_lifetime_addtime = x->lft.hard_add_expires_seconds; lifetime->sadb_lifetime_usetime = x->lft.hard_use_expires_seconds; } /* soft time */ if (hsc & 1) { lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT; lifetime->sadb_lifetime_allocations = _X2KEY(x->lft.soft_packet_limit); lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.soft_byte_limit); lifetime->sadb_lifetime_addtime = x->lft.soft_add_expires_seconds; lifetime->sadb_lifetime_usetime = x->lft.soft_use_expires_seconds; } /* current time */ lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; lifetime->sadb_lifetime_allocations = x->curlft.packets; lifetime->sadb_lifetime_bytes = x->curlft.bytes; lifetime->sadb_lifetime_addtime = x->curlft.add_time; lifetime->sadb_lifetime_usetime = x->curlft.use_time; /* src address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; /* "if the ports are non-zero, then the sadb_address_proto field, normally zero, MUST be filled in with the transport protocol's number." - RFC2367 */ addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->props.saddr, 0, (struct sockaddr *) (addr + 1), x->props.family); BUG_ON(!addr->sadb_address_prefixlen); /* dst address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->id.daddr, 0, (struct sockaddr *) (addr + 1), x->props.family); BUG_ON(!addr->sadb_address_prefixlen); if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family)) { addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY; addr->sadb_address_proto = pfkey_proto_from_xfrm(x->sel.proto); addr->sadb_address_prefixlen = x->sel.prefixlen_s; addr->sadb_address_reserved = 0; pfkey_sockaddr_fill(&x->sel.saddr, x->sel.sport, (struct sockaddr *) (addr + 1), x->props.family); } /* auth key */ if (add_keys && auth_key_size) { key = skb_put(skb, sizeof(struct sadb_key) + auth_key_size); key->sadb_key_len = (sizeof(struct sadb_key) + auth_key_size) / sizeof(uint64_t); key->sadb_key_exttype = SADB_EXT_KEY_AUTH; key->sadb_key_bits = x->aalg->alg_key_len; key->sadb_key_reserved = 0; memcpy(key + 1, x->aalg->alg_key, (x->aalg->alg_key_len+7)/8); } /* encrypt key */ if (add_keys && encrypt_key_size) { key = skb_put(skb, sizeof(struct sadb_key) + encrypt_key_size); key->sadb_key_len = (sizeof(struct sadb_key) + encrypt_key_size) / sizeof(uint64_t); key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT; key->sadb_key_bits = x->ealg->alg_key_len; key->sadb_key_reserved = 0; memcpy(key + 1, x->ealg->alg_key, (x->ealg->alg_key_len+7)/8); } /* sa */ sa2 = skb_put(skb, sizeof(struct sadb_x_sa2)); sa2->sadb_x_sa2_len = sizeof(struct sadb_x_sa2)/sizeof(uint64_t); sa2->sadb_x_sa2_exttype = SADB_X_EXT_SA2; if ((mode = pfkey_mode_from_xfrm(x->props.mode)) < 0) { kfree_skb(skb); return ERR_PTR(-EINVAL); } sa2->sadb_x_sa2_mode = mode; sa2->sadb_x_sa2_reserved1 = 0; sa2->sadb_x_sa2_reserved2 = 0; sa2->sadb_x_sa2_sequence = 0; sa2->sadb_x_sa2_reqid = x->props.reqid; if (natt && natt->encap_type) { struct sadb_x_nat_t_type *n_type; struct sadb_x_nat_t_port *n_port; /* type */ n_type = skb_put(skb, sizeof(*n_type)); n_type->sadb_x_nat_t_type_len = sizeof(*n_type)/sizeof(uint64_t); n_type->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE; n_type->sadb_x_nat_t_type_type = natt->encap_type; n_type->sadb_x_nat_t_type_reserved[0] = 0; n_type->sadb_x_nat_t_type_reserved[1] = 0; n_type->sadb_x_nat_t_type_reserved[2] = 0; /* source port */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT; n_port->sadb_x_nat_t_port_port = natt->encap_sport; n_port->sadb_x_nat_t_port_reserved = 0; /* dest port */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT; n_port->sadb_x_nat_t_port_port = natt->encap_dport; n_port->sadb_x_nat_t_port_reserved = 0; } /* security context */ if (xfrm_ctx) { sec_ctx = skb_put(skb, sizeof(struct sadb_x_sec_ctx) + ctx_size); sec_ctx->sadb_x_sec_len = (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, xfrm_ctx->ctx_len); } return skb; } static inline struct sk_buff *pfkey_xfrm_state2msg(const struct xfrm_state *x) { struct sk_buff *skb; skb = __pfkey_xfrm_state2msg(x, 1, 3); return skb; } static inline struct sk_buff *pfkey_xfrm_state2msg_expire(const struct xfrm_state *x, int hsc) { return __pfkey_xfrm_state2msg(x, 0, hsc); } static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct xfrm_state *x; const struct sadb_lifetime *lifetime; const struct sadb_sa *sa; const struct sadb_key *key; const struct sadb_x_sec_ctx *sec_ctx; uint16_t proto; int err; sa = ext_hdrs[SADB_EXT_SA - 1]; if (!sa || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return ERR_PTR(-EINVAL); if (hdr->sadb_msg_satype == SADB_SATYPE_ESP && !ext_hdrs[SADB_EXT_KEY_ENCRYPT-1]) return ERR_PTR(-EINVAL); if (hdr->sadb_msg_satype == SADB_SATYPE_AH && !ext_hdrs[SADB_EXT_KEY_AUTH-1]) return ERR_PTR(-EINVAL); if (!!ext_hdrs[SADB_EXT_LIFETIME_HARD-1] != !!ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]) return ERR_PTR(-EINVAL); proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return ERR_PTR(-EINVAL); /* default error is no buffer space */ err = -ENOBUFS; /* RFC2367: Only SADB_SASTATE_MATURE SAs may be submitted in an SADB_ADD message. SADB_SASTATE_LARVAL SAs are created by SADB_GETSPI and it is not sensible to add a new SA in the DYING or SADB_SASTATE_DEAD state. Therefore, the sadb_sa_state field of all submitted SAs MUST be SADB_SASTATE_MATURE and the kernel MUST return an error if this is not true. However, KAME setkey always uses SADB_SASTATE_LARVAL. Hence, we have to _ignore_ sadb_sa_state, which is also reasonable. */ if (sa->sadb_sa_auth > SADB_AALG_MAX || (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP && sa->sadb_sa_encrypt > SADB_X_CALG_MAX) || sa->sadb_sa_encrypt > SADB_EALG_MAX) return ERR_PTR(-EINVAL); key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (key != NULL && sa->sadb_sa_auth != SADB_X_AALG_NULL && key->sadb_key_bits == 0) return ERR_PTR(-EINVAL); key = ext_hdrs[SADB_EXT_KEY_ENCRYPT-1]; if (key != NULL && sa->sadb_sa_encrypt != SADB_EALG_NULL && key->sadb_key_bits == 0) return ERR_PTR(-EINVAL); x = xfrm_state_alloc(net); if (x == NULL) return ERR_PTR(-ENOBUFS); x->id.proto = proto; x->id.spi = sa->sadb_sa_spi; x->props.replay_window = min_t(unsigned int, sa->sadb_sa_replay, (sizeof(x->replay.bitmap) * 8)); if (sa->sadb_sa_flags & SADB_SAFLAGS_NOECN) x->props.flags |= XFRM_STATE_NOECN; if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP) x->props.flags |= XFRM_STATE_DECAP_DSCP; if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC) x->props.flags |= XFRM_STATE_NOPMTUDISC; lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD - 1]; if (lifetime != NULL) { x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime; x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime; } lifetime = ext_hdrs[SADB_EXT_LIFETIME_SOFT - 1]; if (lifetime != NULL) { x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime; x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; } sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) goto out; err = security_xfrm_state_alloc(x, uctx); kfree(uctx); if (err) goto out; } err = -ENOBUFS; key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (sa->sadb_sa_auth) { int keysize = 0; struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth); if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } if (key) keysize = (key->sadb_key_bits + 7) / 8; x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL); if (!x->aalg) { err = -ENOMEM; goto out; } strcpy(x->aalg->alg_name, a->name); x->aalg->alg_key_len = 0; if (key) { x->aalg->alg_key_len = key->sadb_key_bits; memcpy(x->aalg->alg_key, key+1, keysize); } x->aalg->alg_trunc_len = a->uinfo.auth.icv_truncbits; x->props.aalgo = sa->sadb_sa_auth; /* x->algo.flags = sa->sadb_sa_flags; */ } if (sa->sadb_sa_encrypt) { if (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) { struct xfrm_algo_desc *a = xfrm_calg_get_byid(sa->sadb_sa_encrypt); if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL); if (!x->calg) { err = -ENOMEM; goto out; } strcpy(x->calg->alg_name, a->name); x->props.calgo = sa->sadb_sa_encrypt; } else { int keysize = 0; struct xfrm_algo_desc *a = xfrm_ealg_get_byid(sa->sadb_sa_encrypt); if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_ENCRYPT-1]; if (key) keysize = (key->sadb_key_bits + 7) / 8; x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL); if (!x->ealg) { err = -ENOMEM; goto out; } strcpy(x->ealg->alg_name, a->name); x->ealg->alg_key_len = 0; if (key) { x->ealg->alg_key_len = key->sadb_key_bits; memcpy(x->ealg->alg_key, key+1, keysize); } x->props.ealgo = sa->sadb_sa_encrypt; x->geniv = a->uinfo.encr.geniv; } } /* x->algo.flags = sa->sadb_sa_flags; */ x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1], &x->props.saddr); pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1], &x->id.daddr); if (ext_hdrs[SADB_X_EXT_SA2-1]) { const struct sadb_x_sa2 *sa2 = ext_hdrs[SADB_X_EXT_SA2-1]; int mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode); if (mode < 0) { err = -EINVAL; goto out; } x->props.mode = mode; x->props.reqid = sa2->sadb_x_sa2_reqid; } if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) { const struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]; /* Nobody uses this, but we try. */ x->sel.family = pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr); x->sel.prefixlen_s = addr->sadb_address_prefixlen; } if (!x->sel.family) x->sel.family = x->props.family; if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { const struct sadb_x_nat_t_type* n_type; struct xfrm_encap_tmpl *natt; x->encap = kzalloc(sizeof(*x->encap), GFP_KERNEL); if (!x->encap) { err = -ENOMEM; goto out; } natt = x->encap; n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]; natt->encap_type = n_type->sadb_x_nat_t_type_type; if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) { const struct sadb_x_nat_t_port *n_port = ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]; natt->encap_sport = n_port->sadb_x_nat_t_port_port; } if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) { const struct sadb_x_nat_t_port *n_port = ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]; natt->encap_dport = n_port->sadb_x_nat_t_port_port; } } err = xfrm_init_state(x); if (err) goto out; x->km.seq = hdr->sadb_msg_seq; return x; out: x->km.state = XFRM_STATE_DEAD; xfrm_state_put(x); return ERR_PTR(err); } static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { return -EOPNOTSUPP; } static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct sk_buff *resp_skb; struct sadb_x_sa2 *sa2; struct sadb_address *saddr, *daddr; struct sadb_msg *out_hdr; struct sadb_spirange *range; struct xfrm_state *x = NULL; int mode; int err; u32 min_spi, max_spi; u32 reqid; u8 proto; unsigned short family; xfrm_address_t *xsaddr = NULL, *xdaddr = NULL; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; if ((sa2 = ext_hdrs[SADB_X_EXT_SA2-1]) != NULL) { mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode); if (mode < 0) return -EINVAL; reqid = sa2->sadb_x_sa2_reqid; } else { mode = 0; reqid = 0; } saddr = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; daddr = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; family = ((struct sockaddr *)(saddr + 1))->sa_family; switch (family) { case AF_INET: xdaddr = (xfrm_address_t *)&((struct sockaddr_in *)(daddr + 1))->sin_addr.s_addr; xsaddr = (xfrm_address_t *)&((struct sockaddr_in *)(saddr + 1))->sin_addr.s_addr; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: xdaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(daddr + 1))->sin6_addr; xsaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(saddr + 1))->sin6_addr; break; #endif } if (hdr->sadb_msg_seq) { x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; } } if (!x) x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family); if (x == NULL) return -ENOENT; min_spi = 0x100; max_spi = 0x0fffffff; range = ext_hdrs[SADB_EXT_SPIRANGE-1]; if (range) { min_spi = range->sadb_spirange_min; max_spi = range->sadb_spirange_max; } err = verify_spi_info(x->id.proto, min_spi, max_spi, NULL); if (err) { xfrm_state_put(x); return err; } err = xfrm_alloc_spi(x, min_spi, max_spi, NULL); resp_skb = err ? ERR_PTR(err) : pfkey_xfrm_state2msg(x); if (IS_ERR(resp_skb)) { xfrm_state_put(x); return PTR_ERR(resp_skb); } out_hdr = (struct sadb_msg *) resp_skb->data; out_hdr->sadb_msg_version = hdr->sadb_msg_version; out_hdr->sadb_msg_type = SADB_GETSPI; out_hdr->sadb_msg_satype = pfkey_proto2satype(proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; xfrm_state_put(x); pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net); return 0; } static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; if (hdr->sadb_msg_len != sizeof(struct sadb_msg)/8) return -EOPNOTSUPP; if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0) return 0; x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); if (x == NULL) return 0; spin_lock_bh(&x->lock); if (x->km.state == XFRM_STATE_ACQ) x->km.state = XFRM_STATE_ERROR; spin_unlock_bh(&x->lock); xfrm_state_put(x); return 0; } static inline int event2poltype(int event) { switch (event) { case XFRM_MSG_DELPOLICY: return SADB_X_SPDDELETE; case XFRM_MSG_NEWPOLICY: return SADB_X_SPDADD; case XFRM_MSG_UPDPOLICY: return SADB_X_SPDUPDATE; case XFRM_MSG_POLEXPIRE: // return SADB_X_SPDEXPIRE; default: pr_err("pfkey: Unknown policy event %d\n", event); break; } return 0; } static inline int event2keytype(int event) { switch (event) { case XFRM_MSG_DELSA: return SADB_DELETE; case XFRM_MSG_NEWSA: return SADB_ADD; case XFRM_MSG_UPDSA: return SADB_UPDATE; case XFRM_MSG_EXPIRE: return SADB_EXPIRE; default: pr_err("pfkey: Unknown SA event %d\n", event); break; } return 0; } /* ADD/UPD/DEL */ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) { struct sk_buff *skb; struct sadb_msg *hdr; skb = pfkey_xfrm_state2msg(x); if (IS_ERR(skb)) return PTR_ERR(skb); hdr = (struct sadb_msg *) skb->data; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = event2keytype(c->event); hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); return 0; } static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; int err; struct km_event c; x = pfkey_msg2xfrm_state(net, hdr, ext_hdrs); if (IS_ERR(x)) return PTR_ERR(x); xfrm_state_hold(x); if (hdr->sadb_msg_type == SADB_ADD) err = xfrm_state_add(x); else err = xfrm_state_update(x); xfrm_audit_state_add(x, err ? 0 : 1, true); if (err < 0) { x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); goto out; } if (hdr->sadb_msg_type == SADB_ADD) c.event = XFRM_MSG_NEWSA; else c.event = XFRM_MSG_UPDSA; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; km_state_notify(x, &c); out: xfrm_state_put(x); return err; } static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; struct km_event c; int err; if (!ext_hdrs[SADB_EXT_SA-1] || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; x = pfkey_xfrm_state_lookup(net, hdr, ext_hdrs); if (x == NULL) return -ESRCH; if ((err = security_xfrm_state_delete(x))) goto out; if (xfrm_state_kern(x)) { err = -EPERM; goto out; } err = xfrm_state_delete(x); if (err < 0) goto out; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: xfrm_audit_state_delete(x, err ? 0 : 1, true); xfrm_state_put(x); return err; } static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); __u8 proto; struct sk_buff *out_skb; struct sadb_msg *out_hdr; struct xfrm_state *x; if (!ext_hdrs[SADB_EXT_SA-1] || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; x = pfkey_xfrm_state_lookup(net, hdr, ext_hdrs); if (x == NULL) return -ESRCH; out_skb = pfkey_xfrm_state2msg(x); proto = x->id.proto; xfrm_state_put(x); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = hdr->sadb_msg_version; out_hdr->sadb_msg_type = SADB_GET; out_hdr->sadb_msg_satype = pfkey_proto2satype(proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); return 0; } static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, gfp_t allocation) { struct sk_buff *skb; struct sadb_msg *hdr; int len, auth_len, enc_len, i; auth_len = xfrm_count_pfkey_auth_supported(); if (auth_len) { auth_len *= sizeof(struct sadb_alg); auth_len += sizeof(struct sadb_supported); } enc_len = xfrm_count_pfkey_enc_supported(); if (enc_len) { enc_len *= sizeof(struct sadb_alg); enc_len += sizeof(struct sadb_supported); } len = enc_len + auth_len + sizeof(struct sadb_msg); skb = alloc_skb(len + 16, allocation); if (!skb) goto out_put_algs; hdr = skb_put(skb, sizeof(*hdr)); pfkey_hdr_dup(hdr, orig); hdr->sadb_msg_errno = 0; hdr->sadb_msg_len = len / sizeof(uint64_t); if (auth_len) { struct sadb_supported *sp; struct sadb_alg *ap; sp = skb_put(skb, auth_len); ap = (struct sadb_alg *) (sp + 1); sp->sadb_supported_len = auth_len / sizeof(uint64_t); sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH; for (i = 0; ; i++) { struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg->available) *ap++ = aalg->desc; } } if (enc_len) { struct sadb_supported *sp; struct sadb_alg *ap; sp = skb_put(skb, enc_len); ap = (struct sadb_alg *) (sp + 1); sp->sadb_supported_len = enc_len / sizeof(uint64_t); sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT; for (i = 0; ; i++) { struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; if (!ealg->pfkey_supported) continue; if (ealg->available) *ap++ = ealg->desc; } } out_put_algs: return skb; } static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); struct sk_buff *supp_skb; if (hdr->sadb_msg_satype > SADB_SATYPE_MAX) return -EINVAL; if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) { if (pfk->registered&(1<<hdr->sadb_msg_satype)) return -EEXIST; pfk->registered |= (1<<hdr->sadb_msg_satype); } mutex_lock(&pfkey_mutex); xfrm_probe_algs(); supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO); mutex_unlock(&pfkey_mutex); if (!supp_skb) { if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) pfk->registered &= ~(1<<hdr->sadb_msg_satype); return -ENOBUFS; } pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, sock_net(sk)); return 0; } static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr) { struct sk_buff *skb; struct sadb_msg *hdr; skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb) return -ENOBUFS; hdr = skb_put_data(skb, ihdr, sizeof(struct sadb_msg)); hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); } static int key_notify_sa_flush(const struct km_event *c) { struct sk_buff *skb; struct sadb_msg *hdr; skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb) return -ENOBUFS; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto); hdr->sadb_msg_type = SADB_FLUSH; hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); unsigned int proto; struct km_event c; int err, err2; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; err = xfrm_state_flush(net, proto, true, false); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - go quietly */ err = 0; return err ? err : err2; } c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_FLUSHSA; c.net = net; km_state_notify(NULL, &c); return 0; } static int dump_sa(struct xfrm_state *x, int count, void *ptr) { struct pfkey_sock *pfk = ptr; struct sk_buff *out_skb; struct sadb_msg *out_hdr; if (!pfkey_can_dump(&pfk->sk)) return -ENOBUFS; out_skb = pfkey_xfrm_state2msg(x); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = pfk->dump.msg_version; out_hdr->sadb_msg_type = SADB_DUMP; out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; return 0; } static int pfkey_dump_sa(struct pfkey_sock *pfk) { struct net *net = sock_net(&pfk->sk); return xfrm_state_walk(net, &pfk->dump.u.state, dump_sa, (void *) pfk); } static void pfkey_dump_sa_done(struct pfkey_sock *pfk) { struct net *net = sock_net(&pfk->sk); xfrm_state_walk_done(&pfk->dump.u.state, net); } static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { u8 proto; struct xfrm_address_filter *filter = NULL; struct pfkey_sock *pfk = pfkey_sk(sk); mutex_lock(&pfk->dump_lock); if (pfk->dump.dump != NULL) { mutex_unlock(&pfk->dump_lock); return -EBUSY; } proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) { mutex_unlock(&pfk->dump_lock); return -EINVAL; } if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; if ((xfilter->sadb_x_filter_splen > (sizeof(xfrm_address_t) << 3)) || (xfilter->sadb_x_filter_dplen > (sizeof(xfrm_address_t) << 3))) { mutex_unlock(&pfk->dump_lock); return -EINVAL; } filter = kmalloc(sizeof(*filter), GFP_KERNEL); if (filter == NULL) { mutex_unlock(&pfk->dump_lock); return -ENOMEM; } memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr, sizeof(xfrm_address_t)); memcpy(&filter->daddr, &xfilter->sadb_x_filter_daddr, sizeof(xfrm_address_t)); filter->family = xfilter->sadb_x_filter_family; filter->splen = xfilter->sadb_x_filter_splen; filter->dplen = xfilter->sadb_x_filter_dplen; } pfk->dump.msg_version = hdr->sadb_msg_version; pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sa; pfk->dump.done = pfkey_dump_sa_done; xfrm_state_walk_init(&pfk->dump.u.state, proto, filter); mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); int satype = hdr->sadb_msg_satype; bool reset_errno = false; if (hdr->sadb_msg_len == (sizeof(*hdr) / sizeof(uint64_t))) { reset_errno = true; if (satype != 0 && satype != 1) return -EINVAL; pfk->promisc = satype; } if (reset_errno && skb_cloned(skb)) skb = skb_copy(skb, GFP_KERNEL); else skb = skb_clone(skb, GFP_KERNEL); if (reset_errno && skb) { struct sadb_msg *new_hdr = (struct sadb_msg *) skb->data; new_hdr->sadb_msg_errno = 0; } pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); return 0; } static int check_reqid(struct xfrm_policy *xp, int dir, int count, void *ptr) { int i; u32 reqid = *(u32*)ptr; for (i=0; i<xp->xfrm_nr; i++) { if (xp->xfrm_vec[i].reqid == reqid) return -EEXIST; } return 0; } static u32 gen_reqid(struct net *net) { struct xfrm_policy_walk walk; u32 start; int rc; static u32 reqid = IPSEC_MANUAL_REQID_MAX; start = reqid; do { ++reqid; if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN); rc = xfrm_policy_walk(net, &walk, check_reqid, (void*)&reqid); xfrm_policy_walk_done(&walk, net); if (rc != -EEXIST) return reqid; } while (reqid != start); return 0; } static int parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_policy *pol, struct sadb_x_ipsecrequest *rq) { struct net *net = xp_net(xp); struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr; int mode; if (xp->xfrm_nr >= XFRM_MAX_DEPTH) return -ELOOP; if (rq->sadb_x_ipsecrequest_mode == 0) return -EINVAL; if (!xfrm_id_proto_valid(rq->sadb_x_ipsecrequest_proto)) return -EINVAL; t->id.proto = rq->sadb_x_ipsecrequest_proto; if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0) return -EINVAL; t->mode = mode; if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE) { if ((mode == XFRM_MODE_TUNNEL || mode == XFRM_MODE_BEET) && pol->sadb_x_policy_dir == IPSEC_DIR_OUTBOUND) return -EINVAL; t->optional = 1; } else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) { t->reqid = rq->sadb_x_ipsecrequest_reqid; if (t->reqid > IPSEC_MANUAL_REQID_MAX) t->reqid = 0; if (!t->reqid && !(t->reqid = gen_reqid(net))) return -ENOBUFS; } /* addresses present only in tunnel mode */ if (t->mode == XFRM_MODE_TUNNEL) { int err; err = parse_sockaddr_pair( (struct sockaddr *)(rq + 1), rq->sadb_x_ipsecrequest_len - sizeof(*rq), &t->saddr, &t->id.daddr, &t->encap_family); if (err) return err; } else t->encap_family = xp->family; /* No way to set this via kame pfkey */ t->allalgs = 1; xp->xfrm_nr++; return 0; } static int parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) { int err; int len = pol->sadb_x_policy_len*8 - sizeof(struct sadb_x_policy); struct sadb_x_ipsecrequest *rq = (void*)(pol+1); if (pol->sadb_x_policy_len * 8 < sizeof(struct sadb_x_policy)) return -EINVAL; while (len >= sizeof(*rq)) { if (len < rq->sadb_x_ipsecrequest_len || rq->sadb_x_ipsecrequest_len < sizeof(*rq)) return -EINVAL; if ((err = parse_ipsecrequest(xp, pol, rq)) < 0) return err; len -= rq->sadb_x_ipsecrequest_len; rq = (void*)((u8*)rq + rq->sadb_x_ipsecrequest_len); } return 0; } static inline int pfkey_xfrm_policy2sec_ctx_size(const struct xfrm_policy *xp) { struct xfrm_sec_ctx *xfrm_ctx = xp->security; if (xfrm_ctx) { int len = sizeof(struct sadb_x_sec_ctx); len += xfrm_ctx->ctx_len; return PFKEY_ALIGN8(len); } return 0; } static int pfkey_xfrm_policy2msg_size(const struct xfrm_policy *xp) { const struct xfrm_tmpl *t; int sockaddr_size = pfkey_sockaddr_size(xp->family); int socklen = 0; int i; for (i=0; i<xp->xfrm_nr; i++) { t = xp->xfrm_vec + i; socklen += pfkey_sockaddr_len(t->encap_family); } return sizeof(struct sadb_msg) + (sizeof(struct sadb_lifetime) * 3) + (sizeof(struct sadb_address) * 2) + (sockaddr_size * 2) + sizeof(struct sadb_x_policy) + (xp->xfrm_nr * sizeof(struct sadb_x_ipsecrequest)) + (socklen * 2) + pfkey_xfrm_policy2sec_ctx_size(xp); } static struct sk_buff * pfkey_xfrm_policy2msg_prep(const struct xfrm_policy *xp) { struct sk_buff *skb; int size; size = pfkey_xfrm_policy2msg_size(xp); skb = alloc_skb(size + 16, GFP_ATOMIC); if (skb == NULL) return ERR_PTR(-ENOBUFS); return skb; } static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *xp, int dir) { struct sadb_msg *hdr; struct sadb_address *addr; struct sadb_lifetime *lifetime; struct sadb_x_policy *pol; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int i; int size; int sockaddr_size = pfkey_sockaddr_size(xp->family); int socklen = pfkey_sockaddr_len(xp->family); size = pfkey_xfrm_policy2msg_size(xp); /* call should fill header later */ hdr = skb_put(skb, sizeof(struct sadb_msg)); memset(hdr, 0, size); /* XXX do we need this ? */ /* src address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto); addr->sadb_address_prefixlen = xp->selector.prefixlen_s; addr->sadb_address_reserved = 0; if (!pfkey_sockaddr_fill(&xp->selector.saddr, xp->selector.sport, (struct sockaddr *) (addr + 1), xp->family)) BUG(); /* dst address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto); addr->sadb_address_prefixlen = xp->selector.prefixlen_d; addr->sadb_address_reserved = 0; pfkey_sockaddr_fill(&xp->selector.daddr, xp->selector.dport, (struct sockaddr *) (addr + 1), xp->family); /* hard time */ lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; lifetime->sadb_lifetime_allocations = _X2KEY(xp->lft.hard_packet_limit); lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.hard_byte_limit); lifetime->sadb_lifetime_addtime = xp->lft.hard_add_expires_seconds; lifetime->sadb_lifetime_usetime = xp->lft.hard_use_expires_seconds; /* soft time */ lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT; lifetime->sadb_lifetime_allocations = _X2KEY(xp->lft.soft_packet_limit); lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.soft_byte_limit); lifetime->sadb_lifetime_addtime = xp->lft.soft_add_expires_seconds; lifetime->sadb_lifetime_usetime = xp->lft.soft_use_expires_seconds; /* current time */ lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; lifetime->sadb_lifetime_allocations = xp->curlft.packets; lifetime->sadb_lifetime_bytes = xp->curlft.bytes; lifetime->sadb_lifetime_addtime = xp->curlft.add_time; lifetime->sadb_lifetime_usetime = xp->curlft.use_time; pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t); pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_DISCARD; if (xp->action == XFRM_POLICY_ALLOW) { if (xp->xfrm_nr) pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; else pol->sadb_x_policy_type = IPSEC_POLICY_NONE; } pol->sadb_x_policy_dir = dir+1; pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; pol->sadb_x_policy_priority = xp->priority; for (i=0; i<xp->xfrm_nr; i++) { const struct xfrm_tmpl *t = xp->xfrm_vec + i; struct sadb_x_ipsecrequest *rq; int req_size; int mode; req_size = sizeof(struct sadb_x_ipsecrequest); if (t->mode == XFRM_MODE_TUNNEL) { socklen = pfkey_sockaddr_len(t->encap_family); req_size += socklen * 2; } else { size -= 2*socklen; } rq = skb_put(skb, req_size); pol->sadb_x_policy_len += req_size/8; memset(rq, 0, sizeof(*rq)); rq->sadb_x_ipsecrequest_len = req_size; rq->sadb_x_ipsecrequest_proto = t->id.proto; if ((mode = pfkey_mode_from_xfrm(t->mode)) < 0) return -EINVAL; rq->sadb_x_ipsecrequest_mode = mode; rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_REQUIRE; if (t->reqid) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_UNIQUE; if (t->optional) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; rq->sadb_x_ipsecrequest_reqid = t->reqid; if (t->mode == XFRM_MODE_TUNNEL) { u8 *sa = (void *)(rq + 1); pfkey_sockaddr_fill(&t->saddr, 0, (struct sockaddr *)sa, t->encap_family); pfkey_sockaddr_fill(&t->id.daddr, 0, (struct sockaddr *) (sa + socklen), t->encap_family); } } /* security context */ if ((xfrm_ctx = xp->security)) { int ctx_size = pfkey_xfrm_policy2sec_ctx_size(xp); sec_ctx = skb_put(skb, ctx_size); sec_ctx->sadb_x_sec_len = ctx_size / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, xfrm_ctx->ctx_len); } hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_reserved = refcount_read(&xp->refcnt); return 0; } static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct sk_buff *out_skb; struct sadb_msg *out_hdr; int err; out_skb = pfkey_xfrm_policy2msg_prep(xp); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); err = pfkey_xfrm_policy2msg(out_skb, xp, dir); if (err < 0) { kfree_skb(out_skb); return err; } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = PF_KEY_V2; if (c->data.byid && c->event == XFRM_MSG_DELPOLICY) out_hdr->sadb_msg_type = SADB_X_SPDDELETE2; else out_hdr->sadb_msg_type = event2poltype(c->event); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; out_hdr->sadb_msg_pid = c->portid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); return 0; } static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); int err = 0; struct sadb_lifetime *lifetime; struct sadb_address *sa; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct km_event c; struct sadb_x_sec_ctx *sec_ctx; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || !ext_hdrs[SADB_X_EXT_POLICY-1]) return -EINVAL; pol = ext_hdrs[SADB_X_EXT_POLICY-1]; if (pol->sadb_x_policy_type > IPSEC_POLICY_IPSEC) return -EINVAL; if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) return -EINVAL; xp = xfrm_policy_alloc(net, GFP_KERNEL); if (xp == NULL) return -ENOBUFS; xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ? XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW); xp->priority = pol->sadb_x_policy_priority; sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr); xp->selector.family = xp->family; xp->selector.prefixlen_s = sa->sadb_address_prefixlen; xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); xp->selector.sport = ((struct sockaddr_in *)(sa+1))->sin_port; if (xp->selector.sport) xp->selector.sport_mask = htons(0xffff); sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr); xp->selector.prefixlen_d = sa->sadb_address_prefixlen; /* Amusing, we set this twice. KAME apps appear to set same value * in both addresses. */ xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); xp->selector.dport = ((struct sockaddr_in *)(sa+1))->sin_port; if (xp->selector.dport) xp->selector.dport_mask = htons(0xffff); sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) { err = -ENOBUFS; goto out; } err = security_xfrm_policy_alloc(&xp->security, uctx, GFP_KERNEL); kfree(uctx); if (err) goto out; } xp->lft.soft_byte_limit = XFRM_INF; xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; if ((lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD-1]) != NULL) { xp->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); xp->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); xp->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime; xp->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime; } if ((lifetime = ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]) != NULL) { xp->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); xp->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); xp->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime; xp->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; } xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && (err = parse_ipsecrequests(xp, pol)) < 0) goto out; err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); xfrm_audit_policy_add(xp, err ? 0 : 1, true); if (err) goto out; if (hdr->sadb_msg_type == SADB_X_SPDUPDATE) c.event = XFRM_MSG_UPDPOLICY; else c.event = XFRM_MSG_NEWPOLICY; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); xfrm_pol_put(xp); return 0; out: xp->walk.dead = 1; xfrm_policy_destroy(xp); return err; } static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); int err; struct sadb_address *sa; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct xfrm_selector sel; struct km_event c; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *pol_ctx = NULL; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || !ext_hdrs[SADB_X_EXT_POLICY-1]) return -EINVAL; pol = ext_hdrs[SADB_X_EXT_POLICY-1]; if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) return -EINVAL; memset(&sel, 0, sizeof(sel)); sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr); sel.prefixlen_s = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.sport = ((struct sockaddr_in *)(sa+1))->sin_port; if (sel.sport) sel.sport_mask = htons(0xffff); sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); sel.prefixlen_d = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.dport = ((struct sockaddr_in *)(sa+1))->sin_port; if (sel.dport) sel.dport_mask = htons(0xffff); sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) return -ENOMEM; err = security_xfrm_policy_alloc(&pol_ctx, uctx, GFP_KERNEL); kfree(uctx); if (err) return err; } xp = xfrm_policy_bysel_ctx(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); security_xfrm_policy_free(pol_ctx); if (xp == NULL) return -ENOENT; xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err) goto out; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.data.byid = 0; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); out: xfrm_pol_put(xp); return err; } static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struct sadb_msg *hdr, int dir) { int err; struct sk_buff *out_skb; struct sadb_msg *out_hdr; err = 0; out_skb = pfkey_xfrm_policy2msg_prep(xp); if (IS_ERR(out_skb)) { err = PTR_ERR(out_skb); goto out; } err = pfkey_xfrm_policy2msg(out_skb, xp, dir); if (err < 0) { kfree_skb(out_skb); goto out; } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = hdr->sadb_msg_version; out_hdr->sadb_msg_type = hdr->sadb_msg_type; out_hdr->sadb_msg_satype = 0; out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp)); err = 0; out: return err; } static int pfkey_sockaddr_pair_size(sa_family_t family) { return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2); } static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, xfrm_address_t *saddr, xfrm_address_t *daddr, u16 *family) { int af, socklen; if (ext_len < 2 || ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) return -EINVAL; af = pfkey_sockaddr_extract(sa, saddr); if (!af) return -EINVAL; socklen = pfkey_sockaddr_len(af); if (pfkey_sockaddr_extract((struct sockaddr *) (((u8 *)sa) + socklen), daddr) != af) return -EINVAL; *family = af; return 0; } #ifdef CONFIG_NET_KEY_MIGRATE static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, struct xfrm_migrate *m) { int err; struct sadb_x_ipsecrequest *rq2; int mode; if (len < sizeof(*rq1) || len < rq1->sadb_x_ipsecrequest_len || rq1->sadb_x_ipsecrequest_len < sizeof(*rq1)) return -EINVAL; /* old endoints */ err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1), rq1->sadb_x_ipsecrequest_len - sizeof(*rq1), &m->old_saddr, &m->old_daddr, &m->old_family); if (err) return err; rq2 = (struct sadb_x_ipsecrequest *)((u8 *)rq1 + rq1->sadb_x_ipsecrequest_len); len -= rq1->sadb_x_ipsecrequest_len; if (len <= sizeof(*rq2) || len < rq2->sadb_x_ipsecrequest_len || rq2->sadb_x_ipsecrequest_len < sizeof(*rq2)) return -EINVAL; /* new endpoints */ err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1), rq2->sadb_x_ipsecrequest_len - sizeof(*rq2), &m->new_saddr, &m->new_daddr, &m->new_family); if (err) return err; if (rq1->sadb_x_ipsecrequest_proto != rq2->sadb_x_ipsecrequest_proto || rq1->sadb_x_ipsecrequest_mode != rq2->sadb_x_ipsecrequest_mode || rq1->sadb_x_ipsecrequest_reqid != rq2->sadb_x_ipsecrequest_reqid) return -EINVAL; m->proto = rq1->sadb_x_ipsecrequest_proto; if ((mode = pfkey_mode_to_xfrm(rq1->sadb_x_ipsecrequest_mode)) < 0) return -EINVAL; m->mode = mode; m->reqid = rq1->sadb_x_ipsecrequest_reqid; return ((int)(rq1->sadb_x_ipsecrequest_len + rq2->sadb_x_ipsecrequest_len)); } static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { int i, len, ret, err = -EINVAL; u8 dir; struct sadb_address *sa; struct sadb_x_kmaddress *kma; struct sadb_x_policy *pol; struct sadb_x_ipsecrequest *rq; struct xfrm_selector sel; struct xfrm_migrate m[XFRM_MAX_DEPTH]; struct xfrm_kmaddress k; struct net *net = sock_net(sk); if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1], ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || !ext_hdrs[SADB_X_EXT_POLICY - 1]) { err = -EINVAL; goto out; } kma = ext_hdrs[SADB_X_EXT_KMADDRESS - 1]; pol = ext_hdrs[SADB_X_EXT_POLICY - 1]; if (pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) { err = -EINVAL; goto out; } if (kma) { /* convert sadb_x_kmaddress to xfrm_kmaddress */ k.reserved = kma->sadb_x_kmaddress_reserved; ret = parse_sockaddr_pair((struct sockaddr *)(kma + 1), 8*(kma->sadb_x_kmaddress_len) - sizeof(*kma), &k.local, &k.remote, &k.family); if (ret < 0) { err = ret; goto out; } } dir = pol->sadb_x_policy_dir - 1; memset(&sel, 0, sizeof(sel)); /* set source address info of selector */ sa = ext_hdrs[SADB_EXT_ADDRESS_SRC - 1]; sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr); sel.prefixlen_s = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.sport = ((struct sockaddr_in *)(sa + 1))->sin_port; if (sel.sport) sel.sport_mask = htons(0xffff); /* set destination address info of selector */ sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1]; pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); sel.prefixlen_d = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.dport = ((struct sockaddr_in *)(sa + 1))->sin_port; if (sel.dport) sel.dport_mask = htons(0xffff); rq = (struct sadb_x_ipsecrequest *)(pol + 1); /* extract ipsecrequests */ i = 0; len = pol->sadb_x_policy_len * 8 - sizeof(struct sadb_x_policy); while (len > 0 && i < XFRM_MAX_DEPTH) { ret = ipsecrequests_to_migrate(rq, len, &m[i]); if (ret < 0) { err = ret; goto out; } else { rq = (struct sadb_x_ipsecrequest *)((u8 *)rq + ret); len -= ret; i++; } } if (!i || len > 0) { err = -EINVAL; goto out; } return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, kma ? &k : NULL, net, NULL, 0, NULL); out: return err; } #else static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { return -ENOPROTOOPT; } #endif static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); unsigned int dir; int err = 0, delete; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct km_event c; if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL) return -EINVAL; dir = xfrm_policy_id2dir(pol->sadb_x_policy_id); if (dir >= XFRM_POLICY_MAX) return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); xp = xfrm_policy_byid(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; if (delete) { xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err) goto out; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.data.byid = 1; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, dir, &c); } else { err = key_pol_get_resp(sk, xp, hdr, dir); } out: xfrm_pol_put(xp); return err; } static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) { struct pfkey_sock *pfk = ptr; struct sk_buff *out_skb; struct sadb_msg *out_hdr; int err; if (!pfkey_can_dump(&pfk->sk)) return -ENOBUFS; out_skb = pfkey_xfrm_policy2msg_prep(xp); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); err = pfkey_xfrm_policy2msg(out_skb, xp, dir); if (err < 0) { kfree_skb(out_skb); return err; } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = pfk->dump.msg_version; out_hdr->sadb_msg_type = SADB_X_SPDDUMP; out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; return 0; } static int pfkey_dump_sp(struct pfkey_sock *pfk) { struct net *net = sock_net(&pfk->sk); return xfrm_policy_walk(net, &pfk->dump.u.policy, dump_sp, (void *) pfk); } static void pfkey_dump_sp_done(struct pfkey_sock *pfk) { struct net *net = sock_net((struct sock *)pfk); xfrm_policy_walk_done(&pfk->dump.u.policy, net); } static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); mutex_lock(&pfk->dump_lock); if (pfk->dump.dump != NULL) { mutex_unlock(&pfk->dump_lock); return -EBUSY; } pfk->dump.msg_version = hdr->sadb_msg_version; pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sp; pfk->dump.done = pfkey_dump_sp_done; xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN); mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } static int key_notify_policy_flush(const struct km_event *c) { struct sk_buff *skb_out; struct sadb_msg *hdr; skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb_out) return -ENOBUFS; hdr = skb_put(skb_out, sizeof(struct sadb_msg)); hdr->sadb_msg_type = SADB_X_SPDFLUSH; hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct km_event c; int err, err2; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ return 0; return err; } c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.portid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; c.net = net; km_policy_notify(NULL, 0, &c); return 0; } typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs); static const pfkey_handler pfkey_funcs[SADB_MAX + 1] = { [SADB_RESERVED] = pfkey_reserved, [SADB_GETSPI] = pfkey_getspi, [SADB_UPDATE] = pfkey_add, [SADB_ADD] = pfkey_add, [SADB_DELETE] = pfkey_delete, [SADB_GET] = pfkey_get, [SADB_ACQUIRE] = pfkey_acquire, [SADB_REGISTER] = pfkey_register, [SADB_EXPIRE] = NULL, [SADB_FLUSH] = pfkey_flush, [SADB_DUMP] = pfkey_dump, [SADB_X_PROMISC] = pfkey_promisc, [SADB_X_PCHANGE] = NULL, [SADB_X_SPDUPDATE] = pfkey_spdadd, [SADB_X_SPDADD] = pfkey_spdadd, [SADB_X_SPDDELETE] = pfkey_spddelete, [SADB_X_SPDGET] = pfkey_spdget, [SADB_X_SPDACQUIRE] = NULL, [SADB_X_SPDDUMP] = pfkey_spddump, [SADB_X_SPDFLUSH] = pfkey_spdflush, [SADB_X_SPDSETIDX] = pfkey_spdadd, [SADB_X_SPDDELETE2] = pfkey_spdget, [SADB_X_MIGRATE] = pfkey_migrate, }; static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr) { void *ext_hdrs[SADB_EXT_MAX]; int err; /* Non-zero return value of pfkey_broadcast() does not always signal * an error and even on an actual error we may still want to process * the message so rather ignore the return value. */ pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); memset(ext_hdrs, 0, sizeof(ext_hdrs)); err = parse_exthdrs(skb, hdr, ext_hdrs); if (!err) { err = -EOPNOTSUPP; if (pfkey_funcs[hdr->sadb_msg_type]) err = pfkey_funcs[hdr->sadb_msg_type](sk, skb, hdr, ext_hdrs); } return err; } static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp) { struct sadb_msg *hdr = NULL; if (skb->len < sizeof(*hdr)) { *errp = -EMSGSIZE; } else { hdr = (struct sadb_msg *) skb->data; if (hdr->sadb_msg_version != PF_KEY_V2 || hdr->sadb_msg_reserved != 0 || (hdr->sadb_msg_type <= SADB_RESERVED || hdr->sadb_msg_type > SADB_MAX)) { hdr = NULL; *errp = -EINVAL; } else if (hdr->sadb_msg_len != (skb->len / sizeof(uint64_t)) || hdr->sadb_msg_len < (sizeof(struct sadb_msg) / sizeof(uint64_t))) { hdr = NULL; *errp = -EMSGSIZE; } else { *errp = 0; } } return hdr; } static inline int aalg_tmpl_set(const struct xfrm_tmpl *t, const struct xfrm_algo_desc *d) { unsigned int id = d->desc.sadb_alg_id; if (id >= sizeof(t->aalgos) * 8) return 0; return (t->aalgos >> id) & 1; } static inline int ealg_tmpl_set(const struct xfrm_tmpl *t, const struct xfrm_algo_desc *d) { unsigned int id = d->desc.sadb_alg_id; if (id >= sizeof(t->ealgos) * 8) return 0; return (t->ealgos >> id) & 1; } static int count_ah_combs(const struct xfrm_tmpl *t) { int i, sz = 0; for (i = 0; ; i++) { const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); } static int count_esp_combs(const struct xfrm_tmpl *t) { int i, k, sz = 0; for (i = 0; ; i++) { const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; if (!ealg->pfkey_supported) continue; if (!(ealg_tmpl_set(t, ealg))) continue; for (k = 1; ; k++) { const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } } return sz + sizeof(struct sadb_prop); } static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; int sz = 0; int i; p = skb_put(skb, sizeof(struct sadb_prop)); p->sadb_prop_len = sizeof(struct sadb_prop)/8; p->sadb_prop_exttype = SADB_EXT_PROPOSAL; p->sadb_prop_replay = 32; memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i = 0; ; i++) { const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg_tmpl_set(t, aalg) && aalg->available) { struct sadb_comb *c; c = skb_put_zero(skb, sizeof(struct sadb_comb)); p->sadb_prop_len += sizeof(struct sadb_comb)/8; c->sadb_comb_auth = aalg->desc.sadb_alg_id; c->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits; c->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits; c->sadb_comb_hard_addtime = 24*60*60; c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; sz += sizeof(*c); } } return sz + sizeof(*p); } static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; int sz = 0; int i, k; p = skb_put(skb, sizeof(struct sadb_prop)); p->sadb_prop_len = sizeof(struct sadb_prop)/8; p->sadb_prop_exttype = SADB_EXT_PROPOSAL; p->sadb_prop_replay = 32; memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i=0; ; i++) { const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; if (!ealg->pfkey_supported) continue; if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; for (k = 1; ; k++) { struct sadb_comb *c; const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (!(aalg_tmpl_set(t, aalg) && aalg->available)) continue; c = skb_put(skb, sizeof(struct sadb_comb)); memset(c, 0, sizeof(*c)); p->sadb_prop_len += sizeof(struct sadb_comb)/8; c->sadb_comb_auth = aalg->desc.sadb_alg_id; c->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits; c->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits; c->sadb_comb_encrypt = ealg->desc.sadb_alg_id; c->sadb_comb_encrypt_minbits = ealg->desc.sadb_alg_minbits; c->sadb_comb_encrypt_maxbits = ealg->desc.sadb_alg_maxbits; c->sadb_comb_hard_addtime = 24*60*60; c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; sz += sizeof(*c); } } return sz + sizeof(*p); } static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c) { return 0; } static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c) { struct sk_buff *out_skb; struct sadb_msg *out_hdr; int hard; int hsc; hard = c->data.hard; if (hard) hsc = 2; else hsc = 1; out_skb = pfkey_xfrm_state2msg_expire(x, hsc); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = PF_KEY_V2; out_hdr->sadb_msg_type = SADB_EXPIRE; out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = 0; out_hdr->sadb_msg_pid = 0; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); return 0; } static int pfkey_send_notify(struct xfrm_state *x, const struct km_event *c) { struct net *net = x ? xs_net(x) : c->net; struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); if (atomic_read(&net_pfkey->socks_nr) == 0) return 0; switch (c->event) { case XFRM_MSG_EXPIRE: return key_notify_sa_expire(x, c); case XFRM_MSG_DELSA: case XFRM_MSG_NEWSA: case XFRM_MSG_UPDSA: return key_notify_sa(x, c); case XFRM_MSG_FLUSHSA: return key_notify_sa_flush(c); case XFRM_MSG_NEWAE: /* not yet supported */ break; default: pr_err("pfkey: Unknown SA event %d\n", c->event); break; } return 0; } static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) return 0; switch (c->event) { case XFRM_MSG_POLEXPIRE: return key_notify_policy_expire(xp, c); case XFRM_MSG_DELPOLICY: case XFRM_MSG_NEWPOLICY: case XFRM_MSG_UPDPOLICY: return key_notify_policy(xp, dir, c); case XFRM_MSG_FLUSHPOLICY: if (c->data.type != XFRM_POLICY_TYPE_MAIN) break; return key_notify_policy_flush(c); default: pr_err("pfkey: Unknown policy event %d\n", c->event); break; } return 0; } static u32 get_acqseq(void) { u32 res; static atomic_t acqseq; do { res = atomic_inc_return(&acqseq); } while (!res); return res; } static bool pfkey_is_alive(const struct km_event *c) { struct netns_pfkey *net_pfkey = net_generic(c->net, pfkey_net_id); struct sock *sk; bool is_alive = false; rcu_read_lock(); sk_for_each_rcu(sk, &net_pfkey->table) { if (pfkey_sk(sk)->registered) { is_alive = true; break; } } rcu_read_unlock(); return is_alive; } static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp) { struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_address *addr; struct sadb_x_policy *pol; int sockaddr_size; int size; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; int alg_size = 0; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) return -EINVAL; size = sizeof(struct sadb_msg) + (sizeof(struct sadb_address) * 2) + (sockaddr_size * 2) + sizeof(struct sadb_x_policy); if (x->id.proto == IPPROTO_AH) alg_size = count_ah_combs(t); else if (x->id.proto == IPPROTO_ESP) alg_size = count_esp_combs(t); if ((xfrm_ctx = x->security)) { ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); size += sizeof(struct sadb_x_sec_ctx) + ctx_size; } skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_ACQUIRE; hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = x->km.seq = get_acqseq(); hdr->sadb_msg_pid = 0; /* src address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->props.saddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); /* dst address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->id.daddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t); pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. */ alg_size = 0; if (x->id.proto == IPPROTO_AH) alg_size = dump_ah_combs(skb, t); else if (x->id.proto == IPPROTO_ESP) alg_size = dump_esp_combs(skb, t); hdr->sadb_msg_len += alg_size / 8; /* security context */ if (xfrm_ctx) { sec_ctx = skb_put(skb, sizeof(struct sadb_x_sec_ctx) + ctx_size); sec_ctx->sadb_x_sec_len = (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, xfrm_ctx->ctx_len); } return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); } static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct net *net = sock_net(sk); struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; struct sadb_x_sec_ctx *sec_ctx; switch (sk->sk_family) { case AF_INET: if (opt != IP_IPSEC_POLICY) { *dir = -EOPNOTSUPP; return NULL; } break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: if (opt != IPV6_IPSEC_POLICY) { *dir = -EOPNOTSUPP; return NULL; } break; #endif default: *dir = -EINVAL; return NULL; } *dir = -EINVAL; if (len < sizeof(struct sadb_x_policy) || pol->sadb_x_policy_len*8 > len || pol->sadb_x_policy_type > IPSEC_POLICY_BYPASS || (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir > IPSEC_DIR_OUTBOUND)) return NULL; xp = xfrm_policy_alloc(net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; } xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ? XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW); xp->lft.soft_byte_limit = XFRM_INF; xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; xp->family = sk->sk_family; xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && (*dir = parse_ipsecrequests(xp, pol)) < 0) goto out; /* security context too */ if (len >= (pol->sadb_x_policy_len*8 + sizeof(struct sadb_x_sec_ctx))) { char *p = (char *)pol; struct xfrm_user_sec_ctx *uctx; p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + sec_ctx->sadb_x_sec_len*8) { *dir = -EINVAL; goto out; } if ((*dir = verify_sec_ctx_len(p))) goto out; uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_ATOMIC); *dir = security_xfrm_policy_alloc(&xp->security, uctx, GFP_ATOMIC); kfree(uctx); if (*dir) goto out; } *dir = pol->sadb_x_policy_dir-1; return xp; out: xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) { struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_sa *sa; struct sadb_address *addr; struct sadb_x_nat_t_port *n_port; int sockaddr_size; int size; __u8 satype = (x->id.proto == IPPROTO_ESP ? SADB_SATYPE_ESP : 0); struct xfrm_encap_tmpl *natt = NULL; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) return -EINVAL; if (!satype) return -EINVAL; if (!x->encap) return -EINVAL; natt = x->encap; /* Build an SADB_X_NAT_T_NEW_MAPPING message: * * HDR | SA | ADDRESS_SRC (old addr) | NAT_T_SPORT (old port) | * ADDRESS_DST (new addr) | NAT_T_DPORT (new port) */ size = sizeof(struct sadb_msg) + sizeof(struct sadb_sa) + (sizeof(struct sadb_address) * 2) + (sockaddr_size * 2) + (sizeof(struct sadb_x_nat_t_port) * 2); skb = alloc_skb(size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_X_NAT_T_NEW_MAPPING; hdr->sadb_msg_satype = satype; hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = x->km.seq; hdr->sadb_msg_pid = 0; /* SA */ sa = skb_put(skb, sizeof(struct sadb_sa)); sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t); sa->sadb_sa_exttype = SADB_EXT_SA; sa->sadb_sa_spi = x->id.spi; sa->sadb_sa_replay = 0; sa->sadb_sa_state = 0; sa->sadb_sa_auth = 0; sa->sadb_sa_encrypt = 0; sa->sadb_sa_flags = 0; /* ADDRESS_SRC (old addr) */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->props.saddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); /* NAT_T_SPORT (old port) */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT; n_port->sadb_x_nat_t_port_port = natt->encap_sport; n_port->sadb_x_nat_t_port_reserved = 0; /* ADDRESS_DST (new addr) */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(ipaddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); /* NAT_T_DPORT (new port) */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT; n_port->sadb_x_nat_t_port_port = sport; n_port->sadb_x_nat_t_port_reserved = 0; return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); } #ifdef CONFIG_NET_KEY_MIGRATE static int set_sadb_address(struct sk_buff *skb, int sasize, int type, const struct xfrm_selector *sel) { struct sadb_address *addr; addr = skb_put(skb, sizeof(struct sadb_address) + sasize); addr->sadb_address_len = (sizeof(struct sadb_address) + sasize)/8; addr->sadb_address_exttype = type; addr->sadb_address_proto = sel->proto; addr->sadb_address_reserved = 0; switch (type) { case SADB_EXT_ADDRESS_SRC: addr->sadb_address_prefixlen = sel->prefixlen_s; pfkey_sockaddr_fill(&sel->saddr, 0, (struct sockaddr *)(addr + 1), sel->family); break; case SADB_EXT_ADDRESS_DST: addr->sadb_address_prefixlen = sel->prefixlen_d; pfkey_sockaddr_fill(&sel->daddr, 0, (struct sockaddr *)(addr + 1), sel->family); break; default: return -EINVAL; } return 0; } static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress *k) { struct sadb_x_kmaddress *kma; u8 *sa; int family = k->family; int socklen = pfkey_sockaddr_len(family); int size_req; size_req = (sizeof(struct sadb_x_kmaddress) + pfkey_sockaddr_pair_size(family)); kma = skb_put_zero(skb, size_req); kma->sadb_x_kmaddress_len = size_req / 8; kma->sadb_x_kmaddress_exttype = SADB_X_EXT_KMADDRESS; kma->sadb_x_kmaddress_reserved = k->reserved; sa = (u8 *)(kma + 1); if (!pfkey_sockaddr_fill(&k->local, 0, (struct sockaddr *)sa, family) || !pfkey_sockaddr_fill(&k->remote, 0, (struct sockaddr *)(sa+socklen), family)) return -EINVAL; return 0; } static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, uint32_t reqid, uint8_t family, const xfrm_address_t *src, const xfrm_address_t *dst) { struct sadb_x_ipsecrequest *rq; u8 *sa; int socklen = pfkey_sockaddr_len(family); int size_req; size_req = sizeof(struct sadb_x_ipsecrequest) + pfkey_sockaddr_pair_size(family); rq = skb_put_zero(skb, size_req); rq->sadb_x_ipsecrequest_len = size_req; rq->sadb_x_ipsecrequest_proto = proto; rq->sadb_x_ipsecrequest_mode = mode; rq->sadb_x_ipsecrequest_level = level; rq->sadb_x_ipsecrequest_reqid = reqid; sa = (u8 *) (rq + 1); if (!pfkey_sockaddr_fill(src, 0, (struct sockaddr *)sa, family) || !pfkey_sockaddr_fill(dst, 0, (struct sockaddr *)(sa + socklen), family)) return -EINVAL; return 0; } #endif #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap) { int i; int sasize_sel; int size = 0; int size_pol = 0; struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_x_policy *pol; const struct xfrm_migrate *mp; if (type != XFRM_POLICY_TYPE_MAIN) return 0; if (num_bundles <= 0 || num_bundles > XFRM_MAX_DEPTH) return -EINVAL; if (k != NULL) { /* addresses for KM */ size += PFKEY_ALIGN8(sizeof(struct sadb_x_kmaddress) + pfkey_sockaddr_pair_size(k->family)); } /* selector */ sasize_sel = pfkey_sockaddr_size(sel->family); if (!sasize_sel) return -EINVAL; size += (sizeof(struct sadb_address) + sasize_sel) * 2; /* policy info */ size_pol += sizeof(struct sadb_x_policy); /* ipsecrequests */ for (i = 0, mp = m; i < num_bundles; i++, mp++) { /* old locator pair */ size_pol += sizeof(struct sadb_x_ipsecrequest) + pfkey_sockaddr_pair_size(mp->old_family); /* new locator pair */ size_pol += sizeof(struct sadb_x_ipsecrequest) + pfkey_sockaddr_pair_size(mp->new_family); } size += sizeof(struct sadb_msg) + size_pol; /* alloc buffer */ skb = alloc_skb(size, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_X_MIGRATE; hdr->sadb_msg_satype = pfkey_proto2satype(m->proto); hdr->sadb_msg_len = size / 8; hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = 0; hdr->sadb_msg_pid = 0; /* Addresses to be used by KM for negotiation, if ext is available */ if (k != NULL && (set_sadb_kmaddress(skb, k) < 0)) goto err; /* selector src */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel); /* selector dst */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_DST, sel); /* policy information */ pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = size_pol / 8; pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = dir + 1; pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = 0; pol->sadb_x_policy_priority = 0; for (i = 0, mp = m; i < num_bundles; i++, mp++) { /* old ipsecrequest */ int mode = pfkey_mode_from_xfrm(mp->mode); if (mode < 0) goto err; if (set_ipsecrequest(skb, mp->proto, mode, (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), mp->reqid, mp->old_family, &mp->old_saddr, &mp->old_daddr) < 0) goto err; /* new ipsecrequest */ if (set_ipsecrequest(skb, mp->proto, mode, (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), mp->reqid, mp->new_family, &mp->new_saddr, &mp->new_daddr) < 0) goto err; } /* broadcast migrate message to sockets */ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); return 0; err: kfree_skb(skb); return -EINVAL; } #else static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap) { return -ENOPROTOOPT; } #endif static int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct sk_buff *skb = NULL; struct sadb_msg *hdr = NULL; int err; struct net *net = sock_net(sk); err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) goto out; err = -EMSGSIZE; if ((unsigned int)len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; skb = alloc_skb(len, GFP_KERNEL); if (skb == NULL) goto out; err = -EFAULT; if (memcpy_from_msg(skb_put(skb,len), msg, len)) goto out; hdr = pfkey_get_base_msg(skb, &err); if (!hdr) goto out; mutex_lock(&net->xfrm.xfrm_cfg_mutex); err = pfkey_process(sk, skb, hdr); mutex_unlock(&net->xfrm.xfrm_cfg_mutex); out: if (err && hdr && pfkey_error(hdr, err, sk) == 0) err = 0; kfree_skb(skb); return err ? : len; } static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct pfkey_sock *pfk = pfkey_sk(sk); struct sk_buff *skb; int copied, err; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) goto out; skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; copied = skb->len; if (copied > len) { msg->msg_flags |= MSG_TRUNC; copied = len; } skb_reset_transport_header(skb); err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free; sock_recv_cmsgs(msg, sk, skb); err = (flags & MSG_TRUNC) ? skb->len : copied; if (pfk->dump.dump != NULL && 3 * atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) pfkey_do_dump(pfk); out_free: skb_free_datagram(sk, skb); out: return err; } static const struct proto_ops pfkey_ops = { .family = PF_KEY, .owner = THIS_MODULE, /* Operations that make no sense on pfkey sockets. */ .bind = sock_no_bind, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .mmap = sock_no_mmap, /* Now the operations that really occur. */ .release = pfkey_release, .poll = datagram_poll, .sendmsg = pfkey_sendmsg, .recvmsg = pfkey_recvmsg, }; static const struct net_proto_family pfkey_family_ops = { .family = PF_KEY, .create = pfkey_create, .owner = THIS_MODULE, }; #ifdef CONFIG_PROC_FS static int pfkey_seq_show(struct seq_file *f, void *v) { struct sock *s = sk_entry(v); if (v == SEQ_START_TOKEN) seq_printf(f ,"sk RefCnt Rmem Wmem User Inode\n"); else seq_printf(f, "%pK %-6d %-6u %-6u %-6u %-6lu\n", s, refcount_read(&s->sk_refcnt), sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), from_kuid_munged(seq_user_ns(f), sock_i_uid(s)), sock_i_ino(s) ); return 0; } static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) __acquires(rcu) { struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); rcu_read_lock(); return seq_hlist_start_head_rcu(&net_pfkey->table, *ppos); } static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) { struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); return seq_hlist_next_rcu(v, &net_pfkey->table, ppos); } static void pfkey_seq_stop(struct seq_file *f, void *v) __releases(rcu) { rcu_read_unlock(); } static const struct seq_operations pfkey_seq_ops = { .start = pfkey_seq_start, .next = pfkey_seq_next, .stop = pfkey_seq_stop, .show = pfkey_seq_show, }; static int __net_init pfkey_init_proc(struct net *net) { struct proc_dir_entry *e; e = proc_create_net("pfkey", 0, net->proc_net, &pfkey_seq_ops, sizeof(struct seq_net_private)); if (e == NULL) return -ENOMEM; return 0; } static void __net_exit pfkey_exit_proc(struct net *net) { remove_proc_entry("pfkey", net->proc_net); } #else static inline int pfkey_init_proc(struct net *net) { return 0; } static inline void pfkey_exit_proc(struct net *net) { } #endif static struct xfrm_mgr pfkeyv2_mgr = { .notify = pfkey_send_notify, .acquire = pfkey_send_acquire, .compile_policy = pfkey_compile_policy, .new_mapping = pfkey_send_new_mapping, .notify_policy = pfkey_send_policy_notify, .migrate = pfkey_send_migrate, .is_alive = pfkey_is_alive, }; static int __net_init pfkey_net_init(struct net *net) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); int rv; INIT_HLIST_HEAD(&net_pfkey->table); atomic_set(&net_pfkey->socks_nr, 0); rv = pfkey_init_proc(net); return rv; } static void __net_exit pfkey_net_exit(struct net *net) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); pfkey_exit_proc(net); WARN_ON(!hlist_empty(&net_pfkey->table)); } static struct pernet_operations pfkey_net_ops = { .init = pfkey_net_init, .exit = pfkey_net_exit, .id = &pfkey_net_id, .size = sizeof(struct netns_pfkey), }; static void __exit ipsec_pfkey_exit(void) { xfrm_unregister_km(&pfkeyv2_mgr); sock_unregister(PF_KEY); unregister_pernet_subsys(&pfkey_net_ops); proto_unregister(&key_proto); } static int __init ipsec_pfkey_init(void) { int err = proto_register(&key_proto, 0); if (err != 0) goto out; err = register_pernet_subsys(&pfkey_net_ops); if (err != 0) goto out_unregister_key_proto; err = sock_register(&pfkey_family_ops); if (err != 0) goto out_unregister_pernet; xfrm_register_km(&pfkeyv2_mgr); out: return err; out_unregister_pernet: unregister_pernet_subsys(&pfkey_net_ops); out_unregister_key_proto: proto_unregister(&key_proto); goto out; } module_init(ipsec_pfkey_init); module_exit(ipsec_pfkey_exit); MODULE_DESCRIPTION("PF_KEY socket helpers"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_KEY); |
3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 | // SPDX-License-Identifier: GPL-2.0-only /* * ebt_pkttype * * Authors: * Bart De Schuymer <bdschuym@pandora.be> * * April, 2003 * */ #include <linux/module.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_pkttype.h> static bool ebt_pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_pkttype_info *info = par->matchinfo; return (skb->pkt_type == info->pkt_type) ^ info->invert; } static int ebt_pkttype_mt_check(const struct xt_mtchk_param *par) { const struct ebt_pkttype_info *info = par->matchinfo; if (info->invert != 0 && info->invert != 1) return -EINVAL; /* Allow any pkt_type value */ return 0; } static struct xt_match ebt_pkttype_mt_reg __read_mostly = { .name = "pkttype", .revision = 0, .family = NFPROTO_BRIDGE, .match = ebt_pkttype_mt, .checkentry = ebt_pkttype_mt_check, .matchsize = sizeof(struct ebt_pkttype_info), .me = THIS_MODULE, }; static int __init ebt_pkttype_init(void) { return xt_register_match(&ebt_pkttype_mt_reg); } static void __exit ebt_pkttype_fini(void) { xt_unregister_match(&ebt_pkttype_mt_reg); } module_init(ebt_pkttype_init); module_exit(ebt_pkttype_fini); MODULE_DESCRIPTION("Ebtables: Link layer packet type match"); MODULE_LICENSE("GPL"); |
8 195 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Statically sized hash table implementation * (C) 2012 Sasha Levin <levinsasha928@gmail.com> */ #ifndef _LINUX_HASHTABLE_H #define _LINUX_HASHTABLE_H #include <linux/list.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/hash.h> #include <linux/rculist.h> #define DEFINE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } #define DEFINE_READ_MOSTLY_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] __read_mostly = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } #define DECLARE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] #define HASH_SIZE(name) (ARRAY_SIZE(name)) #define HASH_BITS(name) ilog2(HASH_SIZE(name)) /* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */ #define hash_min(val, bits) \ (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)) static inline void __hash_init(struct hlist_head *ht, unsigned int sz) { unsigned int i; for (i = 0; i < sz; i++) INIT_HLIST_HEAD(&ht[i]); } /** * hash_init - initialize a hash table * @hashtable: hashtable to be initialized * * Calculates the size of the hashtable from the given parameter, otherwise * same as hash_init_size. * * This has to be a macro since HASH_BITS() will not work on pointers since * it calculates the size during preprocessing. */ #define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable)) /** * hash_add - add an object to a hashtable * @hashtable: hashtable to add to * @node: the &struct hlist_node of the object to be added * @key: the key of the object to be added */ #define hash_add(hashtable, node, key) \ hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) /** * hash_add_rcu - add an object to a rcu enabled hashtable * @hashtable: hashtable to add to * @node: the &struct hlist_node of the object to be added * @key: the key of the object to be added */ #define hash_add_rcu(hashtable, node, key) \ hlist_add_head_rcu(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) /** * hash_hashed - check whether an object is in any hashtable * @node: the &struct hlist_node of the object to be checked */ static inline bool hash_hashed(struct hlist_node *node) { return !hlist_unhashed(node); } static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz) { unsigned int i; for (i = 0; i < sz; i++) if (!hlist_empty(&ht[i])) return false; return true; } /** * hash_empty - check whether a hashtable is empty * @hashtable: hashtable to check * * This has to be a macro since HASH_BITS() will not work on pointers since * it calculates the size during preprocessing. */ #define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable)) /** * hash_del - remove an object from a hashtable * @node: &struct hlist_node of the object to remove */ static inline void hash_del(struct hlist_node *node) { hlist_del_init(node); } /** * hash_del_rcu - remove an object from a rcu enabled hashtable * @node: &struct hlist_node of the object to remove */ static inline void hash_del_rcu(struct hlist_node *node) { hlist_del_init_rcu(node); } /** * hash_for_each - iterate over a hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each(name, bkt, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry(obj, &name[bkt], member) /** * hash_for_each_rcu - iterate over a rcu enabled hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each_rcu(name, bkt, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry_rcu(obj, &name[bkt], member) /** * hash_for_each_safe - iterate over a hashtable safe against removal of * hash entry * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @tmp: a &struct hlist_node used for temporary storage * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each_safe(name, bkt, tmp, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry_safe(obj, tmp, &name[bkt], member) /** * hash_for_each_possible - iterate over all possible objects hashing to the * same bucket * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible(name, obj, member, key) \ hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member) /** * hash_for_each_possible_rcu - iterate over all possible objects hashing to the * same bucket in an rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible_rcu(name, obj, member, key, cond...) \ hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ member, ## cond) /** * hash_for_each_possible_rcu_notrace - iterate over all possible objects hashing * to the same bucket in an rcu enabled hashtable in a rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over * * This is the same as hash_for_each_possible_rcu() except that it does * not do any RCU debugging or tracing. */ #define hash_for_each_possible_rcu_notrace(name, obj, member, key) \ hlist_for_each_entry_rcu_notrace(obj, \ &name[hash_min(key, HASH_BITS(name))], member) /** * hash_for_each_possible_safe - iterate over all possible objects hashing to the * same bucket safe against removals * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @tmp: a &struct hlist_node used for temporary storage * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible_safe(name, obj, tmp, member, key) \ hlist_for_each_entry_safe(obj, tmp,\ &name[hash_min(key, HASH_BITS(name))], member) #endif |
5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 | // SPDX-License-Identifier: BSD-3-Clause /* * linux/net/sunrpc/auth_gss/auth_gss.c * * RPCSEC_GSS client authentication. * * Copyright (c) 2000 The Regents of the University of Michigan. * All rights reserved. * * Dug Song <dugsong@monkey.org> * Andy Adamson <andros@umich.edu> */ #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/pagemap.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/auth.h> #include <linux/sunrpc/auth_gss.h> #include <linux/sunrpc/gss_krb5.h> #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/gss_err.h> #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/gss_api.h> #include <linux/uaccess.h> #include <linux/hashtable.h> #include "auth_gss_internal.h" #include "../netns.h" #include <trace/events/rpcgss.h> static const struct rpc_authops authgss_ops; static const struct rpc_credops gss_credops; static const struct rpc_credops gss_nullops; #define GSS_RETRY_EXPIRED 5 static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED; #define GSS_KEY_EXPIRE_TIMEO 240 static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif /* * This compile-time check verifies that we will not exceed the * slack space allotted by the client and server auth_gss code * before they call gss_wrap(). */ #define GSS_KRB5_MAX_SLACK_NEEDED \ (GSS_KRB5_TOK_HDR_LEN /* gss token header */ \ + GSS_KRB5_MAX_CKSUM_LEN /* gss token checksum */ \ + GSS_KRB5_MAX_BLOCKSIZE /* confounder */ \ + GSS_KRB5_MAX_BLOCKSIZE /* possible padding */ \ + GSS_KRB5_TOK_HDR_LEN /* encrypted hdr in v2 token */ \ + GSS_KRB5_MAX_CKSUM_LEN /* encryption hmac */ \ + XDR_UNIT * 2 /* RPC verifier */ \ + GSS_KRB5_TOK_HDR_LEN \ + GSS_KRB5_MAX_CKSUM_LEN) #define GSS_CRED_SLACK (RPC_MAX_AUTH_SIZE * 2) /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ #define GSS_VERF_SLACK 100 static DEFINE_HASHTABLE(gss_auth_hash_table, 4); static DEFINE_SPINLOCK(gss_auth_hash_lock); struct gss_pipe { struct rpc_pipe_dir_object pdo; struct rpc_pipe *pipe; struct rpc_clnt *clnt; const char *name; struct kref kref; }; struct gss_auth { struct kref kref; struct hlist_node hash; struct rpc_auth rpc_auth; struct gss_api_mech *mech; enum rpc_gss_svc service; struct rpc_clnt *client; struct net *net; netns_tracker ns_tracker; /* * There are two upcall pipes; dentry[1], named "gssd", is used * for the new text-based upcall; dentry[0] is named after the * mechanism (for example, "krb5") and exists for * backwards-compatibility with older gssd's. */ struct gss_pipe *gss_pipe[2]; const char *target_name; }; /* pipe_version >= 0 if and only if someone has a pipe open. */ static DEFINE_SPINLOCK(pipe_version_lock); static struct rpc_wait_queue pipe_version_rpc_waitqueue; static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue); static void gss_put_auth(struct gss_auth *gss_auth); static void gss_free_ctx(struct gss_cl_ctx *); static const struct rpc_pipe_ops gss_upcall_ops_v0; static const struct rpc_pipe_ops gss_upcall_ops_v1; static inline struct gss_cl_ctx * gss_get_ctx(struct gss_cl_ctx *ctx) { refcount_inc(&ctx->count); return ctx; } static inline void gss_put_ctx(struct gss_cl_ctx *ctx) { if (refcount_dec_and_test(&ctx->count)) gss_free_ctx(ctx); } /* gss_cred_set_ctx: * called by gss_upcall_callback and gss_create_upcall in order * to set the gss context. The actual exchange of an old context * and a new one is protected by the pipe->lock. */ static void gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) return; gss_get_ctx(ctx); rcu_assign_pointer(gss_cred->gc_ctx, ctx); set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); smp_mb__before_atomic(); clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); } static struct gss_cl_ctx * gss_cred_get_ctx(struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = NULL; rcu_read_lock(); ctx = rcu_dereference(gss_cred->gc_ctx); if (ctx) gss_get_ctx(ctx); rcu_read_unlock(); return ctx; } static struct gss_cl_ctx * gss_alloc_context(void) { struct gss_cl_ctx *ctx; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { ctx->gc_proc = RPC_GSS_PROC_DATA; ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ spin_lock_init(&ctx->gc_seq_lock); refcount_set(&ctx->count,1); } return ctx; } #define GSSD_MIN_TIMEOUT (60 * 60) static const void * gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct gss_api_mech *gm) { const void *q; unsigned int seclen; unsigned int timeout; unsigned long now = jiffies; u32 window_size; int ret; /* First unsigned int gives the remaining lifetime in seconds of the * credential - e.g. the remaining TGT lifetime for Kerberos or * the -t value passed to GSSD. */ p = simple_get_bytes(p, end, &timeout, sizeof(timeout)); if (IS_ERR(p)) goto err; if (timeout == 0) timeout = GSSD_MIN_TIMEOUT; ctx->gc_expiry = now + ((unsigned long)timeout * HZ); /* Sequence number window. Determines the maximum number of * simultaneous requests */ p = simple_get_bytes(p, end, &window_size, sizeof(window_size)); if (IS_ERR(p)) goto err; ctx->gc_win = window_size; /* gssd signals an error by passing ctx->gc_win = 0: */ if (ctx->gc_win == 0) { /* * in which case, p points to an error code. Anything other * than -EKEYEXPIRED gets converted to -EACCES. */ p = simple_get_bytes(p, end, &ret, sizeof(ret)); if (!IS_ERR(p)) p = (ret == -EKEYEXPIRED) ? ERR_PTR(-EKEYEXPIRED) : ERR_PTR(-EACCES); goto err; } /* copy the opaque wire context */ p = simple_get_netobj(p, end, &ctx->gc_wire_ctx); if (IS_ERR(p)) goto err; /* import the opaque security context */ p = simple_get_bytes(p, end, &seclen, sizeof(seclen)); if (IS_ERR(p)) goto err; q = (const void *)((const char *)p + seclen); if (unlikely(q > end || q < p)) { p = ERR_PTR(-EFAULT); goto err; } ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_KERNEL); if (ret < 0) { trace_rpcgss_import_ctx(ret); p = ERR_PTR(ret); goto err; } /* is there any trailing data? */ if (q == end) { p = q; goto done; } /* pull in acceptor name (if there is one) */ p = simple_get_netobj(q, end, &ctx->gc_acceptor); if (IS_ERR(p)) goto err; done: trace_rpcgss_context(window_size, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len, ctx->gc_acceptor.data); err: return p; } /* XXX: Need some documentation about why UPCALL_BUF_LEN is so small. * Is user space expecting no more than UPCALL_BUF_LEN bytes? * Note that there are now _two_ NI_MAXHOST sized data items * being passed in this string. */ #define UPCALL_BUF_LEN 256 struct gss_upcall_msg { refcount_t count; kuid_t uid; const char *service_name; struct rpc_pipe_msg msg; struct list_head list; struct gss_auth *auth; struct rpc_pipe *pipe; struct rpc_wait_queue rpc_waitqueue; wait_queue_head_t waitqueue; struct gss_cl_ctx *ctx; char databuf[UPCALL_BUF_LEN]; }; static int get_pipe_version(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int ret; spin_lock(&pipe_version_lock); if (sn->pipe_version >= 0) { atomic_inc(&sn->pipe_users); ret = sn->pipe_version; } else ret = -EAGAIN; spin_unlock(&pipe_version_lock); return ret; } static void put_pipe_version(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); if (atomic_dec_and_lock(&sn->pipe_users, &pipe_version_lock)) { sn->pipe_version = -1; spin_unlock(&pipe_version_lock); } } static void gss_release_msg(struct gss_upcall_msg *gss_msg) { struct net *net = gss_msg->auth->net; if (!refcount_dec_and_test(&gss_msg->count)) return; put_pipe_version(net); BUG_ON(!list_empty(&gss_msg->list)); if (gss_msg->ctx != NULL) gss_put_ctx(gss_msg->ctx); rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue); gss_put_auth(gss_msg->auth); kfree_const(gss_msg->service_name); kfree(gss_msg); } static struct gss_upcall_msg * __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth) { struct gss_upcall_msg *pos; list_for_each_entry(pos, &pipe->in_downcall, list) { if (!uid_eq(pos->uid, uid)) continue; if (pos->auth->service != auth->service) continue; refcount_inc(&pos->count); return pos; } return NULL; } /* Try to add an upcall to the pipefs queue. * If an upcall owned by our uid already exists, then we return a reference * to that upcall instead of adding the new upcall. */ static inline struct gss_upcall_msg * gss_add_msg(struct gss_upcall_msg *gss_msg) { struct rpc_pipe *pipe = gss_msg->pipe; struct gss_upcall_msg *old; spin_lock(&pipe->lock); old = __gss_find_upcall(pipe, gss_msg->uid, gss_msg->auth); if (old == NULL) { refcount_inc(&gss_msg->count); list_add(&gss_msg->list, &pipe->in_downcall); } else gss_msg = old; spin_unlock(&pipe->lock); return gss_msg; } static void __gss_unhash_msg(struct gss_upcall_msg *gss_msg) { list_del_init(&gss_msg->list); rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); wake_up_all(&gss_msg->waitqueue); refcount_dec(&gss_msg->count); } static void gss_unhash_msg(struct gss_upcall_msg *gss_msg) { struct rpc_pipe *pipe = gss_msg->pipe; if (list_empty(&gss_msg->list)) return; spin_lock(&pipe->lock); if (!list_empty(&gss_msg->list)) __gss_unhash_msg(gss_msg); spin_unlock(&pipe->lock); } static void gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss_msg) { switch (gss_msg->msg.errno) { case 0: if (gss_msg->ctx == NULL) break; clear_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags); gss_cred_set_ctx(&gss_cred->gc_base, gss_msg->ctx); break; case -EKEYEXPIRED: set_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags); } gss_cred->gc_upcall_timestamp = jiffies; gss_cred->gc_upcall = NULL; rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); } static void gss_upcall_callback(struct rpc_task *task) { struct gss_cred *gss_cred = container_of(task->tk_rqstp->rq_cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; struct rpc_pipe *pipe = gss_msg->pipe; spin_lock(&pipe->lock); gss_handle_downcall_result(gss_cred, gss_msg); spin_unlock(&pipe->lock); task->tk_status = gss_msg->msg.errno; gss_release_msg(gss_msg); } static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg, const struct cred *cred) { struct user_namespace *userns = cred->user_ns; uid_t uid = from_kuid_munged(userns, gss_msg->uid); memcpy(gss_msg->databuf, &uid, sizeof(uid)); gss_msg->msg.data = gss_msg->databuf; gss_msg->msg.len = sizeof(uid); BUILD_BUG_ON(sizeof(uid) > sizeof(gss_msg->databuf)); } static ssize_t gss_v0_upcall(struct file *file, struct rpc_pipe_msg *msg, char __user *buf, size_t buflen) { struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); if (msg->copied == 0) gss_encode_v0_msg(gss_msg, file->f_cred); return rpc_pipe_generic_upcall(file, msg, buf, buflen); } static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, const char *service_name, const char *target_name, const struct cred *cred) { struct user_namespace *userns = cred->user_ns; struct gss_api_mech *mech = gss_msg->auth->mech; char *p = gss_msg->databuf; size_t buflen = sizeof(gss_msg->databuf); int len; len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name, from_kuid_munged(userns, gss_msg->uid)); buflen -= len; p += len; gss_msg->msg.len = len; /* * target= is a full service principal that names the remote * identity that we are authenticating to. */ if (target_name) { len = scnprintf(p, buflen, " target=%s", target_name); buflen -= len; p += len; gss_msg->msg.len += len; } /* * gssd uses service= and srchost= to select a matching key from * the system's keytab to use as the source principal. * * service= is the service name part of the source principal, * or "*" (meaning choose any). * * srchost= is the hostname part of the source principal. When * not provided, gssd uses the local hostname. */ if (service_name) { char *c = strchr(service_name, '@'); if (!c) len = scnprintf(p, buflen, " service=%s", service_name); else len = scnprintf(p, buflen, " service=%.*s srchost=%s", (int)(c - service_name), service_name, c + 1); buflen -= len; p += len; gss_msg->msg.len += len; } if (mech->gm_upcall_enctypes) { len = scnprintf(p, buflen, " enctypes=%s", mech->gm_upcall_enctypes); buflen -= len; p += len; gss_msg->msg.len += len; } trace_rpcgss_upcall_msg(gss_msg->databuf); len = scnprintf(p, buflen, "\n"); if (len == 0) goto out_overflow; gss_msg->msg.len += len; gss_msg->msg.data = gss_msg->databuf; return 0; out_overflow: WARN_ON_ONCE(1); return -ENOMEM; } static ssize_t gss_v1_upcall(struct file *file, struct rpc_pipe_msg *msg, char __user *buf, size_t buflen) { struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); int err; if (msg->copied == 0) { err = gss_encode_v1_msg(gss_msg, gss_msg->service_name, gss_msg->auth->target_name, file->f_cred); if (err) return err; } return rpc_pipe_generic_upcall(file, msg, buf, buflen); } static struct gss_upcall_msg * gss_alloc_msg(struct gss_auth *gss_auth, kuid_t uid, const char *service_name) { struct gss_upcall_msg *gss_msg; int vers; int err = -ENOMEM; gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL); if (gss_msg == NULL) goto err; vers = get_pipe_version(gss_auth->net); err = vers; if (err < 0) goto err_free_msg; gss_msg->pipe = gss_auth->gss_pipe[vers]->pipe; INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); init_waitqueue_head(&gss_msg->waitqueue); refcount_set(&gss_msg->count, 1); gss_msg->uid = uid; gss_msg->auth = gss_auth; kref_get(&gss_auth->kref); if (service_name) { gss_msg->service_name = kstrdup_const(service_name, GFP_KERNEL); if (!gss_msg->service_name) { err = -ENOMEM; goto err_put_pipe_version; } } return gss_msg; err_put_pipe_version: put_pipe_version(gss_auth->net); err_free_msg: kfree(gss_msg); err: return ERR_PTR(err); } static struct gss_upcall_msg * gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_new, *gss_msg; kuid_t uid = cred->cr_cred->fsuid; gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal); if (IS_ERR(gss_new)) return gss_new; gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { int res; refcount_inc(&gss_msg->count); res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); if (res) { gss_unhash_msg(gss_new); refcount_dec(&gss_msg->count); gss_release_msg(gss_new); gss_msg = ERR_PTR(res); } } else gss_release_msg(gss_new); return gss_msg; } static void warn_gssd(void) { dprintk("AUTH_GSS upcall failed. Please check user daemon is running.\n"); } static inline int gss_refresh_upcall(struct rpc_task *task) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg; struct rpc_pipe *pipe; int err = 0; gss_msg = gss_setup_upcall(gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we * shouldn't normally hit this case on a refresh. */ warn_gssd(); rpc_sleep_on_timeout(&pipe_version_rpc_waitqueue, task, NULL, jiffies + (15 * HZ)); err = -EAGAIN; goto out; } if (IS_ERR(gss_msg)) { err = PTR_ERR(gss_msg); goto out; } pipe = gss_msg->pipe; spin_lock(&pipe->lock); if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { gss_cred->gc_upcall = gss_msg; /* gss_upcall_callback will release the reference to gss_upcall_msg */ refcount_inc(&gss_msg->count); rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback); } else { gss_handle_downcall_result(gss_cred, gss_msg); err = gss_msg->msg.errno; } spin_unlock(&pipe->lock); gss_release_msg(gss_msg); out: trace_rpcgss_upcall_result(from_kuid(&init_user_ns, cred->cr_cred->fsuid), err); return err; } static inline int gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { struct net *net = gss_auth->net; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct rpc_pipe *pipe; struct rpc_cred *cred = &gss_cred->gc_base; struct gss_upcall_msg *gss_msg; DEFINE_WAIT(wait); int err; retry: err = 0; /* if gssd is down, just skip upcalling altogether */ if (!gssd_running(net)) { warn_gssd(); err = -EACCES; goto out; } gss_msg = gss_setup_upcall(gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { err = wait_event_interruptible_timeout(pipe_version_waitqueue, sn->pipe_version >= 0, 15 * HZ); if (sn->pipe_version < 0) { warn_gssd(); err = -EACCES; } if (err < 0) goto out; goto retry; } if (IS_ERR(gss_msg)) { err = PTR_ERR(gss_msg); goto out; } pipe = gss_msg->pipe; for (;;) { prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE); spin_lock(&pipe->lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { break; } spin_unlock(&pipe->lock); if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto out_intr; } schedule(); } if (gss_msg->ctx) { trace_rpcgss_ctx_init(gss_cred); gss_cred_set_ctx(cred, gss_msg->ctx); } else { err = gss_msg->msg.errno; } spin_unlock(&pipe->lock); out_intr: finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); out: trace_rpcgss_upcall_result(from_kuid(&init_user_ns, cred->cr_cred->fsuid), err); return err; } static struct gss_upcall_msg * gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid) { struct gss_upcall_msg *pos; list_for_each_entry(pos, &pipe->in_downcall, list) { if (!uid_eq(pos->uid, uid)) continue; if (!rpc_msg_is_inflight(&pos->msg)) continue; refcount_inc(&pos->count); return pos; } return NULL; } #define MSG_BUF_MAXSIZE 1024 static ssize_t gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { const void *p, *end; void *buf; struct gss_upcall_msg *gss_msg; struct rpc_pipe *pipe = RPC_I(file_inode(filp))->pipe; struct gss_cl_ctx *ctx; uid_t id; kuid_t uid; ssize_t err = -EFBIG; if (mlen > MSG_BUF_MAXSIZE) goto out; err = -ENOMEM; buf = kmalloc(mlen, GFP_KERNEL); if (!buf) goto out; err = -EFAULT; if (copy_from_user(buf, src, mlen)) goto err; end = (const void *)((char *)buf + mlen); p = simple_get_bytes(buf, end, &id, sizeof(id)); if (IS_ERR(p)) { err = PTR_ERR(p); goto err; } uid = make_kuid(current_user_ns(), id); if (!uid_valid(uid)) { err = -EINVAL; goto err; } err = -ENOMEM; ctx = gss_alloc_context(); if (ctx == NULL) goto err; err = -ENOENT; /* Find a matching upcall */ spin_lock(&pipe->lock); gss_msg = gss_find_downcall(pipe, uid); if (gss_msg == NULL) { spin_unlock(&pipe->lock); goto err_put_ctx; } list_del_init(&gss_msg->list); spin_unlock(&pipe->lock); p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); if (IS_ERR(p)) { err = PTR_ERR(p); switch (err) { case -EACCES: case -EKEYEXPIRED: gss_msg->msg.errno = err; err = mlen; break; case -EFAULT: case -ENOMEM: case -EINVAL: case -ENOSYS: gss_msg->msg.errno = -EAGAIN; break; default: printk(KERN_CRIT "%s: bad return from " "gss_fill_context: %zd\n", __func__, err); gss_msg->msg.errno = -EIO; } goto err_release_msg; } gss_msg->ctx = gss_get_ctx(ctx); err = mlen; err_release_msg: spin_lock(&pipe->lock); __gss_unhash_msg(gss_msg); spin_unlock(&pipe->lock); gss_release_msg(gss_msg); err_put_ctx: gss_put_ctx(ctx); err: kfree(buf); out: return err; } static int gss_pipe_open(struct inode *inode, int new_version) { struct net *net = inode->i_sb->s_fs_info; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int ret = 0; spin_lock(&pipe_version_lock); if (sn->pipe_version < 0) { /* First open of any gss pipe determines the version: */ sn->pipe_version = new_version; rpc_wake_up(&pipe_version_rpc_waitqueue); wake_up(&pipe_version_waitqueue); } else if (sn->pipe_version != new_version) { /* Trying to open a pipe of a different version */ ret = -EBUSY; goto out; } atomic_inc(&sn->pipe_users); out: spin_unlock(&pipe_version_lock); return ret; } static int gss_pipe_open_v0(struct inode *inode) { return gss_pipe_open(inode, 0); } static int gss_pipe_open_v1(struct inode *inode) { return gss_pipe_open(inode, 1); } static void gss_pipe_release(struct inode *inode) { struct net *net = inode->i_sb->s_fs_info; struct rpc_pipe *pipe = RPC_I(inode)->pipe; struct gss_upcall_msg *gss_msg; restart: spin_lock(&pipe->lock); list_for_each_entry(gss_msg, &pipe->in_downcall, list) { if (!list_empty(&gss_msg->msg.list)) continue; gss_msg->msg.errno = -EPIPE; refcount_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); spin_unlock(&pipe->lock); gss_release_msg(gss_msg); goto restart; } spin_unlock(&pipe->lock); put_pipe_version(net); } static void gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) { struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); if (msg->errno < 0) { refcount_inc(&gss_msg->count); gss_unhash_msg(gss_msg); if (msg->errno == -ETIMEDOUT) warn_gssd(); gss_release_msg(gss_msg); } gss_release_msg(gss_msg); } static void gss_pipe_dentry_destroy(struct dentry *dir, struct rpc_pipe_dir_object *pdo) { struct gss_pipe *gss_pipe = pdo->pdo_data; struct rpc_pipe *pipe = gss_pipe->pipe; if (pipe->dentry != NULL) { rpc_unlink(pipe->dentry); pipe->dentry = NULL; } } static int gss_pipe_dentry_create(struct dentry *dir, struct rpc_pipe_dir_object *pdo) { struct gss_pipe *p = pdo->pdo_data; struct dentry *dentry; dentry = rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe); if (IS_ERR(dentry)) return PTR_ERR(dentry); p->pipe->dentry = dentry; return 0; } static const struct rpc_pipe_dir_object_ops gss_pipe_dir_object_ops = { .create = gss_pipe_dentry_create, .destroy = gss_pipe_dentry_destroy, }; static struct gss_pipe *gss_pipe_alloc(struct rpc_clnt *clnt, const char *name, const struct rpc_pipe_ops *upcall_ops) { struct gss_pipe *p; int err = -ENOMEM; p = kmalloc(sizeof(*p), GFP_KERNEL); if (p == NULL) goto err; p->pipe = rpc_mkpipe_data(upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); if (IS_ERR(p->pipe)) { err = PTR_ERR(p->pipe); goto err_free_gss_pipe; } p->name = name; p->clnt = clnt; kref_init(&p->kref); rpc_init_pipe_dir_object(&p->pdo, &gss_pipe_dir_object_ops, p); return p; err_free_gss_pipe: kfree(p); err: return ERR_PTR(err); } struct gss_alloc_pdo { struct rpc_clnt *clnt; const char *name; const struct rpc_pipe_ops *upcall_ops; }; static int gss_pipe_match_pdo(struct rpc_pipe_dir_object *pdo, void *data) { struct gss_pipe *gss_pipe; struct gss_alloc_pdo *args = data; if (pdo->pdo_ops != &gss_pipe_dir_object_ops) return 0; gss_pipe = container_of(pdo, struct gss_pipe, pdo); if (strcmp(gss_pipe->name, args->name) != 0) return 0; if (!kref_get_unless_zero(&gss_pipe->kref)) return 0; return 1; } static struct rpc_pipe_dir_object *gss_pipe_alloc_pdo(void *data) { struct gss_pipe *gss_pipe; struct gss_alloc_pdo *args = data; gss_pipe = gss_pipe_alloc(args->clnt, args->name, args->upcall_ops); if (!IS_ERR(gss_pipe)) return &gss_pipe->pdo; return NULL; } static struct gss_pipe *gss_pipe_get(struct rpc_clnt *clnt, const char *name, const struct rpc_pipe_ops *upcall_ops) { struct net *net = rpc_net_ns(clnt); struct rpc_pipe_dir_object *pdo; struct gss_alloc_pdo args = { .clnt = clnt, .name = name, .upcall_ops = upcall_ops, }; pdo = rpc_find_or_alloc_pipe_dir_object(net, &clnt->cl_pipedir_objects, gss_pipe_match_pdo, gss_pipe_alloc_pdo, &args); if (pdo != NULL) return container_of(pdo, struct gss_pipe, pdo); return ERR_PTR(-ENOMEM); } static void __gss_pipe_free(struct gss_pipe *p) { struct rpc_clnt *clnt = p->clnt; struct net *net = rpc_net_ns(clnt); rpc_remove_pipe_dir_object(net, &clnt->cl_pipedir_objects, &p->pdo); rpc_destroy_pipe_data(p->pipe); kfree(p); } static void __gss_pipe_release(struct kref *kref) { struct gss_pipe *p = container_of(kref, struct gss_pipe, kref); __gss_pipe_free(p); } static void gss_pipe_free(struct gss_pipe *p) { if (p != NULL) kref_put(&p->kref, __gss_pipe_release); } /* * NOTE: we have the opportunity to use different * parameters based on the input flavor (which must be a pseudoflavor) */ static struct gss_auth * gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { rpc_authflavor_t flavor = args->pseudoflavor; struct gss_auth *gss_auth; struct gss_pipe *gss_pipe; struct rpc_auth * auth; int err = -ENOMEM; /* XXX? */ if (!try_module_get(THIS_MODULE)) return ERR_PTR(err); if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) goto out_dec; INIT_HLIST_NODE(&gss_auth->hash); gss_auth->target_name = NULL; if (args->target_name) { gss_auth->target_name = kstrdup(args->target_name, GFP_KERNEL); if (gss_auth->target_name == NULL) goto err_free; } gss_auth->client = clnt; gss_auth->net = get_net_track(rpc_net_ns(clnt), &gss_auth->ns_tracker, GFP_KERNEL); err = -EINVAL; gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); if (!gss_auth->mech) goto err_put_net; gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); if (gss_auth->service == 0) goto err_put_mech; if (!gssd_running(gss_auth->net)) goto err_put_mech; auth = &gss_auth->rpc_auth; auth->au_cslack = GSS_CRED_SLACK >> 2; BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE); auth->au_rslack = GSS_KRB5_MAX_SLACK_NEEDED >> 2; auth->au_verfsize = GSS_VERF_SLACK >> 2; auth->au_ralign = GSS_VERF_SLACK >> 2; __set_bit(RPCAUTH_AUTH_UPDATE_SLACK, &auth->au_flags); auth->au_ops = &authgss_ops; auth->au_flavor = flavor; if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor)) __set_bit(RPCAUTH_AUTH_DATATOUCH, &auth->au_flags); refcount_set(&auth->au_count, 1); kref_init(&gss_auth->kref); err = rpcauth_init_credcache(auth); if (err) goto err_put_mech; /* * Note: if we created the old pipe first, then someone who * examined the directory at the right moment might conclude * that we supported only the old pipe. So we instead create * the new pipe first. */ gss_pipe = gss_pipe_get(clnt, "gssd", &gss_upcall_ops_v1); if (IS_ERR(gss_pipe)) { err = PTR_ERR(gss_pipe); goto err_destroy_credcache; } gss_auth->gss_pipe[1] = gss_pipe; gss_pipe = gss_pipe_get(clnt, gss_auth->mech->gm_name, &gss_upcall_ops_v0); if (IS_ERR(gss_pipe)) { err = PTR_ERR(gss_pipe); goto err_destroy_pipe_1; } gss_auth->gss_pipe[0] = gss_pipe; return gss_auth; err_destroy_pipe_1: gss_pipe_free(gss_auth->gss_pipe[1]); err_destroy_credcache: rpcauth_destroy_credcache(auth); err_put_mech: gss_mech_put(gss_auth->mech); err_put_net: put_net_track(gss_auth->net, &gss_auth->ns_tracker); err_free: kfree(gss_auth->target_name); kfree(gss_auth); out_dec: module_put(THIS_MODULE); trace_rpcgss_createauth(flavor, err); return ERR_PTR(err); } static void gss_free(struct gss_auth *gss_auth) { gss_pipe_free(gss_auth->gss_pipe[0]); gss_pipe_free(gss_auth->gss_pipe[1]); gss_mech_put(gss_auth->mech); put_net_track(gss_auth->net, &gss_auth->ns_tracker); kfree(gss_auth->target_name); kfree(gss_auth); module_put(THIS_MODULE); } static void gss_free_callback(struct kref *kref) { struct gss_auth *gss_auth = container_of(kref, struct gss_auth, kref); gss_free(gss_auth); } static void gss_put_auth(struct gss_auth *gss_auth) { kref_put(&gss_auth->kref, gss_free_callback); } static void gss_destroy(struct rpc_auth *auth) { struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); if (hash_hashed(&gss_auth->hash)) { spin_lock(&gss_auth_hash_lock); hash_del(&gss_auth->hash); spin_unlock(&gss_auth_hash_lock); } gss_pipe_free(gss_auth->gss_pipe[0]); gss_auth->gss_pipe[0] = NULL; gss_pipe_free(gss_auth->gss_pipe[1]); gss_auth->gss_pipe[1] = NULL; rpcauth_destroy_credcache(auth); gss_put_auth(gss_auth); } /* * Auths may be shared between rpc clients that were cloned from a * common client with the same xprt, if they also share the flavor and * target_name. * * The auth is looked up from the oldest parent sharing the same * cl_xprt, and the auth itself references only that common parent * (which is guaranteed to last as long as any of its descendants). */ static struct gss_auth * gss_auth_find_or_add_hashed(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt, struct gss_auth *new) { struct gss_auth *gss_auth; unsigned long hashval = (unsigned long)clnt; spin_lock(&gss_auth_hash_lock); hash_for_each_possible(gss_auth_hash_table, gss_auth, hash, hashval) { if (gss_auth->client != clnt) continue; if (gss_auth->rpc_auth.au_flavor != args->pseudoflavor) continue; if (gss_auth->target_name != args->target_name) { if (gss_auth->target_name == NULL) continue; if (args->target_name == NULL) continue; if (strcmp(gss_auth->target_name, args->target_name)) continue; } if (!refcount_inc_not_zero(&gss_auth->rpc_auth.au_count)) continue; goto out; } if (new) hash_add(gss_auth_hash_table, &new->hash, hashval); gss_auth = new; out: spin_unlock(&gss_auth_hash_lock); return gss_auth; } static struct gss_auth * gss_create_hashed(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { struct gss_auth *gss_auth; struct gss_auth *new; gss_auth = gss_auth_find_or_add_hashed(args, clnt, NULL); if (gss_auth != NULL) goto out; new = gss_create_new(args, clnt); if (IS_ERR(new)) return new; gss_auth = gss_auth_find_or_add_hashed(args, clnt, new); if (gss_auth != new) gss_destroy(&new->rpc_auth); out: return gss_auth; } static struct rpc_auth * gss_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { struct gss_auth *gss_auth; struct rpc_xprt_switch *xps = rcu_access_pointer(clnt->cl_xpi.xpi_xpswitch); while (clnt != clnt->cl_parent) { struct rpc_clnt *parent = clnt->cl_parent; /* Find the original parent for this transport */ if (rcu_access_pointer(parent->cl_xpi.xpi_xpswitch) != xps) break; clnt = parent; } gss_auth = gss_create_hashed(args, clnt); if (IS_ERR(gss_auth)) return ERR_CAST(gss_auth); return &gss_auth->rpc_auth; } static struct gss_cred * gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { struct gss_cred *new; /* Make a copy of the cred so that we can reference count it */ new = kzalloc(sizeof(*gss_cred), GFP_KERNEL); if (new) { struct auth_cred acred = { .cred = gss_cred->gc_base.cr_cred, }; struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); rpcauth_init_cred(&new->gc_base, &acred, &gss_auth->rpc_auth, &gss_nullops); new->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; new->gc_service = gss_cred->gc_service; new->gc_principal = gss_cred->gc_principal; kref_get(&gss_auth->kref); rcu_assign_pointer(new->gc_ctx, ctx); gss_get_ctx(ctx); } return new; } /* * gss_send_destroy_context will cause the RPCSEC_GSS to send a NULL RPC call * to the server with the GSS control procedure field set to * RPC_GSS_PROC_DESTROY. This should normally cause the server to release * all RPCSEC_GSS state associated with that context. */ static void gss_send_destroy_context(struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); struct gss_cred *new; struct rpc_task *task; new = gss_dup_cred(gss_auth, gss_cred); if (new) { ctx->gc_proc = RPC_GSS_PROC_DESTROY; trace_rpcgss_ctx_destroy(gss_cred); task = rpc_call_null(gss_auth->client, &new->gc_base, RPC_TASK_ASYNC); if (!IS_ERR(task)) rpc_put_task(task); put_rpccred(&new->gc_base); } } /* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure * to create a new cred or context, so they check that things have been * allocated before freeing them. */ static void gss_do_free_ctx(struct gss_cl_ctx *ctx) { gss_delete_sec_context(&ctx->gc_gss_ctx); kfree(ctx->gc_wire_ctx.data); kfree(ctx->gc_acceptor.data); kfree(ctx); } static void gss_free_ctx_callback(struct rcu_head *head) { struct gss_cl_ctx *ctx = container_of(head, struct gss_cl_ctx, gc_rcu); gss_do_free_ctx(ctx); } static void gss_free_ctx(struct gss_cl_ctx *ctx) { call_rcu(&ctx->gc_rcu, gss_free_ctx_callback); } static void gss_free_cred(struct gss_cred *gss_cred) { kfree(gss_cred); } static void gss_free_cred_callback(struct rcu_head *head) { struct gss_cred *gss_cred = container_of(head, struct gss_cred, gc_base.cr_rcu); gss_free_cred(gss_cred); } static void gss_destroy_nullcred(struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); RCU_INIT_POINTER(gss_cred->gc_ctx, NULL); put_cred(cred->cr_cred); call_rcu(&cred->cr_rcu, gss_free_cred_callback); if (ctx) gss_put_ctx(ctx); gss_put_auth(gss_auth); } static void gss_destroy_cred(struct rpc_cred *cred) { if (test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) gss_send_destroy_context(cred); gss_destroy_nullcred(cred); } static int gss_hash_cred(struct auth_cred *acred, unsigned int hashbits) { return hash_64(from_kuid(&init_user_ns, acred->cred->fsuid), hashbits); } /* * Lookup RPCSEC_GSS cred for the current process */ static struct rpc_cred *gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { return rpcauth_lookup_credcache(auth, acred, flags, rpc_task_gfp_mask()); } static struct rpc_cred * gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp) { struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); struct gss_cred *cred = NULL; int err = -ENOMEM; if (!(cred = kzalloc(sizeof(*cred), gfp))) goto out_err; rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); /* * Note: in order to force a call to call_refresh(), we deliberately * fail to flag the credential as RPCAUTH_CRED_UPTODATE. */ cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; cred->gc_principal = acred->principal; kref_get(&gss_auth->kref); return &cred->gc_base; out_err: return ERR_PTR(err); } static int gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred) { struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); struct gss_cred *gss_cred = container_of(cred,struct gss_cred, gc_base); int err; do { err = gss_create_upcall(gss_auth, gss_cred); } while (err == -EAGAIN); return err; } static char * gss_stringify_acceptor(struct rpc_cred *cred) { char *string = NULL; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx; unsigned int len; struct xdr_netobj *acceptor; rcu_read_lock(); ctx = rcu_dereference(gss_cred->gc_ctx); if (!ctx) goto out; len = ctx->gc_acceptor.len; rcu_read_unlock(); /* no point if there's no string */ if (!len) return NULL; realloc: string = kmalloc(len + 1, GFP_KERNEL); if (!string) return NULL; rcu_read_lock(); ctx = rcu_dereference(gss_cred->gc_ctx); /* did the ctx disappear or was it replaced by one with no acceptor? */ if (!ctx || !ctx->gc_acceptor.len) { kfree(string); string = NULL; goto out; } acceptor = &ctx->gc_acceptor; /* * Did we find a new acceptor that's longer than the original? Allocate * a longer buffer and try again. */ if (len < acceptor->len) { len = acceptor->len; rcu_read_unlock(); kfree(string); goto realloc; } memcpy(string, acceptor->data, acceptor->len); string[acceptor->len] = '\0'; out: rcu_read_unlock(); return string; } /* * Returns -EACCES if GSS context is NULL or will expire within the * timeout (miliseconds) */ static int gss_key_timeout(struct rpc_cred *rc) { struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); struct gss_cl_ctx *ctx; unsigned long timeout = jiffies + (gss_key_expire_timeo * HZ); int ret = 0; rcu_read_lock(); ctx = rcu_dereference(gss_cred->gc_ctx); if (!ctx || time_after(timeout, ctx->gc_expiry)) ret = -EACCES; rcu_read_unlock(); return ret; } static int gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) { struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); struct gss_cl_ctx *ctx; int ret; if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) goto out; /* Don't match with creds that have expired. */ rcu_read_lock(); ctx = rcu_dereference(gss_cred->gc_ctx); if (!ctx || time_after(jiffies, ctx->gc_expiry)) { rcu_read_unlock(); return 0; } rcu_read_unlock(); if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags)) return 0; out: if (acred->principal != NULL) { if (gss_cred->gc_principal == NULL) return 0; ret = strcmp(acred->principal, gss_cred->gc_principal) == 0; } else { if (gss_cred->gc_principal != NULL) return 0; ret = uid_eq(rc->cr_cred->fsuid, acred->cred->fsuid); } return ret; } /* * Marshal credentials. * * The expensive part is computing the verifier. We can't cache a * pre-computed version of the verifier because the seqno, which * is different every time, is included in the MIC. */ static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_cred *cred = req->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); __be32 *p, *cred_len; u32 maj_stat = 0; struct xdr_netobj mic; struct kvec iov; struct xdr_buf verf_buf; int status; /* Credential */ p = xdr_reserve_space(xdr, 7 * sizeof(*p) + ctx->gc_wire_ctx.len); if (!p) goto marshal_failed; *p++ = rpc_auth_gss; cred_len = p++; spin_lock(&ctx->gc_seq_lock); req->rq_seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ; spin_unlock(&ctx->gc_seq_lock); if (req->rq_seqno == MAXSEQ) goto expired; trace_rpcgss_seqno(task); *p++ = cpu_to_be32(RPC_GSS_VERSION); *p++ = cpu_to_be32(ctx->gc_proc); *p++ = cpu_to_be32(req->rq_seqno); *p++ = cpu_to_be32(gss_cred->gc_service); p = xdr_encode_netobj(p, &ctx->gc_wire_ctx); *cred_len = cpu_to_be32((p - (cred_len + 1)) << 2); /* Verifier */ /* We compute the checksum for the verifier over the xdr-encoded bytes * starting with the xid and ending at the end of the credential: */ iov.iov_base = req->rq_snd_buf.head[0].iov_base; iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; xdr_buf_from_iov(&iov, &verf_buf); p = xdr_reserve_space(xdr, sizeof(*p)); if (!p) goto marshal_failed; *p++ = rpc_auth_gss; mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) goto expired; else if (maj_stat != 0) goto bad_mic; if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0) goto marshal_failed; status = 0; out: gss_put_ctx(ctx); return status; expired: clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); status = -EKEYEXPIRED; goto out; marshal_failed: status = -EMSGSIZE; goto out; bad_mic: trace_rpcgss_get_mic(task, maj_stat); status = -EIO; goto out; } static int gss_renew_cred(struct rpc_task *task) { struct rpc_cred *oldcred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(oldcred, struct gss_cred, gc_base); struct rpc_auth *auth = oldcred->cr_auth; struct auth_cred acred = { .cred = oldcred->cr_cred, .principal = gss_cred->gc_principal, }; struct rpc_cred *new; new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW); if (IS_ERR(new)) return PTR_ERR(new); task->tk_rqstp->rq_cred = new; put_rpccred(oldcred); return 0; } static int gss_cred_is_negative_entry(struct rpc_cred *cred) { if (test_bit(RPCAUTH_CRED_NEGATIVE, &cred->cr_flags)) { unsigned long now = jiffies; unsigned long begin, expire; struct gss_cred *gss_cred; gss_cred = container_of(cred, struct gss_cred, gc_base); begin = gss_cred->gc_upcall_timestamp; expire = begin + gss_expired_cred_retry_delay * HZ; if (time_in_range_open(now, begin, expire)) return 1; } return 0; } /* * Refresh credentials. XXX - finish */ static int gss_refresh(struct rpc_task *task) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; int ret = 0; if (gss_cred_is_negative_entry(cred)) return -EKEYEXPIRED; if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && !test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) { ret = gss_renew_cred(task); if (ret < 0) goto out; cred = task->tk_rqstp->rq_cred; } if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) ret = gss_refresh_upcall(task); out: return ret; } /* Dummy refresh routine: used only when destroying the context */ static int gss_refresh_null(struct rpc_task *task) { return 0; } static int gss_validate(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); __be32 *p, *seq = NULL; struct kvec iov; struct xdr_buf verf_buf; struct xdr_netobj mic; u32 len, maj_stat; int status; p = xdr_inline_decode(xdr, 2 * sizeof(*p)); if (!p) goto validate_failed; if (*p++ != rpc_auth_gss) goto validate_failed; len = be32_to_cpup(p); if (len > RPC_MAX_AUTH_SIZE) goto validate_failed; p = xdr_inline_decode(xdr, len); if (!p) goto validate_failed; seq = kmalloc(4, GFP_KERNEL); if (!seq) goto validate_failed; *seq = cpu_to_be32(task->tk_rqstp->rq_seqno); iov.iov_base = seq; iov.iov_len = 4; xdr_buf_from_iov(&iov, &verf_buf); mic.data = (u8 *)p; mic.len = len; maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat) goto bad_mic; /* We leave it to unwrap to calculate au_rslack. For now we just * calculate the length of the verifier: */ if (test_bit(RPCAUTH_AUTH_UPDATE_SLACK, &cred->cr_auth->au_flags)) cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; status = 0; out: gss_put_ctx(ctx); kfree(seq); return status; validate_failed: status = -EIO; goto out; bad_mic: trace_rpcgss_verify_mic(task, maj_stat); status = -EACCES; goto out; } static noinline_for_stack int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_rqst *rqstp = task->tk_rqstp; struct xdr_buf integ_buf, *snd_buf = &rqstp->rq_snd_buf; struct xdr_netobj mic; __be32 *p, *integ_len; u32 offset, maj_stat; p = xdr_reserve_space(xdr, 2 * sizeof(*p)); if (!p) goto wrap_failed; integ_len = p++; *p = cpu_to_be32(rqstp->rq_seqno); if (rpcauth_wrap_req_encode(task, xdr)) goto wrap_failed; offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; if (xdr_buf_subsegment(snd_buf, &integ_buf, offset, snd_buf->len - offset)) goto wrap_failed; *integ_len = cpu_to_be32(integ_buf.len); p = xdr_reserve_space(xdr, 0); if (!p) goto wrap_failed; mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) goto bad_mic; /* Check that the trailing MIC fit in the buffer, after the fact */ if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0) goto wrap_failed; return 0; wrap_failed: return -EMSGSIZE; bad_mic: trace_rpcgss_get_mic(task, maj_stat); return -EIO; } static void priv_release_snd_buf(struct rpc_rqst *rqstp) { int i; for (i=0; i < rqstp->rq_enc_pages_num; i++) __free_page(rqstp->rq_enc_pages[i]); kfree(rqstp->rq_enc_pages); rqstp->rq_release_snd_buf = NULL; } static int alloc_enc_pages(struct rpc_rqst *rqstp) { struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; int first, last, i; if (rqstp->rq_release_snd_buf) rqstp->rq_release_snd_buf(rqstp); if (snd_buf->page_len == 0) { rqstp->rq_enc_pages_num = 0; return 0; } first = snd_buf->page_base >> PAGE_SHIFT; last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_SHIFT; rqstp->rq_enc_pages_num = last - first + 1 + 1; rqstp->rq_enc_pages = kmalloc_array(rqstp->rq_enc_pages_num, sizeof(struct page *), GFP_KERNEL); if (!rqstp->rq_enc_pages) goto out; for (i=0; i < rqstp->rq_enc_pages_num; i++) { rqstp->rq_enc_pages[i] = alloc_page(GFP_KERNEL); if (rqstp->rq_enc_pages[i] == NULL) goto out_free; } rqstp->rq_release_snd_buf = priv_release_snd_buf; return 0; out_free: rqstp->rq_enc_pages_num = i; priv_release_snd_buf(rqstp); out: return -EAGAIN; } static noinline_for_stack int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_rqst *rqstp = task->tk_rqstp; struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; u32 pad, offset, maj_stat; int status; __be32 *p, *opaque_len; struct page **inpages; int first; struct kvec *iov; status = -EIO; p = xdr_reserve_space(xdr, 2 * sizeof(*p)); if (!p) goto wrap_failed; opaque_len = p++; *p = cpu_to_be32(rqstp->rq_seqno); if (rpcauth_wrap_req_encode(task, xdr)) goto wrap_failed; status = alloc_enc_pages(rqstp); if (unlikely(status)) goto wrap_failed; first = snd_buf->page_base >> PAGE_SHIFT; inpages = snd_buf->pages + first; snd_buf->pages = rqstp->rq_enc_pages; snd_buf->page_base -= first << PAGE_SHIFT; /* * Move the tail into its own page, in case gss_wrap needs * more space in the head when wrapping. * * Still... Why can't gss_wrap just slide the tail down? */ if (snd_buf->page_len || snd_buf->tail[0].iov_len) { char *tmp; tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]); memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len); snd_buf->tail[0].iov_base = tmp; } offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); /* slack space should prevent this ever happening: */ if (unlikely(snd_buf->len > snd_buf->buflen)) { status = -EIO; goto wrap_failed; } /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was * done anyway, so it's safe to put the request on the wire: */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) goto bad_wrap; *opaque_len = cpu_to_be32(snd_buf->len - offset); /* guess whether the pad goes into the head or the tail: */ if (snd_buf->page_len || snd_buf->tail[0].iov_len) iov = snd_buf->tail; else iov = snd_buf->head; p = iov->iov_base + iov->iov_len; pad = xdr_pad_size(snd_buf->len - offset); memset(p, 0, pad); iov->iov_len += pad; snd_buf->len += pad; return 0; wrap_failed: return status; bad_wrap: trace_rpcgss_wrap(task, maj_stat); return -EIO; } static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); int status; status = -EIO; if (ctx->gc_proc != RPC_GSS_PROC_DATA) { /* The spec seems a little ambiguous here, but I think that not * wrapping context destruction requests makes the most sense. */ status = rpcauth_wrap_req_encode(task, xdr); goto out; } switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: status = rpcauth_wrap_req_encode(task, xdr); break; case RPC_GSS_SVC_INTEGRITY: status = gss_wrap_req_integ(cred, ctx, task, xdr); break; case RPC_GSS_SVC_PRIVACY: status = gss_wrap_req_priv(cred, ctx, task, xdr); break; default: status = -EIO; } out: gss_put_ctx(ctx); return status; } /** * gss_update_rslack - Possibly update RPC receive buffer size estimates * @task: rpc_task for incoming RPC Reply being unwrapped * @cred: controlling rpc_cred for @task * @before: XDR words needed before each RPC Reply message * @after: XDR words needed following each RPC Reply message * */ static void gss_update_rslack(struct rpc_task *task, struct rpc_cred *cred, unsigned int before, unsigned int after) { struct rpc_auth *auth = cred->cr_auth; if (test_and_clear_bit(RPCAUTH_AUTH_UPDATE_SLACK, &auth->au_flags)) { auth->au_ralign = auth->au_verfsize + before; auth->au_rslack = auth->au_verfsize + after; trace_rpcgss_update_slack(task, auth); } } static int gss_unwrap_resp_auth(struct rpc_task *task, struct rpc_cred *cred) { gss_update_rslack(task, cred, 0, 0); return 0; } /* * RFC 2203, Section 5.3.2.2 * * struct rpc_gss_integ_data { * opaque databody_integ<>; * opaque checksum<>; * }; * * struct rpc_gss_data_t { * unsigned int seq_num; * proc_req_arg_t arg; * }; */ static noinline_for_stack int gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred, struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp, struct xdr_stream *xdr) { struct xdr_buf gss_data, *rcv_buf = &rqstp->rq_rcv_buf; u32 len, offset, seqno, maj_stat; struct xdr_netobj mic; int ret; ret = -EIO; mic.data = NULL; /* opaque databody_integ<>; */ if (xdr_stream_decode_u32(xdr, &len)) goto unwrap_failed; if (len & 3) goto unwrap_failed; offset = rcv_buf->len - xdr_stream_remaining(xdr); if (xdr_stream_decode_u32(xdr, &seqno)) goto unwrap_failed; if (seqno != rqstp->rq_seqno) goto bad_seqno; if (xdr_buf_subsegment(rcv_buf, &gss_data, offset, len)) goto unwrap_failed; /* * The xdr_stream now points to the beginning of the * upper layer payload, to be passed below to * rpcauth_unwrap_resp_decode(). The checksum, which * follows the upper layer payload in @rcv_buf, is * located and parsed without updating the xdr_stream. */ /* opaque checksum<>; */ offset += len; if (xdr_decode_word(rcv_buf, offset, &len)) goto unwrap_failed; offset += sizeof(__be32); if (offset + len > rcv_buf->len) goto unwrap_failed; mic.len = len; mic.data = kmalloc(len, GFP_KERNEL); if (ZERO_OR_NULL_PTR(mic.data)) goto unwrap_failed; if (read_bytes_from_xdr_buf(rcv_buf, offset, mic.data, mic.len)) goto unwrap_failed; maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &gss_data, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) goto bad_mic; gss_update_rslack(task, cred, 2, 2 + 1 + XDR_QUADLEN(mic.len)); ret = 0; out: kfree(mic.data); return ret; unwrap_failed: trace_rpcgss_unwrap_failed(task); goto out; bad_seqno: trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, seqno); goto out; bad_mic: trace_rpcgss_verify_mic(task, maj_stat); goto out; } static noinline_for_stack int gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp, struct xdr_stream *xdr) { struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct kvec *head = rqstp->rq_rcv_buf.head; u32 offset, opaque_len, maj_stat; __be32 *p; p = xdr_inline_decode(xdr, 2 * sizeof(*p)); if (unlikely(!p)) goto unwrap_failed; opaque_len = be32_to_cpup(p++); offset = (u8 *)(p) - (u8 *)head->iov_base; if (offset + opaque_len > rcv_buf->len) goto unwrap_failed; maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, offset + opaque_len, rcv_buf); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) goto bad_unwrap; /* gss_unwrap decrypted the sequence number */ if (be32_to_cpup(p++) != rqstp->rq_seqno) goto bad_seqno; /* gss_unwrap redacts the opaque blob from the head iovec. * rcv_buf has changed, thus the stream needs to be reset. */ xdr_init_decode(xdr, rcv_buf, p, rqstp); gss_update_rslack(task, cred, 2 + ctx->gc_gss_ctx->align, 2 + ctx->gc_gss_ctx->slack); return 0; unwrap_failed: trace_rpcgss_unwrap_failed(task); return -EIO; bad_seqno: trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(--p)); return -EIO; bad_unwrap: trace_rpcgss_unwrap(task, maj_stat); return -EIO; } static bool gss_seq_is_newer(u32 new, u32 old) { return (s32)(new - old) > 0; } static bool gss_xmit_need_reencode(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_cred *cred = req->rq_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); u32 win, seq_xmit = 0; bool ret = true; if (!ctx) goto out; if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq))) goto out_ctx; seq_xmit = READ_ONCE(ctx->gc_seq_xmit); while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) { u32 tmp = seq_xmit; seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno); if (seq_xmit == tmp) { ret = false; goto out_ctx; } } win = ctx->gc_win; if (win > 0) ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win); out_ctx: gss_put_ctx(ctx); out: trace_rpcgss_need_reencode(task, seq_xmit, ret); return ret; } static int gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_rqst *rqstp = task->tk_rqstp; struct rpc_cred *cred = rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); int status = -EIO; if (ctx->gc_proc != RPC_GSS_PROC_DATA) goto out_decode; switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: status = gss_unwrap_resp_auth(task, cred); break; case RPC_GSS_SVC_INTEGRITY: status = gss_unwrap_resp_integ(task, cred, ctx, rqstp, xdr); break; case RPC_GSS_SVC_PRIVACY: status = gss_unwrap_resp_priv(task, cred, ctx, rqstp, xdr); break; } if (status) goto out; out_decode: status = rpcauth_unwrap_resp_decode(task, xdr); out: gss_put_ctx(ctx); return status; } static const struct rpc_authops authgss_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_GSS, .au_name = "RPCSEC_GSS", .create = gss_create, .destroy = gss_destroy, .hash_cred = gss_hash_cred, .lookup_cred = gss_lookup_cred, .crcreate = gss_create_cred, .info2flavor = gss_mech_info2flavor, .flavor2info = gss_mech_flavor2info, }; static const struct rpc_credops gss_credops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, .cr_init = gss_cred_init, .crmatch = gss_match, .crmarshal = gss_marshal, .crrefresh = gss_refresh, .crvalidate = gss_validate, .crwrap_req = gss_wrap_req, .crunwrap_resp = gss_unwrap_resp, .crkey_timeout = gss_key_timeout, .crstringify_acceptor = gss_stringify_acceptor, .crneed_reencode = gss_xmit_need_reencode, }; static const struct rpc_credops gss_nullops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_nullcred, .crmatch = gss_match, .crmarshal = gss_marshal, .crrefresh = gss_refresh_null, .crvalidate = gss_validate, .crwrap_req = gss_wrap_req, .crunwrap_resp = gss_unwrap_resp, .crstringify_acceptor = gss_stringify_acceptor, }; static const struct rpc_pipe_ops gss_upcall_ops_v0 = { .upcall = gss_v0_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, .open_pipe = gss_pipe_open_v0, .release_pipe = gss_pipe_release, }; static const struct rpc_pipe_ops gss_upcall_ops_v1 = { .upcall = gss_v1_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, .open_pipe = gss_pipe_open_v1, .release_pipe = gss_pipe_release, }; static __net_init int rpcsec_gss_init_net(struct net *net) { return gss_svc_init_net(net); } static __net_exit void rpcsec_gss_exit_net(struct net *net) { gss_svc_shutdown_net(net); } static struct pernet_operations rpcsec_gss_net_ops = { .init = rpcsec_gss_init_net, .exit = rpcsec_gss_exit_net, }; /* * Initialize RPCSEC_GSS module */ static int __init init_rpcsec_gss(void) { int err = 0; err = rpcauth_register(&authgss_ops); if (err) goto out; err = gss_svc_init(); if (err) goto out_unregister; err = register_pernet_subsys(&rpcsec_gss_net_ops); if (err) goto out_svc_exit; rpc_init_wait_queue(&pipe_version_rpc_waitqueue, "gss pipe version"); return 0; out_svc_exit: gss_svc_shutdown(); out_unregister: rpcauth_unregister(&authgss_ops); out: return err; } static void __exit exit_rpcsec_gss(void) { unregister_pernet_subsys(&rpcsec_gss_net_ops); gss_svc_shutdown(); rpcauth_unregister(&authgss_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ } MODULE_ALIAS("rpc-auth-6"); MODULE_DESCRIPTION("Sun RPC Kerberos RPCSEC_GSS client authentication"); MODULE_LICENSE("GPL"); module_param_named(expired_cred_retry_delay, gss_expired_cred_retry_delay, uint, 0644); MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until " "the RPC engine retries an expired credential"); module_param_named(key_expire_timeo, gss_key_expire_timeo, uint, 0644); MODULE_PARM_DESC(key_expire_timeo, "Time (in seconds) at the end of a " "credential keys lifetime where the NFS layer cleans up " "prior to key expiration"); module_init(init_rpcsec_gss) module_exit(exit_rpcsec_gss) |
872 876 803 77 77 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | // SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> #include <linux/spinlock.h> #include <linux/atomic.h> /* * This is an implementation of the notion of "decrement a * reference count, and return locked if it decremented to zero". * * NOTE NOTE NOTE! This is _not_ equivalent to * * if (atomic_dec_and_test(&atomic)) { * spin_lock(&lock); * return 1; * } * return 0; * * because the spin-lock and the decrement must be * "atomic". */ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) { /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ if (atomic_add_unless(atomic, -1, 1)) return 0; /* Otherwise do it the slow way */ spin_lock(lock); if (atomic_dec_and_test(atomic)) return 1; spin_unlock(lock); return 0; } EXPORT_SYMBOL(_atomic_dec_and_lock); int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, unsigned long *flags) { /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ if (atomic_add_unless(atomic, -1, 1)) return 0; /* Otherwise do it the slow way */ spin_lock_irqsave(lock, *flags); if (atomic_dec_and_test(atomic)) return 1; spin_unlock_irqrestore(lock, *flags); return 0; } EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave); int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock) { /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ if (atomic_add_unless(atomic, -1, 1)) return 0; /* Otherwise do it the slow way */ raw_spin_lock(lock); if (atomic_dec_and_test(atomic)) return 1; raw_spin_unlock(lock); return 0; } EXPORT_SYMBOL(_atomic_dec_and_raw_lock); int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock, unsigned long *flags) { /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ if (atomic_add_unless(atomic, -1, 1)) return 0; /* Otherwise do it the slow way */ raw_spin_lock_irqsave(lock, *flags); if (atomic_dec_and_test(atomic)) return 1; raw_spin_unlock_irqrestore(lock, *flags); return 0; } EXPORT_SYMBOL(_atomic_dec_and_raw_lock_irqsave); |
20 1 19 40 33 6 28 8 2 2 1 1 1 1 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | /* * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/in.h> #include <linux/ipv6.h> #include "rds.h" #include "loop.h" static char * const rds_trans_modules[] = { [RDS_TRANS_IB] = "rds_rdma", [RDS_TRANS_GAP] = NULL, [RDS_TRANS_TCP] = "rds_tcp", }; static struct rds_transport *transports[RDS_TRANS_COUNT]; static DECLARE_RWSEM(rds_trans_sem); void rds_trans_register(struct rds_transport *trans) { BUG_ON(strlen(trans->t_name) + 1 > TRANSNAMSIZ); down_write(&rds_trans_sem); if (transports[trans->t_type]) printk(KERN_ERR "RDS Transport type %d already registered\n", trans->t_type); else { transports[trans->t_type] = trans; printk(KERN_INFO "Registered RDS/%s transport\n", trans->t_name); } up_write(&rds_trans_sem); } EXPORT_SYMBOL_GPL(rds_trans_register); void rds_trans_unregister(struct rds_transport *trans) { down_write(&rds_trans_sem); transports[trans->t_type] = NULL; printk(KERN_INFO "Unregistered RDS/%s transport\n", trans->t_name); up_write(&rds_trans_sem); } EXPORT_SYMBOL_GPL(rds_trans_unregister); void rds_trans_put(struct rds_transport *trans) { if (trans) module_put(trans->t_owner); } struct rds_transport *rds_trans_get_preferred(struct net *net, const struct in6_addr *addr, __u32 scope_id) { struct rds_transport *ret = NULL; struct rds_transport *trans; unsigned int i; if (ipv6_addr_v4mapped(addr)) { if (*(u_int8_t *)&addr->s6_addr32[3] == IN_LOOPBACKNET) return &rds_loop_transport; } else if (ipv6_addr_loopback(addr)) { return &rds_loop_transport; } down_read(&rds_trans_sem); for (i = 0; i < RDS_TRANS_COUNT; i++) { trans = transports[i]; if (trans && (trans->laddr_check(net, addr, scope_id) == 0) && (!trans->t_owner || try_module_get(trans->t_owner))) { ret = trans; break; } } up_read(&rds_trans_sem); return ret; } struct rds_transport *rds_trans_get(int t_type) { struct rds_transport *ret = NULL; struct rds_transport *trans; down_read(&rds_trans_sem); trans = transports[t_type]; if (!trans) { up_read(&rds_trans_sem); if (rds_trans_modules[t_type]) request_module(rds_trans_modules[t_type]); down_read(&rds_trans_sem); trans = transports[t_type]; } if (trans && trans->t_type == t_type && (!trans->t_owner || try_module_get(trans->t_owner))) ret = trans; up_read(&rds_trans_sem); return ret; } /* * This returns the number of stats entries in the snapshot and only * copies them using the iter if there is enough space for them. The * caller passes in the global stats so that we can size and copy while * holding the lock. */ unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, unsigned int avail) { struct rds_transport *trans; unsigned int total = 0; unsigned int part; int i; rds_info_iter_unmap(iter); down_read(&rds_trans_sem); for (i = 0; i < RDS_TRANS_COUNT; i++) { trans = transports[i]; if (!trans || !trans->stats_info_copy) continue; part = trans->stats_info_copy(iter, avail); avail -= min(avail, part); total += part; } up_read(&rds_trans_sem); return total; } |
2 1 486 482 4 2 793 793 794 401 16 77 400 104 48 141 2 339 230 768 15 13 2 768 6 6 6 5 2 1 6 2 2 7 9 2 11 9 2 781 783 11 2 24 15 38 4 6552 629 618 14 6113 1013 7 1008 8 2499 1 247 2 1 1 223 959 9 1757 1265 464 18 2948 251 15 1 7 1586 4721 4718 6615 791 42 102 203 7627 1003 7513 7566 295 15 575 6768 958 6587 6756 472 1238 1242 257 14 14 14 14 14 14 7537 226 138 205 1836 1837 343 342 344 734 709 28 12 1 11 1230 1236 837 1220 466 1042 1044 1050 955 951 955 14 14 13 2513 2513 2530 1531 1524 1526 34 35 34 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 | // SPDX-License-Identifier: GPL-2.0 /* * NETLINK Netlink attributes * * Authors: Thomas Graf <tgraf@suug.ch> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> */ #include <linux/export.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/jiffies.h> #include <linux/nospec.h> #include <linux/skbuff.h> #include <linux/string.h> #include <linux/types.h> #include <net/netlink.h> /* For these data types, attribute length should be exactly the given * size. However, to maintain compatibility with broken commands, if the * attribute length does not match the expected size a warning is emitted * to the user that the command is sending invalid data and needs to be fixed. */ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { [NLA_U8] = sizeof(u8), [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), [NLA_U64] = sizeof(u64), [NLA_S8] = sizeof(s8), [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), [NLA_BE16] = sizeof(__be16), [NLA_BE32] = sizeof(__be32), }; static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_U8] = sizeof(u8), [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), [NLA_U64] = sizeof(u64), [NLA_MSECS] = sizeof(u64), [NLA_NESTED] = NLA_HDRLEN, [NLA_S8] = sizeof(s8), [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), [NLA_BE16] = sizeof(__be16), [NLA_BE32] = sizeof(__be32), }; /* * Nested policies might refer back to the original * policy in some cases, and userspace could try to * abuse that and recurse by nesting in the right * ways. Limit recursion to avoid this problem. */ #define MAX_POLICY_RECURSION_DEPTH 10 static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack, struct nlattr **tb, unsigned int depth); static int validate_nla_bitfield32(const struct nlattr *nla, const u32 valid_flags_mask) { const struct nla_bitfield32 *bf = nla_data(nla); if (!valid_flags_mask) return -EINVAL; /*disallow invalid bit selector */ if (bf->selector & ~valid_flags_mask) return -EINVAL; /*disallow invalid bit values */ if (bf->value & ~valid_flags_mask) return -EINVAL; /*disallow valid bit values that are not selected*/ if (bf->value & ~bf->selector) return -EINVAL; return 0; } static int nla_validate_array(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack, unsigned int validate, unsigned int depth) { const struct nlattr *entry; int rem; nla_for_each_attr(entry, head, len, rem) { int ret; if (nla_len(entry) == 0) continue; if (nla_len(entry) < NLA_HDRLEN) { NL_SET_ERR_MSG_ATTR_POL(extack, entry, policy, "Array element too short"); return -ERANGE; } ret = __nla_validate_parse(nla_data(entry), nla_len(entry), maxtype, policy, validate, extack, NULL, depth + 1); if (ret < 0) return ret; } return 0; } void nla_get_range_unsigned(const struct nla_policy *pt, struct netlink_range_validation *range) { WARN_ON_ONCE(pt->validation_type != NLA_VALIDATE_RANGE_PTR && (pt->min < 0 || pt->max < 0)); range->min = 0; switch (pt->type) { case NLA_U8: range->max = U8_MAX; break; case NLA_U16: case NLA_BE16: case NLA_BINARY: range->max = U16_MAX; break; case NLA_U32: case NLA_BE32: range->max = U32_MAX; break; case NLA_U64: case NLA_UINT: case NLA_MSECS: range->max = U64_MAX; break; default: WARN_ON_ONCE(1); return; } switch (pt->validation_type) { case NLA_VALIDATE_RANGE: case NLA_VALIDATE_RANGE_WARN_TOO_LONG: range->min = pt->min; range->max = pt->max; break; case NLA_VALIDATE_RANGE_PTR: *range = *pt->range; break; case NLA_VALIDATE_MIN: range->min = pt->min; break; case NLA_VALIDATE_MAX: range->max = pt->max; break; default: break; } } static int nla_validate_range_unsigned(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack, unsigned int validate) { struct netlink_range_validation range; u64 value; switch (pt->type) { case NLA_U8: value = nla_get_u8(nla); break; case NLA_U16: value = nla_get_u16(nla); break; case NLA_U32: value = nla_get_u32(nla); break; case NLA_U64: value = nla_get_u64(nla); break; case NLA_UINT: value = nla_get_uint(nla); break; case NLA_MSECS: value = nla_get_u64(nla); break; case NLA_BINARY: value = nla_len(nla); break; case NLA_BE16: value = ntohs(nla_get_be16(nla)); break; case NLA_BE32: value = ntohl(nla_get_be32(nla)); break; default: return -EINVAL; } nla_get_range_unsigned(pt, &range); if (pt->validation_type == NLA_VALIDATE_RANGE_WARN_TOO_LONG && pt->type == NLA_BINARY && value > range.max) { pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, pt->type); if (validate & NL_VALIDATE_STRICT_ATTRS) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "invalid attribute length"); return -EINVAL; } /* this assumes min <= max (don't validate against min) */ return 0; } if (value < range.min || value > range.max) { bool binary = pt->type == NLA_BINARY; if (binary) NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "binary attribute size out of range"); else NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "integer out of range"); return -ERANGE; } return 0; } void nla_get_range_signed(const struct nla_policy *pt, struct netlink_range_validation_signed *range) { switch (pt->type) { case NLA_S8: range->min = S8_MIN; range->max = S8_MAX; break; case NLA_S16: range->min = S16_MIN; range->max = S16_MAX; break; case NLA_S32: range->min = S32_MIN; range->max = S32_MAX; break; case NLA_S64: case NLA_SINT: range->min = S64_MIN; range->max = S64_MAX; break; default: WARN_ON_ONCE(1); return; } switch (pt->validation_type) { case NLA_VALIDATE_RANGE: range->min = pt->min; range->max = pt->max; break; case NLA_VALIDATE_RANGE_PTR: *range = *pt->range_signed; break; case NLA_VALIDATE_MIN: range->min = pt->min; break; case NLA_VALIDATE_MAX: range->max = pt->max; break; default: break; } } static int nla_validate_int_range_signed(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack) { struct netlink_range_validation_signed range; s64 value; switch (pt->type) { case NLA_S8: value = nla_get_s8(nla); break; case NLA_S16: value = nla_get_s16(nla); break; case NLA_S32: value = nla_get_s32(nla); break; case NLA_S64: value = nla_get_s64(nla); break; case NLA_SINT: value = nla_get_sint(nla); break; default: return -EINVAL; } nla_get_range_signed(pt, &range); if (value < range.min || value > range.max) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "integer out of range"); return -ERANGE; } return 0; } static int nla_validate_int_range(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack, unsigned int validate) { switch (pt->type) { case NLA_U8: case NLA_U16: case NLA_U32: case NLA_U64: case NLA_UINT: case NLA_MSECS: case NLA_BINARY: case NLA_BE16: case NLA_BE32: return nla_validate_range_unsigned(pt, nla, extack, validate); case NLA_S8: case NLA_S16: case NLA_S32: case NLA_S64: case NLA_SINT: return nla_validate_int_range_signed(pt, nla, extack); default: WARN_ON(1); return -EINVAL; } } static int nla_validate_mask(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack) { u64 value; switch (pt->type) { case NLA_U8: value = nla_get_u8(nla); break; case NLA_U16: value = nla_get_u16(nla); break; case NLA_U32: value = nla_get_u32(nla); break; case NLA_U64: value = nla_get_u64(nla); break; case NLA_UINT: value = nla_get_uint(nla); break; case NLA_BE16: value = ntohs(nla_get_be16(nla)); break; case NLA_BE32: value = ntohl(nla_get_be32(nla)); break; default: return -EINVAL; } if (value & ~(u64)pt->mask) { NL_SET_ERR_MSG_ATTR(extack, nla, "reserved bit set"); return -EINVAL; } return 0; } static int validate_nla(const struct nlattr *nla, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack, unsigned int depth) { u16 strict_start_type = policy[0].strict_start_type; const struct nla_policy *pt; int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); int err = -ERANGE; if (strict_start_type && type >= strict_start_type) validate |= NL_VALIDATE_STRICT; if (type <= 0 || type > maxtype) return 0; type = array_index_nospec(type, maxtype + 1); pt = &policy[type]; BUG_ON(pt->type > NLA_TYPE_MAX); if (nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) { pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, type); if (validate & NL_VALIDATE_STRICT_ATTRS) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "invalid attribute length"); return -EINVAL; } } if (validate & NL_VALIDATE_NESTED) { if ((pt->type == NLA_NESTED || pt->type == NLA_NESTED_ARRAY) && !(nla->nla_type & NLA_F_NESTED)) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "NLA_F_NESTED is missing"); return -EINVAL; } if (pt->type != NLA_NESTED && pt->type != NLA_NESTED_ARRAY && pt->type != NLA_UNSPEC && (nla->nla_type & NLA_F_NESTED)) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "NLA_F_NESTED not expected"); return -EINVAL; } } switch (pt->type) { case NLA_REJECT: if (extack && pt->reject_message) { NL_SET_BAD_ATTR(extack, nla); extack->_msg = pt->reject_message; return -EINVAL; } err = -EINVAL; goto out_err; case NLA_FLAG: if (attrlen > 0) goto out_err; break; case NLA_SINT: case NLA_UINT: if (attrlen != sizeof(u32) && attrlen != sizeof(u64)) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "invalid attribute length"); return -EINVAL; } break; case NLA_BITFIELD32: if (attrlen != sizeof(struct nla_bitfield32)) goto out_err; err = validate_nla_bitfield32(nla, pt->bitfield32_valid); if (err) goto out_err; break; case NLA_NUL_STRING: if (pt->len) minlen = min_t(int, attrlen, pt->len + 1); else minlen = attrlen; if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) { err = -EINVAL; goto out_err; } fallthrough; case NLA_STRING: if (attrlen < 1) goto out_err; if (pt->len) { char *buf = nla_data(nla); if (buf[attrlen - 1] == '\0') attrlen--; if (attrlen > pt->len) goto out_err; } break; case NLA_BINARY: if (pt->len && attrlen > pt->len) goto out_err; break; case NLA_NESTED: /* a nested attributes is allowed to be empty; if its not, * it must have a size of at least NLA_HDRLEN. */ if (attrlen == 0) break; if (attrlen < NLA_HDRLEN) goto out_err; if (pt->nested_policy) { err = __nla_validate_parse(nla_data(nla), nla_len(nla), pt->len, pt->nested_policy, validate, extack, NULL, depth + 1); if (err < 0) { /* * return directly to preserve the inner * error message/attribute pointer */ return err; } } break; case NLA_NESTED_ARRAY: /* a nested array attribute is allowed to be empty; if its not, * it must have a size of at least NLA_HDRLEN. */ if (attrlen == 0) break; if (attrlen < NLA_HDRLEN) goto out_err; if (pt->nested_policy) { int err; err = nla_validate_array(nla_data(nla), nla_len(nla), pt->len, pt->nested_policy, extack, validate, depth); if (err < 0) { /* * return directly to preserve the inner * error message/attribute pointer */ return err; } } break; case NLA_UNSPEC: if (validate & NL_VALIDATE_UNSPEC) { NL_SET_ERR_MSG_ATTR(extack, nla, "Unsupported attribute"); return -EINVAL; } if (attrlen < pt->len) goto out_err; break; default: if (pt->len) minlen = pt->len; else minlen = nla_attr_minlen[pt->type]; if (attrlen < minlen) goto out_err; } /* further validation */ switch (pt->validation_type) { case NLA_VALIDATE_NONE: /* nothing to do */ break; case NLA_VALIDATE_RANGE_PTR: case NLA_VALIDATE_RANGE: case NLA_VALIDATE_RANGE_WARN_TOO_LONG: case NLA_VALIDATE_MIN: case NLA_VALIDATE_MAX: err = nla_validate_int_range(pt, nla, extack, validate); if (err) return err; break; case NLA_VALIDATE_MASK: err = nla_validate_mask(pt, nla, extack); if (err) return err; break; case NLA_VALIDATE_FUNCTION: if (pt->validate) { err = pt->validate(nla, extack); if (err) return err; } break; } return 0; out_err: NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "Attribute failed policy validation"); return err; } static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack, struct nlattr **tb, unsigned int depth) { const struct nlattr *nla; int rem; if (depth >= MAX_POLICY_RECURSION_DEPTH) { NL_SET_ERR_MSG(extack, "allowed policy recursion depth exceeded"); return -EINVAL; } if (tb) memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); nla_for_each_attr(nla, head, len, rem) { u16 type = nla_type(nla); if (type == 0 || type > maxtype) { if (validate & NL_VALIDATE_MAXTYPE) { NL_SET_ERR_MSG_ATTR(extack, nla, "Unknown attribute type"); return -EINVAL; } continue; } type = array_index_nospec(type, maxtype + 1); if (policy) { int err = validate_nla(nla, maxtype, policy, validate, extack, depth); if (err < 0) return err; } if (tb) tb[type] = (struct nlattr *)nla; } if (unlikely(rem > 0)) { pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", rem, current->comm); NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes"); if (validate & NL_VALIDATE_TRAILING) return -EINVAL; } return 0; } /** * __nla_validate - Validate a stream of attributes * @head: head of attribute stream * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the * specified policy. Validation depends on the validate flags passed, see * &enum netlink_validation for more details on that. * See documentation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ int __nla_validate(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { return __nla_validate_parse(head, len, maxtype, policy, validate, extack, NULL, 0); } EXPORT_SYMBOL(__nla_validate); /** * nla_policy_len - Determine the max. length of a policy * @p: policy to use * @n: number of policies * * Determines the max. length of the policy. It is currently used * to allocated Netlink buffers roughly the size of the actual * message. * * Returns 0 on success or a negative error code. */ int nla_policy_len(const struct nla_policy *p, int n) { int i, len = 0; for (i = 0; i < n; i++, p++) { if (p->len) len += nla_total_size(p->len); else if (nla_attr_len[p->type]) len += nla_total_size(nla_attr_len[p->type]); else if (nla_attr_minlen[p->type]) len += nla_total_size(nla_attr_minlen[p->type]); } return len; } EXPORT_SYMBOL(nla_policy_len); /** * __nla_parse - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream * @policy: validation policy * @validate: validation strictness * @extack: extended ACK pointer * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessible via the attribute type. * Validation is controlled by the @validate parameter. * * Returns 0 on success or a negative error code. */ int __nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { return __nla_validate_parse(head, len, maxtype, policy, validate, extack, tb, 0); } EXPORT_SYMBOL(__nla_parse); /** * nla_find - Find a specific attribute in a stream of attributes * @head: head of attribute stream * @len: length of attribute stream * @attrtype: type of attribute to look for * * Returns the first attribute in the stream matching the specified type. */ struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype) { const struct nlattr *nla; int rem; nla_for_each_attr(nla, head, len, rem) if (nla_type(nla) == attrtype) return (struct nlattr *)nla; return NULL; } EXPORT_SYMBOL(nla_find); /** * nla_strscpy - Copy string attribute payload into a sized buffer * @dst: Where to copy the string to. * @nla: Attribute to copy the string from. * @dstsize: Size of destination buffer. * * Copies at most dstsize - 1 bytes into the destination buffer. * Unlike strscpy() the destination buffer is always padded out. * * Return: * * srclen - Returns @nla length (not including the trailing %NUL). * * -E2BIG - If @dstsize is 0 or greater than U16_MAX or @nla length greater * than @dstsize. */ ssize_t nla_strscpy(char *dst, const struct nlattr *nla, size_t dstsize) { size_t srclen = nla_len(nla); char *src = nla_data(nla); ssize_t ret; size_t len; if (dstsize == 0 || WARN_ON_ONCE(dstsize > U16_MAX)) return -E2BIG; if (srclen > 0 && src[srclen - 1] == '\0') srclen--; if (srclen >= dstsize) { len = dstsize - 1; ret = -E2BIG; } else { len = srclen; ret = len; } memcpy(dst, src, len); /* Zero pad end of dst. */ memset(dst + len, 0, dstsize - len); return ret; } EXPORT_SYMBOL(nla_strscpy); /** * nla_strdup - Copy string attribute payload into a newly allocated buffer * @nla: attribute to copy the string from * @flags: the type of memory to allocate (see kmalloc). * * Returns a pointer to the allocated buffer or NULL on error. */ char *nla_strdup(const struct nlattr *nla, gfp_t flags) { size_t srclen = nla_len(nla); char *src = nla_data(nla), *dst; if (srclen > 0 && src[srclen - 1] == '\0') srclen--; dst = kmalloc(srclen + 1, flags); if (dst != NULL) { memcpy(dst, src, srclen); dst[srclen] = '\0'; } return dst; } EXPORT_SYMBOL(nla_strdup); /** * nla_memcpy - Copy a netlink attribute into another memory area * @dest: where to copy to memcpy * @src: netlink attribute to copy from * @count: size of the destination area * * Note: The number of bytes copied is limited by the length of * attribute's payload. memcpy * * Returns the number of bytes copied. */ int nla_memcpy(void *dest, const struct nlattr *src, int count) { int minlen = min_t(int, count, nla_len(src)); memcpy(dest, nla_data(src), minlen); if (count > minlen) memset(dest + minlen, 0, count - minlen); return minlen; } EXPORT_SYMBOL(nla_memcpy); /** * nla_memcmp - Compare an attribute with sized memory area * @nla: netlink attribute * @data: memory area * @size: size of memory area */ int nla_memcmp(const struct nlattr *nla, const void *data, size_t size) { int d = nla_len(nla) - size; if (d == 0) d = memcmp(nla_data(nla), data, size); return d; } EXPORT_SYMBOL(nla_memcmp); /** * nla_strcmp - Compare a string attribute against a string * @nla: netlink string attribute * @str: another string */ int nla_strcmp(const struct nlattr *nla, const char *str) { int len = strlen(str); char *buf = nla_data(nla); int attrlen = nla_len(nla); int d; while (attrlen > 0 && buf[attrlen - 1] == '\0') attrlen--; d = attrlen - len; if (d == 0) d = memcmp(nla_data(nla), str, len); return d; } EXPORT_SYMBOL(nla_strcmp); #ifdef CONFIG_NET /** * __nla_reserve - reserve room for attribute on the skb * @skb: socket buffer to reserve room on * @attrtype: attribute type * @attrlen: length of attribute payload * * Adds a netlink attribute header to a socket buffer and reserves * room for the payload but does not copy it. * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute header and payload. */ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) { struct nlattr *nla; nla = skb_put(skb, nla_total_size(attrlen)); nla->nla_type = attrtype; nla->nla_len = nla_attr_size(attrlen); memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen)); return nla; } EXPORT_SYMBOL(__nla_reserve); /** * __nla_reserve_64bit - reserve room for attribute on the skb and align it * @skb: socket buffer to reserve room on * @attrtype: attribute type * @attrlen: length of attribute payload * @padattr: attribute type for the padding * * Adds a netlink attribute header to a socket buffer and reserves * room for the payload but does not copy it. It also ensure that this * attribute will have a 64-bit aligned nla_data() area. * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute header and payload. */ struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen, int padattr) { nla_align_64bit(skb, padattr); return __nla_reserve(skb, attrtype, attrlen); } EXPORT_SYMBOL(__nla_reserve_64bit); /** * __nla_reserve_nohdr - reserve room for attribute without header * @skb: socket buffer to reserve room on * @attrlen: length of attribute payload * * Reserves room for attribute payload without a header. * * The caller is responsible to ensure that the skb provides enough * tailroom for the payload. */ void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) { return skb_put_zero(skb, NLA_ALIGN(attrlen)); } EXPORT_SYMBOL(__nla_reserve_nohdr); /** * nla_reserve - reserve room for attribute on the skb * @skb: socket buffer to reserve room on * @attrtype: attribute type * @attrlen: length of attribute payload * * Adds a netlink attribute header to a socket buffer and reserves * room for the payload but does not copy it. * * Returns NULL if the tailroom of the skb is insufficient to store * the attribute header and payload. */ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) { if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) return NULL; return __nla_reserve(skb, attrtype, attrlen); } EXPORT_SYMBOL(nla_reserve); /** * nla_reserve_64bit - reserve room for attribute on the skb and align it * @skb: socket buffer to reserve room on * @attrtype: attribute type * @attrlen: length of attribute payload * @padattr: attribute type for the padding * * Adds a netlink attribute header to a socket buffer and reserves * room for the payload but does not copy it. It also ensure that this * attribute will have a 64-bit aligned nla_data() area. * * Returns NULL if the tailroom of the skb is insufficient to store * the attribute header and payload. */ struct nlattr *nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen, int padattr) { size_t len; if (nla_need_padding_for_64bit(skb)) len = nla_total_size_64bit(attrlen); else len = nla_total_size(attrlen); if (unlikely(skb_tailroom(skb) < len)) return NULL; return __nla_reserve_64bit(skb, attrtype, attrlen, padattr); } EXPORT_SYMBOL(nla_reserve_64bit); /** * nla_reserve_nohdr - reserve room for attribute without header * @skb: socket buffer to reserve room on * @attrlen: length of attribute payload * * Reserves room for attribute payload without a header. * * Returns NULL if the tailroom of the skb is insufficient to store * the attribute payload. */ void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) return NULL; return __nla_reserve_nohdr(skb, attrlen); } EXPORT_SYMBOL(nla_reserve_nohdr); /** * __nla_put - Add a netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @attrlen: length of attribute payload * @data: head of attribute payload * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute header and payload. */ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) { struct nlattr *nla; nla = __nla_reserve(skb, attrtype, attrlen); memcpy(nla_data(nla), data, attrlen); } EXPORT_SYMBOL(__nla_put); /** * __nla_put_64bit - Add a netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @attrlen: length of attribute payload * @data: head of attribute payload * @padattr: attribute type for the padding * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute header and payload. */ void __nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, const void *data, int padattr) { struct nlattr *nla; nla = __nla_reserve_64bit(skb, attrtype, attrlen, padattr); memcpy(nla_data(nla), data, attrlen); } EXPORT_SYMBOL(__nla_put_64bit); /** * __nla_put_nohdr - Add a netlink attribute without header * @skb: socket buffer to add attribute to * @attrlen: length of attribute payload * @data: head of attribute payload * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute payload. */ void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) { void *start; start = __nla_reserve_nohdr(skb, attrlen); memcpy(start, data, attrlen); } EXPORT_SYMBOL(__nla_put_nohdr); /** * nla_put - Add a netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @attrlen: length of attribute payload * @data: head of attribute payload * * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute header and payload. */ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) return -EMSGSIZE; __nla_put(skb, attrtype, attrlen, data); return 0; } EXPORT_SYMBOL(nla_put); /** * nla_put_64bit - Add a netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @attrlen: length of attribute payload * @data: head of attribute payload * @padattr: attribute type for the padding * * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute header and payload. */ int nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, const void *data, int padattr) { size_t len; if (nla_need_padding_for_64bit(skb)) len = nla_total_size_64bit(attrlen); else len = nla_total_size(attrlen); if (unlikely(skb_tailroom(skb) < len)) return -EMSGSIZE; __nla_put_64bit(skb, attrtype, attrlen, data, padattr); return 0; } EXPORT_SYMBOL(nla_put_64bit); /** * nla_put_nohdr - Add a netlink attribute without header * @skb: socket buffer to add attribute to * @attrlen: length of attribute payload * @data: head of attribute payload * * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. */ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) return -EMSGSIZE; __nla_put_nohdr(skb, attrlen, data); return 0; } EXPORT_SYMBOL(nla_put_nohdr); /** * nla_append - Add a netlink attribute without header or padding * @skb: socket buffer to add attribute to * @attrlen: length of attribute payload * @data: head of attribute payload * * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. */ int nla_append(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) return -EMSGSIZE; skb_put_data(skb, data, attrlen); return 0; } EXPORT_SYMBOL(nla_append); #endif |
15 1 1 1 10 1 1 3 2 1 3 2 1 1 1 7 1 6 6 6 6 1 1 3 3 2 1 3 3 3 6 6 1 1 3 1 1 1 3 1 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> */ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/if_vlan.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/tc_wrapper.h> #include <linux/tc_act/tc_vlan.h> #include <net/tc_act/tc_vlan.h> static struct tc_action_ops act_vlan_ops; TC_INDIRECT_SCOPE int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_vlan *v = to_vlan(a); struct tcf_vlan_params *p; int action; int err; u16 tci; tcf_lastuse_update(&v->tcf_tm); tcf_action_update_bstats(&v->common, skb); /* Ensure 'data' points at mac_header prior calling vlan manipulating * functions. */ if (skb_at_tc_ingress(skb)) skb_push_rcsum(skb, skb->mac_len); action = READ_ONCE(v->tcf_action); p = rcu_dereference_bh(v->vlan_p); switch (p->tcfv_action) { case TCA_VLAN_ACT_POP: err = skb_vlan_pop(skb); if (err) goto drop; break; case TCA_VLAN_ACT_PUSH: err = skb_vlan_push(skb, p->tcfv_push_proto, p->tcfv_push_vid | (p->tcfv_push_prio << VLAN_PRIO_SHIFT)); if (err) goto drop; break; case TCA_VLAN_ACT_MODIFY: /* No-op if no vlan tag (either hw-accel or in-payload) */ if (!skb_vlan_tagged(skb)) goto out; /* extract existing tag (and guarantee no hw-accel tag) */ if (skb_vlan_tag_present(skb)) { tci = skb_vlan_tag_get(skb); __vlan_hwaccel_clear_tag(skb); } else { /* in-payload vlan tag, pop it */ err = __skb_vlan_pop(skb, &tci); if (err) goto drop; } /* replace the vid */ tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid; /* replace prio bits, if tcfv_push_prio specified */ if (p->tcfv_push_prio_exists) { tci &= ~VLAN_PRIO_MASK; tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT; } /* put updated tci as hwaccel tag */ __vlan_hwaccel_put_tag(skb, p->tcfv_push_proto, tci); break; case TCA_VLAN_ACT_POP_ETH: err = skb_eth_pop(skb); if (err) goto drop; break; case TCA_VLAN_ACT_PUSH_ETH: err = skb_eth_push(skb, p->tcfv_push_dst, p->tcfv_push_src); if (err) goto drop; break; default: BUG(); } out: if (skb_at_tc_ingress(skb)) skb_pull_rcsum(skb, skb->mac_len); skb_reset_mac_len(skb); return action; drop: tcf_action_inc_drop_qstats(&v->common); return TC_ACT_SHOT; } static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { [TCA_VLAN_UNSPEC] = { .strict_start_type = TCA_VLAN_PUSH_ETH_DST }, [TCA_VLAN_PARMS] = { .len = sizeof(struct tc_vlan) }, [TCA_VLAN_PUSH_VLAN_ID] = { .type = NLA_U16 }, [TCA_VLAN_PUSH_VLAN_PROTOCOL] = { .type = NLA_U16 }, [TCA_VLAN_PUSH_VLAN_PRIORITY] = { .type = NLA_U8 }, [TCA_VLAN_PUSH_ETH_DST] = NLA_POLICY_ETH_ADDR, [TCA_VLAN_PUSH_ETH_SRC] = NLA_POLICY_ETH_ADDR, }; static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_vlan_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_VLAN_MAX + 1]; struct tcf_chain *goto_ch = NULL; bool push_prio_exists = false; struct tcf_vlan_params *p; struct tc_vlan *parm; struct tcf_vlan *v; int action; u16 push_vid = 0; __be16 push_proto = 0; u8 push_prio = 0; bool exists = false; int ret = 0, err; u32 index; if (!nla) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_VLAN_MAX, nla, vlan_policy, NULL); if (err < 0) return err; if (!tb[TCA_VLAN_PARMS]) return -EINVAL; parm = nla_data(tb[TCA_VLAN_PARMS]); index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; if (exists && bind) return ACT_P_BOUND; switch (parm->v_action) { case TCA_VLAN_ACT_POP: break; case TCA_VLAN_ACT_PUSH: case TCA_VLAN_ACT_MODIFY: if (!tb[TCA_VLAN_PUSH_VLAN_ID]) { if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); return -EINVAL; } push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]); if (push_vid >= VLAN_VID_MASK) { if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); return -ERANGE; } if (tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]) { push_proto = nla_get_be16(tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]); switch (push_proto) { case htons(ETH_P_8021Q): case htons(ETH_P_8021AD): break; default: if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); return -EPROTONOSUPPORT; } } else { push_proto = htons(ETH_P_8021Q); } push_prio_exists = !!tb[TCA_VLAN_PUSH_VLAN_PRIORITY]; if (push_prio_exists) push_prio = nla_get_u8(tb[TCA_VLAN_PUSH_VLAN_PRIORITY]); break; case TCA_VLAN_ACT_POP_ETH: break; case TCA_VLAN_ACT_PUSH_ETH: if (!tb[TCA_VLAN_PUSH_ETH_DST] || !tb[TCA_VLAN_PUSH_ETH_SRC]) { if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); return -EINVAL; } break; default: if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); return -EINVAL; } action = parm->v_action; if (!exists) { ret = tcf_idr_create_from_flags(tn, index, est, a, &act_vlan_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; v = to_vlan(*a); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) { err = -ENOMEM; goto put_chain; } p->tcfv_action = action; p->tcfv_push_vid = push_vid; p->tcfv_push_prio = push_prio; p->tcfv_push_prio_exists = push_prio_exists || action == TCA_VLAN_ACT_PUSH; p->tcfv_push_proto = push_proto; if (action == TCA_VLAN_ACT_PUSH_ETH) { nla_memcpy(&p->tcfv_push_dst, tb[TCA_VLAN_PUSH_ETH_DST], ETH_ALEN); nla_memcpy(&p->tcfv_push_src, tb[TCA_VLAN_PUSH_ETH_SRC], ETH_ALEN); } spin_lock_bh(&v->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); p = rcu_replace_pointer(v->vlan_p, p, lockdep_is_held(&v->tcf_lock)); spin_unlock_bh(&v->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); if (p) kfree_rcu(p, rcu); return ret; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*a, bind); return err; } static void tcf_vlan_cleanup(struct tc_action *a) { struct tcf_vlan *v = to_vlan(a); struct tcf_vlan_params *p; p = rcu_dereference_protected(v->vlan_p, 1); if (p) kfree_rcu(p, rcu); } static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_vlan *v = to_vlan(a); struct tcf_vlan_params *p; struct tc_vlan opt = { .index = v->tcf_index, .refcnt = refcount_read(&v->tcf_refcnt) - ref, .bindcnt = atomic_read(&v->tcf_bindcnt) - bind, }; struct tcf_t t; spin_lock_bh(&v->tcf_lock); opt.action = v->tcf_action; p = rcu_dereference_protected(v->vlan_p, lockdep_is_held(&v->tcf_lock)); opt.v_action = p->tcfv_action; if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if ((p->tcfv_action == TCA_VLAN_ACT_PUSH || p->tcfv_action == TCA_VLAN_ACT_MODIFY) && (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, p->tcfv_push_vid) || nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, p->tcfv_push_proto) || (p->tcfv_push_prio_exists && nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY, p->tcfv_push_prio)))) goto nla_put_failure; if (p->tcfv_action == TCA_VLAN_ACT_PUSH_ETH) { if (nla_put(skb, TCA_VLAN_PUSH_ETH_DST, ETH_ALEN, p->tcfv_push_dst)) goto nla_put_failure; if (nla_put(skb, TCA_VLAN_PUSH_ETH_SRC, ETH_ALEN, p->tcfv_push_src)) goto nla_put_failure; } tcf_tm_dump(&t, &v->tcf_tm); if (nla_put_64bit(skb, TCA_VLAN_TM, sizeof(t), &t, TCA_VLAN_PAD)) goto nla_put_failure; spin_unlock_bh(&v->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&v->tcf_lock); nlmsg_trim(skb, b); return -1; } static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { struct tcf_vlan *v = to_vlan(a); struct tcf_t *tm = &v->tcf_tm; tcf_action_update_stats(a, bytes, packets, drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static size_t tcf_vlan_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_vlan)) + nla_total_size(sizeof(u16)) /* TCA_VLAN_PUSH_VLAN_ID */ + nla_total_size(sizeof(u16)) /* TCA_VLAN_PUSH_VLAN_PROTOCOL */ + nla_total_size(sizeof(u8)); /* TCA_VLAN_PUSH_VLAN_PRIORITY */ } static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, bool bind, struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; switch (tcf_vlan_action(act)) { case TCA_VLAN_ACT_PUSH: entry->id = FLOW_ACTION_VLAN_PUSH; entry->vlan.vid = tcf_vlan_push_vid(act); entry->vlan.proto = tcf_vlan_push_proto(act); entry->vlan.prio = tcf_vlan_push_prio(act); break; case TCA_VLAN_ACT_POP: entry->id = FLOW_ACTION_VLAN_POP; break; case TCA_VLAN_ACT_MODIFY: entry->id = FLOW_ACTION_VLAN_MANGLE; entry->vlan.vid = tcf_vlan_push_vid(act); entry->vlan.proto = tcf_vlan_push_proto(act); entry->vlan.prio = tcf_vlan_push_prio(act); break; case TCA_VLAN_ACT_POP_ETH: entry->id = FLOW_ACTION_VLAN_POP_ETH; break; case TCA_VLAN_ACT_PUSH_ETH: entry->id = FLOW_ACTION_VLAN_PUSH_ETH; tcf_vlan_push_eth(entry->vlan_push_eth.src, entry->vlan_push_eth.dst, act); break; default: NL_SET_ERR_MSG_MOD(extack, "Unsupported vlan action mode offload"); return -EOPNOTSUPP; } *index_inc = 1; } else { struct flow_offload_action *fl_action = entry_data; switch (tcf_vlan_action(act)) { case TCA_VLAN_ACT_PUSH: fl_action->id = FLOW_ACTION_VLAN_PUSH; break; case TCA_VLAN_ACT_POP: fl_action->id = FLOW_ACTION_VLAN_POP; break; case TCA_VLAN_ACT_MODIFY: fl_action->id = FLOW_ACTION_VLAN_MANGLE; break; case TCA_VLAN_ACT_POP_ETH: fl_action->id = FLOW_ACTION_VLAN_POP_ETH; break; case TCA_VLAN_ACT_PUSH_ETH: fl_action->id = FLOW_ACTION_VLAN_PUSH_ETH; break; default: return -EOPNOTSUPP; } } return 0; } static struct tc_action_ops act_vlan_ops = { .kind = "vlan", .id = TCA_ID_VLAN, .owner = THIS_MODULE, .act = tcf_vlan_act, .dump = tcf_vlan_dump, .init = tcf_vlan_init, .cleanup = tcf_vlan_cleanup, .stats_update = tcf_vlan_stats_update, .get_fill_size = tcf_vlan_get_fill_size, .offload_act_setup = tcf_vlan_offload_act_setup, .size = sizeof(struct tcf_vlan), }; MODULE_ALIAS_NET_ACT("vlan"); static __net_init int vlan_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_vlan_ops.net_id); return tc_action_net_init(net, tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_vlan_ops.net_id); } static struct pernet_operations vlan_net_ops = { .init = vlan_init_net, .exit_batch = vlan_exit_net, .id = &act_vlan_ops.net_id, .size = sizeof(struct tc_action_net), }; static int __init vlan_init_module(void) { return tcf_register_action(&act_vlan_ops, &vlan_net_ops); } static void __exit vlan_cleanup_module(void) { tcf_unregister_action(&act_vlan_ops, &vlan_net_ops); } module_init(vlan_init_module); module_exit(vlan_cleanup_module); MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); MODULE_DESCRIPTION("vlan manipulation actions"); MODULE_LICENSE("GPL v2"); |
159 162 161 105 103 3 104 3 104 103 1 106 9 97 29 11 28 33 33 5 30 11 11 5 3 5 154 174 176 170 6 4 2 94 130 172 175 9 165 102 127 91 94 161 118 106 26 26 3 7 7 7 7 23 9 21 26 26 24 24 1 9 5 19 6 5 1 6 169 9 167 7 168 168 7 6 33 5 26 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 | // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001-2003 International Business Machines Corp. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 La Monte H.P. Yarroll * * This file is part of the SCTP kernel implementation * * This module provides the abstraction for an SCTP transport representing * a remote transport address. For local transport addresses, we just use * union sctp_addr. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Jon Grimm <jgrimm@us.ibm.com> * Xingang Guo <xingang.guo@intel.com> * Hui Huang <hui.huang@nokia.com> * Sridhar Samudrala <sri@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/slab.h> #include <linux/types.h> #include <linux/random.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> /* 1st Level Abstractions. */ /* Initialize a new transport from provided memory. */ static struct sctp_transport *sctp_transport_init(struct net *net, struct sctp_transport *peer, const union sctp_addr *addr, gfp_t gfp) { /* Copy in the address. */ peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); memcpy(&peer->ipaddr, addr, peer->af_specific->sockaddr_len); memset(&peer->saddr, 0, sizeof(union sctp_addr)); peer->sack_generation = 0; /* From 6.3.1 RTO Calculation: * * C1) Until an RTT measurement has been made for a packet sent to the * given destination transport address, set RTO to the protocol * parameter 'RTO.Initial'. */ peer->rto = msecs_to_jiffies(net->sctp.rto_initial); peer->last_time_heard = 0; peer->last_time_ecne_reduced = jiffies; peer->param_flags = SPP_HB_DISABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; /* Initialize the default path max_retrans. */ peer->pathmaxrxt = net->sctp.max_retrans_path; peer->pf_retrans = net->sctp.pf_retrans; INIT_LIST_HEAD(&peer->transmitted); INIT_LIST_HEAD(&peer->send_ready); INIT_LIST_HEAD(&peer->transports); timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0); timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0); timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0); timer_setup(&peer->probe_timer, sctp_generate_probe_event, 0); timer_setup(&peer->proto_unreach_timer, sctp_generate_proto_unreach_event, 0); /* Initialize the 64-bit random nonce sent with heartbeat. */ get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); refcount_set(&peer->refcnt, 1); return peer; } /* Allocate and initialize a new transport. */ struct sctp_transport *sctp_transport_new(struct net *net, const union sctp_addr *addr, gfp_t gfp) { struct sctp_transport *transport; transport = kzalloc(sizeof(*transport), gfp); if (!transport) goto fail; if (!sctp_transport_init(net, transport, addr, gfp)) goto fail_init; SCTP_DBG_OBJCNT_INC(transport); return transport; fail_init: kfree(transport); fail: return NULL; } /* This transport is no longer needed. Free up if possible, or * delay until it last reference count. */ void sctp_transport_free(struct sctp_transport *transport) { /* Try to delete the heartbeat timer. */ if (del_timer(&transport->hb_timer)) sctp_transport_put(transport); /* Delete the T3_rtx timer if it's active. * There is no point in not doing this now and letting * structure hang around in memory since we know * the transport is going away. */ if (del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); if (del_timer(&transport->reconf_timer)) sctp_transport_put(transport); if (del_timer(&transport->probe_timer)) sctp_transport_put(transport); /* Delete the ICMP proto unreachable timer if it's active. */ if (del_timer(&transport->proto_unreach_timer)) sctp_transport_put(transport); sctp_transport_put(transport); } static void sctp_transport_destroy_rcu(struct rcu_head *head) { struct sctp_transport *transport; transport = container_of(head, struct sctp_transport, rcu); dst_release(transport->dst); kfree(transport); SCTP_DBG_OBJCNT_DEC(transport); } /* Destroy the transport data structure. * Assumes there are no more users of this structure. */ static void sctp_transport_destroy(struct sctp_transport *transport) { if (unlikely(refcount_read(&transport->refcnt))) { WARN(1, "Attempt to destroy undead transport %p!\n", transport); return; } sctp_packet_free(&transport->packet); if (transport->asoc) sctp_association_put(transport->asoc); call_rcu(&transport->rcu, sctp_transport_destroy_rcu); } /* Start T3_rtx timer if it is not already running and update the heartbeat * timer. This routine is called every time a DATA chunk is sent. */ void sctp_transport_reset_t3_rtx(struct sctp_transport *transport) { /* RFC 2960 6.3.2 Retransmission Timer Rules * * R1) Every time a DATA chunk is sent to any address(including a * retransmission), if the T3-rtx timer of that address is not running * start it running so that it will expire after the RTO of that * address. */ if (!timer_pending(&transport->T3_rtx_timer)) if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) sctp_transport_hold(transport); } void sctp_transport_reset_hb_timer(struct sctp_transport *transport) { unsigned long expires; /* When a data chunk is sent, reset the heartbeat interval. */ expires = jiffies + sctp_transport_timeout(transport); if (!mod_timer(&transport->hb_timer, expires + get_random_u32_below(transport->rto))) sctp_transport_hold(transport); } void sctp_transport_reset_reconf_timer(struct sctp_transport *transport) { if (!timer_pending(&transport->reconf_timer)) if (!mod_timer(&transport->reconf_timer, jiffies + transport->rto)) sctp_transport_hold(transport); } void sctp_transport_reset_probe_timer(struct sctp_transport *transport) { if (!mod_timer(&transport->probe_timer, jiffies + transport->probe_interval)) sctp_transport_hold(transport); } void sctp_transport_reset_raise_timer(struct sctp_transport *transport) { if (!mod_timer(&transport->probe_timer, jiffies + transport->probe_interval * 30)) sctp_transport_hold(transport); } /* This transport has been assigned to an association. * Initialize fields from the association or from the sock itself. * Register the reference count in the association. */ void sctp_transport_set_owner(struct sctp_transport *transport, struct sctp_association *asoc) { transport->asoc = asoc; sctp_association_hold(asoc); } /* Initialize the pmtu of a transport. */ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) { /* If we don't have a fresh route, look one up */ if (!transport->dst || transport->dst->obsolete) { sctp_transport_dst_release(transport); transport->af_specific->get_dst(transport, &transport->saddr, &transport->fl, sk); } if (transport->param_flags & SPP_PMTUD_DISABLE) { struct sctp_association *asoc = transport->asoc; if (!transport->pathmtu && asoc && asoc->pathmtu) transport->pathmtu = asoc->pathmtu; if (transport->pathmtu) return; } if (transport->dst) transport->pathmtu = sctp_dst_mtu(transport->dst); else transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; sctp_transport_pl_update(transport); } void sctp_transport_pl_send(struct sctp_transport *t) { if (t->pl.probe_count < SCTP_MAX_PROBES) goto out; t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */ t->pl.pmtu = SCTP_BASE_PLPMTU; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); } } else if (t->pl.state == SCTP_PL_SEARCH) { if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */ t->pl.state = SCTP_PL_BASE; /* Search -> Base */ t->pl.probe_size = SCTP_BASE_PLPMTU; t->pl.probe_high = 0; t->pl.pmtu = SCTP_BASE_PLPMTU; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); } else { /* Normal probe failure. */ t->pl.probe_high = t->pl.probe_size; t->pl.probe_size = t->pl.pmtu; } } else if (t->pl.state == SCTP_PL_COMPLETE) { if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */ t->pl.state = SCTP_PL_BASE; /* Search Complete -> Base */ t->pl.probe_size = SCTP_BASE_PLPMTU; t->pl.pmtu = SCTP_BASE_PLPMTU; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); } } out: pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); t->pl.probe_count++; } bool sctp_transport_pl_recv(struct sctp_transport *t) { pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); t->pl.pmtu = t->pl.probe_size; t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { t->pl.state = SCTP_PL_SEARCH; /* Base -> Search */ t->pl.probe_size += SCTP_PL_BIG_STEP; } else if (t->pl.state == SCTP_PL_ERROR) { t->pl.state = SCTP_PL_SEARCH; /* Error -> Search */ t->pl.pmtu = t->pl.probe_size; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); t->pl.probe_size += SCTP_PL_BIG_STEP; } else if (t->pl.state == SCTP_PL_SEARCH) { if (!t->pl.probe_high) { if (t->pl.probe_size < SCTP_MAX_PLPMTU) { t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP, SCTP_MAX_PLPMTU); return false; } t->pl.probe_high = SCTP_MAX_PLPMTU; } t->pl.probe_size += SCTP_PL_MIN_STEP; if (t->pl.probe_size >= t->pl.probe_high) { t->pl.probe_high = 0; t->pl.state = SCTP_PL_COMPLETE; /* Search -> Search Complete */ t->pl.probe_size = t->pl.pmtu; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); sctp_transport_reset_raise_timer(t); } } else if (t->pl.state == SCTP_PL_COMPLETE) { /* Raise probe_size again after 30 * interval in Search Complete */ t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_MIN_STEP, SCTP_MAX_PLPMTU); } return t->pl.state == SCTP_PL_COMPLETE; } static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu) { pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, ptb: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, pmtu); if (pmtu < SCTP_MIN_PLPMTU || pmtu >= t->pl.probe_size) return false; if (t->pl.state == SCTP_PL_BASE) { if (pmtu >= SCTP_MIN_PLPMTU && pmtu < SCTP_BASE_PLPMTU) { t->pl.state = SCTP_PL_ERROR; /* Base -> Error */ t->pl.pmtu = SCTP_BASE_PLPMTU; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); return true; } } else if (t->pl.state == SCTP_PL_SEARCH) { if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) { t->pl.state = SCTP_PL_BASE; /* Search -> Base */ t->pl.probe_size = SCTP_BASE_PLPMTU; t->pl.probe_count = 0; t->pl.probe_high = 0; t->pl.pmtu = SCTP_BASE_PLPMTU; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); return true; } else if (pmtu > t->pl.pmtu && pmtu < t->pl.probe_size) { t->pl.probe_size = pmtu; t->pl.probe_count = 0; } } else if (t->pl.state == SCTP_PL_COMPLETE) { if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) { t->pl.state = SCTP_PL_BASE; /* Complete -> Base */ t->pl.probe_size = SCTP_BASE_PLPMTU; t->pl.probe_count = 0; t->pl.probe_high = 0; t->pl.pmtu = SCTP_BASE_PLPMTU; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_transport_reset_probe_timer(t); return true; } } return false; } bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { struct sock *sk = t->asoc->base.sk; struct dst_entry *dst; bool change = true; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n", __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); /* Use default minimum segment instead */ pmtu = SCTP_DEFAULT_MINSEGMENT; } pmtu = SCTP_TRUNC4(pmtu); if (sctp_transport_pl_enabled(t)) return sctp_transport_pl_toobig(t, pmtu - sctp_transport_pl_hlen(t)); dst = sctp_transport_dst_check(t); if (dst) { struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family); union sctp_addr addr; pf->af->from_sk(&addr, sk); pf->to_sk_daddr(&t->ipaddr, sk); dst->ops->update_pmtu(dst, sk, NULL, pmtu, true); pf->to_sk_daddr(&addr, sk); dst = sctp_transport_dst_check(t); } if (!dst) { t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); dst = t->dst; } if (dst) { /* Re-fetch, as under layers may have a higher minimum size */ pmtu = sctp_dst_mtu(dst); change = t->pathmtu != pmtu; } t->pathmtu = pmtu; return change; } /* Caches the dst entry and source address for a transport's destination * address. */ void sctp_transport_route(struct sctp_transport *transport, union sctp_addr *saddr, struct sctp_sock *opt) { struct sctp_association *asoc = transport->asoc; struct sctp_af *af = transport->af_specific; sctp_transport_dst_release(transport); af->get_dst(transport, saddr, &transport->fl, sctp_opt2sk(opt)); if (saddr) memcpy(&transport->saddr, saddr, sizeof(union sctp_addr)); else af->get_saddr(opt, transport, &transport->fl); sctp_transport_pmtu(transport, sctp_opt2sk(opt)); /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). */ if (transport->dst && asoc && (!asoc->peer.primary_path || transport == asoc->peer.active_path)) opt->pf->to_sk_saddr(&transport->saddr, asoc->base.sk); } /* Hold a reference to a transport. */ int sctp_transport_hold(struct sctp_transport *transport) { return refcount_inc_not_zero(&transport->refcnt); } /* Release a reference to a transport and clean up * if there are no more references. */ void sctp_transport_put(struct sctp_transport *transport) { if (refcount_dec_and_test(&transport->refcnt)) sctp_transport_destroy(transport); } /* Update transport's RTO based on the newly calculated RTT. */ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) { if (unlikely(!tp->rto_pending)) /* We should not be doing any RTO updates unless rto_pending is set. */ pr_debug("%s: rto_pending not set on transport %p!\n", __func__, tp); if (tp->rttvar || tp->srtt) { struct net *net = tp->asoc->base.net; /* 6.3.1 C3) When a new RTT measurement R' is made, set * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' */ /* Note: The above algorithm has been rewritten to * express rto_beta and rto_alpha as inverse powers * of two. * For example, assuming the default value of RTO.Alpha of * 1/8, rto_alpha would be expressed as 3. */ tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta); tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) + (rtt >> net->sctp.rto_alpha); } else { /* 6.3.1 C2) When the first RTT measurement R is made, set * SRTT <- R, RTTVAR <- R/2. */ tp->srtt = rtt; tp->rttvar = rtt >> 1; } /* 6.3.1 G1) Whenever RTTVAR is computed, if RTTVAR = 0, then * adjust RTTVAR <- G, where G is the CLOCK GRANULARITY. */ if (tp->rttvar == 0) tp->rttvar = SCTP_CLOCK_GRANULARITY; /* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. */ tp->rto = tp->srtt + (tp->rttvar << 2); /* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min * seconds then it is rounded up to RTO.Min seconds. */ if (tp->rto < tp->asoc->rto_min) tp->rto = tp->asoc->rto_min; /* 6.3.1 C7) A maximum value may be placed on RTO provided it is * at least RTO.max seconds. */ if (tp->rto > tp->asoc->rto_max) tp->rto = tp->asoc->rto_max; sctp_max_rto(tp->asoc, tp); tp->rtt = rtt; /* Reset rto_pending so that a new RTT measurement is started when a * new data chunk is sent. */ tp->rto_pending = 0; pr_debug("%s: transport:%p, rtt:%d, srtt:%d rttvar:%d, rto:%ld\n", __func__, tp, rtt, tp->srtt, tp->rttvar, tp->rto); } /* This routine updates the transport's cwnd and partial_bytes_acked * parameters based on the bytes acked in the received SACK. */ void sctp_transport_raise_cwnd(struct sctp_transport *transport, __u32 sack_ctsn, __u32 bytes_acked) { struct sctp_association *asoc = transport->asoc; __u32 cwnd, ssthresh, flight_size, pba, pmtu; cwnd = transport->cwnd; flight_size = transport->flight_size; /* See if we need to exit Fast Recovery first */ if (asoc->fast_recovery && TSN_lte(asoc->fast_recovery_exit, sack_ctsn)) asoc->fast_recovery = 0; ssthresh = transport->ssthresh; pba = transport->partial_bytes_acked; pmtu = transport->asoc->pathmtu; if (cwnd <= ssthresh) { /* RFC 4960 7.2.1 * o When cwnd is less than or equal to ssthresh, an SCTP * endpoint MUST use the slow-start algorithm to increase * cwnd only if the current congestion window is being fully * utilized, an incoming SACK advances the Cumulative TSN * Ack Point, and the data sender is not in Fast Recovery. * Only when these three conditions are met can the cwnd be * increased; otherwise, the cwnd MUST not be increased. * If these conditions are met, then cwnd MUST be increased * by, at most, the lesser of 1) the total size of the * previously outstanding DATA chunk(s) acknowledged, and * 2) the destination's path MTU. This upper bound protects * against the ACK-Splitting attack outlined in [SAVAGE99]. */ if (asoc->fast_recovery) return; /* The appropriate cwnd increase algorithm is performed * if, and only if the congestion window is being fully * utilized. Note that RFC4960 Errata 3.22 removed the * other condition on ctsn moving. */ if (flight_size < cwnd) return; if (bytes_acked > pmtu) cwnd += pmtu; else cwnd += bytes_acked; pr_debug("%s: slow start: transport:%p, bytes_acked:%d, " "cwnd:%d, ssthresh:%d, flight_size:%d, pba:%d\n", __func__, transport, bytes_acked, cwnd, ssthresh, flight_size, pba); } else { /* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh, * upon each SACK arrival, increase partial_bytes_acked * by the total number of bytes of all new chunks * acknowledged in that SACK including chunks * acknowledged by the new Cumulative TSN Ack and by Gap * Ack Blocks. (updated by RFC4960 Errata 3.22) * * When partial_bytes_acked is greater than cwnd and * before the arrival of the SACK the sender had less * bytes of data outstanding than cwnd (i.e., before * arrival of the SACK, flightsize was less than cwnd), * reset partial_bytes_acked to cwnd. (RFC 4960 Errata * 3.26) * * When partial_bytes_acked is equal to or greater than * cwnd and before the arrival of the SACK the sender * had cwnd or more bytes of data outstanding (i.e., * before arrival of the SACK, flightsize was greater * than or equal to cwnd), partial_bytes_acked is reset * to (partial_bytes_acked - cwnd). Next, cwnd is * increased by MTU. (RFC 4960 Errata 3.12) */ pba += bytes_acked; if (pba > cwnd && flight_size < cwnd) pba = cwnd; if (pba >= cwnd && flight_size >= cwnd) { pba = pba - cwnd; cwnd += pmtu; } pr_debug("%s: congestion avoidance: transport:%p, " "bytes_acked:%d, cwnd:%d, ssthresh:%d, " "flight_size:%d, pba:%d\n", __func__, transport, bytes_acked, cwnd, ssthresh, flight_size, pba); } transport->cwnd = cwnd; transport->partial_bytes_acked = pba; } /* This routine is used to lower the transport's cwnd when congestion is * detected. */ void sctp_transport_lower_cwnd(struct sctp_transport *transport, enum sctp_lower_cwnd reason) { struct sctp_association *asoc = transport->asoc; switch (reason) { case SCTP_LOWER_CWND_T3_RTX: /* RFC 2960 Section 7.2.3, sctpimpguide * When the T3-rtx timer expires on an address, SCTP should * perform slow start by: * ssthresh = max(cwnd/2, 4*MTU) * cwnd = 1*MTU * partial_bytes_acked = 0 */ transport->ssthresh = max(transport->cwnd/2, 4*asoc->pathmtu); transport->cwnd = asoc->pathmtu; /* T3-rtx also clears fast recovery */ asoc->fast_recovery = 0; break; case SCTP_LOWER_CWND_FAST_RTX: /* RFC 2960 7.2.4 Adjust the ssthresh and cwnd of the * destination address(es) to which the missing DATA chunks * were last sent, according to the formula described in * Section 7.2.3. * * RFC 2960 7.2.3, sctpimpguide Upon detection of packet * losses from SACK (see Section 7.2.4), An endpoint * should do the following: * ssthresh = max(cwnd/2, 4*MTU) * cwnd = ssthresh * partial_bytes_acked = 0 */ if (asoc->fast_recovery) return; /* Mark Fast recovery */ asoc->fast_recovery = 1; asoc->fast_recovery_exit = asoc->next_tsn - 1; transport->ssthresh = max(transport->cwnd/2, 4*asoc->pathmtu); transport->cwnd = transport->ssthresh; break; case SCTP_LOWER_CWND_ECNE: /* RFC 2481 Section 6.1.2. * If the sender receives an ECN-Echo ACK packet * then the sender knows that congestion was encountered in the * network on the path from the sender to the receiver. The * indication of congestion should be treated just as a * congestion loss in non-ECN Capable TCP. That is, the TCP * source halves the congestion window "cwnd" and reduces the * slow start threshold "ssthresh". * A critical condition is that TCP does not react to * congestion indications more than once every window of * data (or more loosely more than once every round-trip time). */ if (time_after(jiffies, transport->last_time_ecne_reduced + transport->rtt)) { transport->ssthresh = max(transport->cwnd/2, 4*asoc->pathmtu); transport->cwnd = transport->ssthresh; transport->last_time_ecne_reduced = jiffies; } break; case SCTP_LOWER_CWND_INACTIVE: /* RFC 2960 Section 7.2.1, sctpimpguide * When the endpoint does not transmit data on a given * transport address, the cwnd of the transport address * should be adjusted to max(cwnd/2, 4*MTU) per RTO. * NOTE: Although the draft recommends that this check needs * to be done every RTO interval, we do it every hearbeat * interval. */ transport->cwnd = max(transport->cwnd/2, 4*asoc->pathmtu); /* RFC 4960 Errata 3.27.2: also adjust sshthresh */ transport->ssthresh = transport->cwnd; break; } transport->partial_bytes_acked = 0; pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d\n", __func__, transport, reason, transport->cwnd, transport->ssthresh); } /* Apply Max.Burst limit to the congestion window: * sctpimpguide-05 2.14.2 * D) When the time comes for the sender to * transmit new DATA chunks, the protocol parameter Max.Burst MUST * first be applied to limit how many new DATA chunks may be sent. * The limit is applied by adjusting cwnd as follows: * if ((flightsize+ Max.Burst * MTU) < cwnd) * cwnd = flightsize + Max.Burst * MTU */ void sctp_transport_burst_limited(struct sctp_transport *t) { struct sctp_association *asoc = t->asoc; u32 old_cwnd = t->cwnd; u32 max_burst_bytes; if (t->burst_limited || asoc->max_burst == 0) return; max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu); if (max_burst_bytes < old_cwnd) { t->cwnd = max_burst_bytes; t->burst_limited = old_cwnd; } } /* Restore the old cwnd congestion window, after the burst had it's * desired effect. */ void sctp_transport_burst_reset(struct sctp_transport *t) { if (t->burst_limited) { t->cwnd = t->burst_limited; t->burst_limited = 0; } } /* What is the next timeout value for this transport? */ unsigned long sctp_transport_timeout(struct sctp_transport *trans) { /* RTO + timer slack +/- 50% of RTO */ unsigned long timeout = trans->rto >> 1; if (trans->state != SCTP_UNCONFIRMED && trans->state != SCTP_PF) timeout += trans->hbinterval; return max_t(unsigned long, timeout, HZ / 5); } /* Reset transport variables to their initial values */ void sctp_transport_reset(struct sctp_transport *t) { struct sctp_association *asoc = t->asoc; /* RFC 2960 (bis), Section 5.2.4 * All the congestion control parameters (e.g., cwnd, ssthresh) * related to this peer MUST be reset to their initial values * (see Section 6.2.1) */ t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); t->burst_limited = 0; t->ssthresh = asoc->peer.i.a_rwnd; t->rto = asoc->rto_initial; sctp_max_rto(asoc, t); t->rtt = 0; t->srtt = 0; t->rttvar = 0; /* Reset these additional variables so that we have a clean slate. */ t->partial_bytes_acked = 0; t->flight_size = 0; t->error_count = 0; t->rto_pending = 0; t->hb_sent = 0; /* Initialize the state information for SFR-CACC */ t->cacc.changeover_active = 0; t->cacc.cycling_changeover = 0; t->cacc.next_tsn_at_change = 0; t->cacc.cacc_saw_newack = 0; } /* Schedule retransmission on the given transport */ void sctp_transport_immediate_rtx(struct sctp_transport *t) { /* Stop pending T3_rtx_timer */ if (del_timer(&t->T3_rtx_timer)) sctp_transport_put(t); sctp_retransmit(&t->asoc->outqueue, t, SCTP_RTXR_T3_RTX); if (!timer_pending(&t->T3_rtx_timer)) { if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto)) sctp_transport_hold(t); } } /* Drop dst */ void sctp_transport_dst_release(struct sctp_transport *t) { dst_release(t->dst); t->dst = NULL; t->dst_pending_confirm = 0; } /* Schedule neighbour confirm */ void sctp_transport_dst_confirm(struct sctp_transport *t) { t->dst_pending_confirm = 1; } |
5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 | // SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * smc_sysctl.c: sysctl interface to SMC subsystem. * * Copyright (c) 2022, Alibaba Inc. * * Author: Tony Lu <tonylu@linux.alibaba.com> * */ #include <linux/init.h> #include <linux/sysctl.h> #include <net/net_namespace.h> #include "smc.h" #include "smc_core.h" #include "smc_llc.h" #include "smc_sysctl.h" static int min_sndbuf = SMC_BUF_MIN_SIZE; static int min_rcvbuf = SMC_BUF_MIN_SIZE; static int max_sndbuf = INT_MAX / 2; static int max_rcvbuf = INT_MAX / 2; static const int net_smc_wmem_init = (64 * 1024); static const int net_smc_rmem_init = (64 * 1024); static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN; static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX; static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN; static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX; static struct ctl_table smc_table[] = { { .procname = "autocorking_size", .data = &init_net.smc.sysctl_autocorking_size, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_douintvec, }, { .procname = "smcr_buf_type", .data = &init_net.smc.sysctl_smcr_buf_type, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_douintvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, { .procname = "smcr_testlink_time", .data = &init_net.smc.sysctl_smcr_testlink_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "wmem", .data = &init_net.smc.sysctl_wmem, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_sndbuf, .extra2 = &max_sndbuf, }, { .procname = "rmem", .data = &init_net.smc.sysctl_rmem, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_rcvbuf, .extra2 = &max_rcvbuf, }, { .procname = "smcr_max_links_per_lgr", .data = &init_net.smc.sysctl_max_links_per_lgr, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &links_per_lgr_min, .extra2 = &links_per_lgr_max, }, { .procname = "smcr_max_conns_per_lgr", .data = &init_net.smc.sysctl_max_conns_per_lgr, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &conns_per_lgr_min, .extra2 = &conns_per_lgr_max, }, { .procname = "limit_smc_hs", .data = &init_net.smc.limit_smc_hs, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, }; int __net_init smc_sysctl_net_init(struct net *net) { size_t table_size = ARRAY_SIZE(smc_table); struct ctl_table *table; table = smc_table; if (!net_eq(net, &init_net)) { int i; table = kmemdup(table, sizeof(smc_table), GFP_KERNEL); if (!table) goto err_alloc; for (i = 0; i < table_size; i++) table[i].data += (void *)net - (void *)&init_net; } net->smc.smc_hdr = register_net_sysctl_sz(net, "net/smc", table, table_size); if (!net->smc.smc_hdr) goto err_reg; net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS; net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME; WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init); WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init); net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; /* disable handshake limitation by default */ net->smc.limit_smc_hs = 0; return 0; err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } void __net_exit smc_sysctl_net_exit(struct net *net) { const struct ctl_table *table; table = net->smc.smc_hdr->ctl_table_arg; unregister_net_sysctl_table(net->smc.smc_hdr); if (!net_eq(net, &init_net)) kfree(table); } |
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 | // SPDX-License-Identifier: GPL-2.0-only /* * vDPA bus. * * Copyright (c) 2020, Red Hat. All rights reserved. * Author: Jason Wang <jasowang@redhat.com> * */ #include <linux/module.h> #include <linux/idr.h> #include <linux/slab.h> #include <linux/vdpa.h> #include <uapi/linux/vdpa.h> #include <net/genetlink.h> #include <linux/mod_devicetable.h> #include <linux/virtio_ids.h> static LIST_HEAD(mdev_head); /* A global mutex that protects vdpa management device and device level operations. */ static DECLARE_RWSEM(vdpa_dev_lock); static DEFINE_IDA(vdpa_index_ida); void vdpa_set_status(struct vdpa_device *vdev, u8 status) { down_write(&vdev->cf_lock); vdev->config->set_status(vdev, status); up_write(&vdev->cf_lock); } EXPORT_SYMBOL(vdpa_set_status); static struct genl_family vdpa_nl_family; static int vdpa_dev_probe(struct device *d) { struct vdpa_device *vdev = dev_to_vdpa(d); struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); const struct vdpa_config_ops *ops = vdev->config; u32 max_num, min_num = 1; int ret = 0; d->dma_mask = &d->coherent_dma_mask; ret = dma_set_mask_and_coherent(d, DMA_BIT_MASK(64)); if (ret) return ret; max_num = ops->get_vq_num_max(vdev); if (ops->get_vq_num_min) min_num = ops->get_vq_num_min(vdev); if (max_num < min_num) return -EINVAL; if (drv && drv->probe) ret = drv->probe(vdev); return ret; } static void vdpa_dev_remove(struct device *d) { struct vdpa_device *vdev = dev_to_vdpa(d); struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); if (drv && drv->remove) drv->remove(vdev); } static int vdpa_dev_match(struct device *dev, const struct device_driver *drv) { struct vdpa_device *vdev = dev_to_vdpa(dev); /* Check override first, and if set, only use the named driver */ if (vdev->driver_override) return strcmp(vdev->driver_override, drv->name) == 0; /* Currently devices must be supported by all vDPA bus drivers */ return 1; } static ssize_t driver_override_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct vdpa_device *vdev = dev_to_vdpa(dev); int ret; ret = driver_set_override(dev, &vdev->driver_override, buf, count); if (ret) return ret; return count; } static ssize_t driver_override_show(struct device *dev, struct device_attribute *attr, char *buf) { struct vdpa_device *vdev = dev_to_vdpa(dev); ssize_t len; device_lock(dev); len = sysfs_emit(buf, "%s\n", vdev->driver_override); device_unlock(dev); return len; } static DEVICE_ATTR_RW(driver_override); static struct attribute *vdpa_dev_attrs[] = { &dev_attr_driver_override.attr, NULL, }; static const struct attribute_group vdpa_dev_group = { .attrs = vdpa_dev_attrs, }; __ATTRIBUTE_GROUPS(vdpa_dev); static const struct bus_type vdpa_bus = { .name = "vdpa", .dev_groups = vdpa_dev_groups, .match = vdpa_dev_match, .probe = vdpa_dev_probe, .remove = vdpa_dev_remove, }; static void vdpa_release_dev(struct device *d) { struct vdpa_device *vdev = dev_to_vdpa(d); const struct vdpa_config_ops *ops = vdev->config; if (ops->free) ops->free(vdev); ida_free(&vdpa_index_ida, vdev->index); kfree(vdev->driver_override); kfree(vdev); } /** * __vdpa_alloc_device - allocate and initilaize a vDPA device * This allows driver to some prepartion after device is * initialized but before registered. * @parent: the parent device * @config: the bus operations that is supported by this device * @ngroups: number of groups supported by this device * @nas: number of address spaces supported by this device * @size: size of the parent structure that contains private data * @name: name of the vdpa device; optional. * @use_va: indicate whether virtual address must be used by this device * * Driver should use vdpa_alloc_device() wrapper macro instead of * using this directly. * * Return: Returns an error when parent/config/dma_dev is not set or fail to get * ida. */ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, unsigned int ngroups, unsigned int nas, size_t size, const char *name, bool use_va) { struct vdpa_device *vdev; int err = -EINVAL; if (!config) goto err; if (!!config->dma_map != !!config->dma_unmap) goto err; /* It should only work for the device that use on-chip IOMMU */ if (use_va && !(config->dma_map || config->set_map)) goto err; err = -ENOMEM; vdev = kzalloc(size, GFP_KERNEL); if (!vdev) goto err; err = ida_alloc(&vdpa_index_ida, GFP_KERNEL); if (err < 0) goto err_ida; vdev->dev.bus = &vdpa_bus; vdev->dev.parent = parent; vdev->dev.release = vdpa_release_dev; vdev->index = err; vdev->config = config; vdev->features_valid = false; vdev->use_va = use_va; vdev->ngroups = ngroups; vdev->nas = nas; if (name) err = dev_set_name(&vdev->dev, "%s", name); else err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index); if (err) goto err_name; init_rwsem(&vdev->cf_lock); device_initialize(&vdev->dev); return vdev; err_name: ida_free(&vdpa_index_ida, vdev->index); err_ida: kfree(vdev); err: return ERR_PTR(err); } EXPORT_SYMBOL_GPL(__vdpa_alloc_device); static int vdpa_name_match(struct device *dev, const void *data) { struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev); return (strcmp(dev_name(&vdev->dev), data) == 0); } static int __vdpa_register_device(struct vdpa_device *vdev, u32 nvqs) { struct device *dev; vdev->nvqs = nvqs; lockdep_assert_held(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, dev_name(&vdev->dev), vdpa_name_match); if (dev) { put_device(dev); return -EEXIST; } return device_add(&vdev->dev); } /** * _vdpa_register_device - register a vDPA device with vdpa lock held * Caller must have a succeed call of vdpa_alloc_device() before. * Caller must invoke this routine in the management device dev_add() * callback after setting up valid mgmtdev for this vdpa device. * @vdev: the vdpa device to be registered to vDPA bus * @nvqs: number of virtqueues supported by this device * * Return: Returns an error when fail to add device to vDPA bus */ int _vdpa_register_device(struct vdpa_device *vdev, u32 nvqs) { if (!vdev->mdev) return -EINVAL; return __vdpa_register_device(vdev, nvqs); } EXPORT_SYMBOL_GPL(_vdpa_register_device); /** * vdpa_register_device - register a vDPA device * Callers must have a succeed call of vdpa_alloc_device() before. * @vdev: the vdpa device to be registered to vDPA bus * @nvqs: number of virtqueues supported by this device * * Return: Returns an error when fail to add to vDPA bus */ int vdpa_register_device(struct vdpa_device *vdev, u32 nvqs) { int err; down_write(&vdpa_dev_lock); err = __vdpa_register_device(vdev, nvqs); up_write(&vdpa_dev_lock); return err; } EXPORT_SYMBOL_GPL(vdpa_register_device); /** * _vdpa_unregister_device - unregister a vDPA device * Caller must invoke this routine as part of management device dev_del() * callback. * @vdev: the vdpa device to be unregisted from vDPA bus */ void _vdpa_unregister_device(struct vdpa_device *vdev) { lockdep_assert_held(&vdpa_dev_lock); WARN_ON(!vdev->mdev); device_unregister(&vdev->dev); } EXPORT_SYMBOL_GPL(_vdpa_unregister_device); /** * vdpa_unregister_device - unregister a vDPA device * @vdev: the vdpa device to be unregisted from vDPA bus */ void vdpa_unregister_device(struct vdpa_device *vdev) { down_write(&vdpa_dev_lock); device_unregister(&vdev->dev); up_write(&vdpa_dev_lock); } EXPORT_SYMBOL_GPL(vdpa_unregister_device); /** * __vdpa_register_driver - register a vDPA device driver * @drv: the vdpa device driver to be registered * @owner: module owner of the driver * * Return: Returns an err when fail to do the registration */ int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner) { drv->driver.bus = &vdpa_bus; drv->driver.owner = owner; return driver_register(&drv->driver); } EXPORT_SYMBOL_GPL(__vdpa_register_driver); /** * vdpa_unregister_driver - unregister a vDPA device driver * @drv: the vdpa device driver to be unregistered */ void vdpa_unregister_driver(struct vdpa_driver *drv) { driver_unregister(&drv->driver); } EXPORT_SYMBOL_GPL(vdpa_unregister_driver); /** * vdpa_mgmtdev_register - register a vdpa management device * * @mdev: Pointer to vdpa management device * vdpa_mgmtdev_register() register a vdpa management device which supports * vdpa device management. * Return: Returns 0 on success or failure when required callback ops are not * initialized. */ int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev) { if (!mdev->device || !mdev->ops || !mdev->ops->dev_add || !mdev->ops->dev_del) return -EINVAL; INIT_LIST_HEAD(&mdev->list); down_write(&vdpa_dev_lock); list_add_tail(&mdev->list, &mdev_head); up_write(&vdpa_dev_lock); return 0; } EXPORT_SYMBOL_GPL(vdpa_mgmtdev_register); static int vdpa_match_remove(struct device *dev, void *data) { struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev); struct vdpa_mgmt_dev *mdev = vdev->mdev; if (mdev == data) mdev->ops->dev_del(mdev, vdev); return 0; } void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev) { down_write(&vdpa_dev_lock); list_del(&mdev->list); /* Filter out all the entries belong to this management device and delete it. */ bus_for_each_dev(&vdpa_bus, NULL, mdev, vdpa_match_remove); up_write(&vdpa_dev_lock); } EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister); static void vdpa_get_config_unlocked(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len) { const struct vdpa_config_ops *ops = vdev->config; /* * Config accesses aren't supposed to trigger before features are set. * If it does happen we assume a legacy guest. */ if (!vdev->features_valid) vdpa_set_features_unlocked(vdev, 0); ops->get_config(vdev, offset, buf, len); } /** * vdpa_get_config - Get one or more device configuration fields. * @vdev: vdpa device to operate on * @offset: starting byte offset of the field * @buf: buffer pointer to read to * @len: length of the configuration fields in bytes */ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len) { down_read(&vdev->cf_lock); vdpa_get_config_unlocked(vdev, offset, buf, len); up_read(&vdev->cf_lock); } EXPORT_SYMBOL_GPL(vdpa_get_config); /** * vdpa_set_config - Set one or more device configuration fields. * @vdev: vdpa device to operate on * @offset: starting byte offset of the field * @buf: buffer pointer to read from * @length: length of the configuration fields in bytes */ void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, unsigned int length) { down_write(&vdev->cf_lock); vdev->config->set_config(vdev, offset, buf, length); up_write(&vdev->cf_lock); } EXPORT_SYMBOL_GPL(vdpa_set_config); static bool mgmtdev_handle_match(const struct vdpa_mgmt_dev *mdev, const char *busname, const char *devname) { /* Bus name is optional for simulated management device, so ignore the * device with bus if bus attribute is provided. */ if ((busname && !mdev->device->bus) || (!busname && mdev->device->bus)) return false; if (!busname && strcmp(dev_name(mdev->device), devname) == 0) return true; if (busname && (strcmp(mdev->device->bus->name, busname) == 0) && (strcmp(dev_name(mdev->device), devname) == 0)) return true; return false; } static struct vdpa_mgmt_dev *vdpa_mgmtdev_get_from_attr(struct nlattr **attrs) { struct vdpa_mgmt_dev *mdev; const char *busname = NULL; const char *devname; if (!attrs[VDPA_ATTR_MGMTDEV_DEV_NAME]) return ERR_PTR(-EINVAL); devname = nla_data(attrs[VDPA_ATTR_MGMTDEV_DEV_NAME]); if (attrs[VDPA_ATTR_MGMTDEV_BUS_NAME]) busname = nla_data(attrs[VDPA_ATTR_MGMTDEV_BUS_NAME]); list_for_each_entry(mdev, &mdev_head, list) { if (mgmtdev_handle_match(mdev, busname, devname)) return mdev; } return ERR_PTR(-ENODEV); } static int vdpa_nl_mgmtdev_handle_fill(struct sk_buff *msg, const struct vdpa_mgmt_dev *mdev) { if (mdev->device->bus && nla_put_string(msg, VDPA_ATTR_MGMTDEV_BUS_NAME, mdev->device->bus->name)) return -EMSGSIZE; if (nla_put_string(msg, VDPA_ATTR_MGMTDEV_DEV_NAME, dev_name(mdev->device))) return -EMSGSIZE; return 0; } static u64 vdpa_mgmtdev_get_classes(const struct vdpa_mgmt_dev *mdev, unsigned int *nclasses) { u64 supported_classes = 0; unsigned int n = 0; for (int i = 0; mdev->id_table[i].device; i++) { if (mdev->id_table[i].device > 63) continue; supported_classes |= BIT_ULL(mdev->id_table[i].device); n++; } if (nclasses) *nclasses = n; return supported_classes; } static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *msg, u32 portid, u32 seq, int flags) { void *hdr; int err; hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_MGMTDEV_NEW); if (!hdr) return -EMSGSIZE; err = vdpa_nl_mgmtdev_handle_fill(msg, mdev); if (err) goto msg_err; if (nla_put_u64_64bit(msg, VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES, vdpa_mgmtdev_get_classes(mdev, NULL), VDPA_ATTR_UNSPEC)) { err = -EMSGSIZE; goto msg_err; } if (nla_put_u32(msg, VDPA_ATTR_DEV_MGMTDEV_MAX_VQS, mdev->max_supported_vqs)) { err = -EMSGSIZE; goto msg_err; } if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_SUPPORTED_FEATURES, mdev->supported_features, VDPA_ATTR_PAD)) { err = -EMSGSIZE; goto msg_err; } genlmsg_end(msg, hdr); return 0; msg_err: genlmsg_cancel(msg, hdr); return err; } static int vdpa_nl_cmd_mgmtdev_get_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_mgmt_dev *mdev; struct sk_buff *msg; int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; down_read(&vdpa_dev_lock); mdev = vdpa_mgmtdev_get_from_attr(info->attrs); if (IS_ERR(mdev)) { up_read(&vdpa_dev_lock); NL_SET_ERR_MSG_MOD(info->extack, "Fail to find the specified mgmt device"); err = PTR_ERR(mdev); goto out; } err = vdpa_mgmtdev_fill(mdev, msg, info->snd_portid, info->snd_seq, 0); up_read(&vdpa_dev_lock); if (err) goto out; err = genlmsg_reply(msg, info); return err; out: nlmsg_free(msg); return err; } static int vdpa_nl_cmd_mgmtdev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { struct vdpa_mgmt_dev *mdev; int start = cb->args[0]; int idx = 0; int err; down_read(&vdpa_dev_lock); list_for_each_entry(mdev, &mdev_head, list) { if (idx < start) { idx++; continue; } err = vdpa_mgmtdev_fill(mdev, msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) goto out; idx++; } out: up_read(&vdpa_dev_lock); cb->args[0] = idx; return msg->len; } #define VDPA_DEV_NET_ATTRS_MASK (BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | \ BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) | \ BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) /* * Bitmask for all per-device features: feature bits VIRTIO_TRANSPORT_F_START * through VIRTIO_TRANSPORT_F_END are unset, i.e. 0xfffffc000fffffff for * all 64bit features. If the features are extended beyond 64 bits, or new * "holes" are reserved for other type of features than per-device, this * macro would have to be updated. */ #define VIRTIO_DEVICE_F_MASK (~0ULL << (VIRTIO_TRANSPORT_F_END + 1) | \ ((1ULL << VIRTIO_TRANSPORT_F_START) - 1)) static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_dev_set_config config = {}; struct nlattr **nl_attrs = info->attrs; struct vdpa_mgmt_dev *mdev; unsigned int ncls = 0; const u8 *macaddr; const char *name; u64 classes; int err = 0; if (!info->attrs[VDPA_ATTR_DEV_NAME]) return -EINVAL; name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) { macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]); memcpy(config.net.mac, macaddr, sizeof(config.net.mac)); config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); } if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]) { config.net.mtu = nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]); config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU); } if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]) { config.net.max_vq_pairs = nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]); if (!config.net.max_vq_pairs) { NL_SET_ERR_MSG_MOD(info->extack, "At least one pair of VQs is required"); return -EINVAL; } config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); } if (nl_attrs[VDPA_ATTR_DEV_FEATURES]) { u64 missing = 0x0ULL; config.device_features = nla_get_u64(nl_attrs[VDPA_ATTR_DEV_FEATURES]); if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR] && !(config.device_features & BIT_ULL(VIRTIO_NET_F_MAC))) missing |= BIT_ULL(VIRTIO_NET_F_MAC); if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU] && !(config.device_features & BIT_ULL(VIRTIO_NET_F_MTU))) missing |= BIT_ULL(VIRTIO_NET_F_MTU); if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP] && config.net.max_vq_pairs > 1 && !(config.device_features & BIT_ULL(VIRTIO_NET_F_MQ))) missing |= BIT_ULL(VIRTIO_NET_F_MQ); if (missing) { NL_SET_ERR_MSG_FMT_MOD(info->extack, "Missing features 0x%llx for provided attributes", missing); return -EINVAL; } config.mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES); } /* Skip checking capability if user didn't prefer to configure any * device networking attributes. It is likely that user might have used * a device specific method to configure such attributes or using device * default attributes. */ if ((config.mask & VDPA_DEV_NET_ATTRS_MASK) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; down_write(&vdpa_dev_lock); mdev = vdpa_mgmtdev_get_from_attr(info->attrs); if (IS_ERR(mdev)) { NL_SET_ERR_MSG_MOD(info->extack, "Fail to find the specified management device"); err = PTR_ERR(mdev); goto err; } if ((config.mask & mdev->config_attr_mask) != config.mask) { NL_SET_ERR_MSG_FMT_MOD(info->extack, "Some provided attributes are not supported: 0x%llx", config.mask & ~mdev->config_attr_mask); err = -EOPNOTSUPP; goto err; } classes = vdpa_mgmtdev_get_classes(mdev, &ncls); if (config.mask & VDPA_DEV_NET_ATTRS_MASK && !(classes & BIT_ULL(VIRTIO_ID_NET))) { NL_SET_ERR_MSG_MOD(info->extack, "Network class attributes provided on unsupported management device"); err = -EINVAL; goto err; } if (!(config.mask & VDPA_DEV_NET_ATTRS_MASK) && config.mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES) && classes & BIT_ULL(VIRTIO_ID_NET) && ncls > 1 && config.device_features & VIRTIO_DEVICE_F_MASK) { NL_SET_ERR_MSG_MOD(info->extack, "Management device supports multi-class while device features specified are ambiguous"); err = -EINVAL; goto err; } err = mdev->ops->dev_add(mdev, name, &config); err: up_write(&vdpa_dev_lock); return err; } static int vdpa_nl_cmd_dev_del_set_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_mgmt_dev *mdev; struct vdpa_device *vdev; struct device *dev; const char *name; int err = 0; if (!info->attrs[VDPA_ATTR_DEV_NAME]) return -EINVAL; name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); down_write(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, name, vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); err = -ENODEV; goto dev_err; } vdev = container_of(dev, struct vdpa_device, dev); if (!vdev->mdev) { NL_SET_ERR_MSG_MOD(info->extack, "Only user created device can be deleted by user"); err = -EINVAL; goto mdev_err; } mdev = vdev->mdev; mdev->ops->dev_del(mdev, vdev); mdev_err: put_device(dev); dev_err: up_write(&vdpa_dev_lock); return err; } static int vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { u16 max_vq_size; u16 min_vq_size = 1; u32 device_id; u32 vendor_id; void *hdr; int err; hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_DEV_NEW); if (!hdr) return -EMSGSIZE; err = vdpa_nl_mgmtdev_handle_fill(msg, vdev->mdev); if (err) goto msg_err; device_id = vdev->config->get_device_id(vdev); vendor_id = vdev->config->get_vendor_id(vdev); max_vq_size = vdev->config->get_vq_num_max(vdev); if (vdev->config->get_vq_num_min) min_vq_size = vdev->config->get_vq_num_min(vdev); err = -EMSGSIZE; if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) goto msg_err; if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) goto msg_err; if (nla_put_u32(msg, VDPA_ATTR_DEV_VENDOR_ID, vendor_id)) goto msg_err; if (nla_put_u32(msg, VDPA_ATTR_DEV_MAX_VQS, vdev->nvqs)) goto msg_err; if (nla_put_u16(msg, VDPA_ATTR_DEV_MAX_VQ_SIZE, max_vq_size)) goto msg_err; if (nla_put_u16(msg, VDPA_ATTR_DEV_MIN_VQ_SIZE, min_vq_size)) goto msg_err; genlmsg_end(msg, hdr); return 0; msg_err: genlmsg_cancel(msg, hdr); return err; } static int vdpa_nl_cmd_dev_get_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_device *vdev; struct sk_buff *msg; const char *devname; struct device *dev; int err; if (!info->attrs[VDPA_ATTR_DEV_NAME]) return -EINVAL; devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; down_read(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); err = -ENODEV; goto err; } vdev = container_of(dev, struct vdpa_device, dev); if (!vdev->mdev) { err = -EINVAL; goto mdev_err; } err = vdpa_dev_fill(vdev, msg, info->snd_portid, info->snd_seq, 0, info->extack); if (err) goto mdev_err; err = genlmsg_reply(msg, info); put_device(dev); up_read(&vdpa_dev_lock); return err; mdev_err: put_device(dev); err: up_read(&vdpa_dev_lock); nlmsg_free(msg); return err; } struct vdpa_dev_dump_info { struct sk_buff *msg; struct netlink_callback *cb; int start_idx; int idx; }; static int vdpa_dev_dump(struct device *dev, void *data) { struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev); struct vdpa_dev_dump_info *info = data; int err; if (!vdev->mdev) return 0; if (info->idx < info->start_idx) { info->idx++; return 0; } err = vdpa_dev_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid, info->cb->nlh->nlmsg_seq, NLM_F_MULTI, info->cb->extack); if (err) return err; info->idx++; return 0; } static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { struct vdpa_dev_dump_info info; info.msg = msg; info.cb = cb; info.start_idx = cb->args[0]; info.idx = 0; down_read(&vdpa_dev_lock); bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_dump); up_read(&vdpa_dev_lock); cb->args[0] = info.idx; return msg->len; } static int vdpa_dev_net_mq_config_fill(struct sk_buff *msg, u64 features, const struct virtio_net_config *config) { u16 val_u16; if ((features & BIT_ULL(VIRTIO_NET_F_MQ)) == 0 && (features & BIT_ULL(VIRTIO_NET_F_RSS)) == 0) return 0; val_u16 = __virtio16_to_cpu(true, config->max_virtqueue_pairs); return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, val_u16); } static int vdpa_dev_net_mtu_config_fill(struct sk_buff *msg, u64 features, const struct virtio_net_config *config) { u16 val_u16; if ((features & BIT_ULL(VIRTIO_NET_F_MTU)) == 0) return 0; val_u16 = __virtio16_to_cpu(true, config->mtu); return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16); } static int vdpa_dev_net_mac_config_fill(struct sk_buff *msg, u64 features, const struct virtio_net_config *config) { if ((features & BIT_ULL(VIRTIO_NET_F_MAC)) == 0) return 0; return nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config->mac), config->mac); } static int vdpa_dev_net_status_config_fill(struct sk_buff *msg, u64 features, const struct virtio_net_config *config) { u16 val_u16; if ((features & BIT_ULL(VIRTIO_NET_F_STATUS)) == 0) return 0; val_u16 = __virtio16_to_cpu(true, config->status); return nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16); } static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg) { struct virtio_net_config config = {}; u64 features_device; vdev->config->get_config(vdev, 0, &config, sizeof(config)); features_device = vdev->config->get_device_features(vdev); if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_FEATURES, features_device, VDPA_ATTR_PAD)) return -EMSGSIZE; if (vdpa_dev_net_mtu_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_net_mac_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_net_status_config_fill(msg, features_device, &config)) return -EMSGSIZE; return vdpa_dev_net_mq_config_fill(msg, features_device, &config); } static int vdpa_dev_blk_capacity_config_fill(struct sk_buff *msg, const struct virtio_blk_config *config) { u64 val_u64; val_u64 = __virtio64_to_cpu(true, config->capacity); return nla_put_u64_64bit(msg, VDPA_ATTR_DEV_BLK_CFG_CAPACITY, val_u64, VDPA_ATTR_PAD); } static int vdpa_dev_blk_seg_size_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u32 val_u32; if ((features & BIT_ULL(VIRTIO_BLK_F_SIZE_MAX)) == 0) return 0; val_u32 = __virtio32_to_cpu(true, config->size_max); return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, val_u32); } /* fill the block size*/ static int vdpa_dev_blk_block_size_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u32 val_u32; if ((features & BIT_ULL(VIRTIO_BLK_F_BLK_SIZE)) == 0) return 0; val_u32 = __virtio32_to_cpu(true, config->blk_size); return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_BLK_SIZE, val_u32); } static int vdpa_dev_blk_seg_max_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u32 val_u32; if ((features & BIT_ULL(VIRTIO_BLK_F_SEG_MAX)) == 0) return 0; val_u32 = __virtio32_to_cpu(true, config->seg_max); return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SEG_MAX, val_u32); } static int vdpa_dev_blk_mq_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u16 val_u16; if ((features & BIT_ULL(VIRTIO_BLK_F_MQ)) == 0) return 0; val_u16 = __virtio16_to_cpu(true, config->num_queues); return nla_put_u16(msg, VDPA_ATTR_DEV_BLK_CFG_NUM_QUEUES, val_u16); } static int vdpa_dev_blk_topology_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u16 min_io_size; u32 opt_io_size; if ((features & BIT_ULL(VIRTIO_BLK_F_TOPOLOGY)) == 0) return 0; min_io_size = __virtio16_to_cpu(true, config->min_io_size); opt_io_size = __virtio32_to_cpu(true, config->opt_io_size); if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_PHY_BLK_EXP, config->physical_block_exp)) return -EMSGSIZE; if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_ALIGN_OFFSET, config->alignment_offset)) return -EMSGSIZE; if (nla_put_u16(msg, VDPA_ATTR_DEV_BLK_CFG_MIN_IO_SIZE, min_io_size)) return -EMSGSIZE; if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_OPT_IO_SIZE, opt_io_size)) return -EMSGSIZE; return 0; } static int vdpa_dev_blk_discard_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u32 val_u32; if ((features & BIT_ULL(VIRTIO_BLK_F_DISCARD)) == 0) return 0; val_u32 = __virtio32_to_cpu(true, config->max_discard_sectors); if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_DISCARD_SEC, val_u32)) return -EMSGSIZE; val_u32 = __virtio32_to_cpu(true, config->max_discard_seg); if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_DISCARD_SEG, val_u32)) return -EMSGSIZE; val_u32 = __virtio32_to_cpu(true, config->discard_sector_alignment); if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_DISCARD_SEC_ALIGN, val_u32)) return -EMSGSIZE; return 0; } static int vdpa_dev_blk_write_zeroes_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u32 val_u32; if ((features & BIT_ULL(VIRTIO_BLK_F_WRITE_ZEROES)) == 0) return 0; val_u32 = __virtio32_to_cpu(true, config->max_write_zeroes_sectors); if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEC, val_u32)) return -EMSGSIZE; val_u32 = __virtio32_to_cpu(true, config->max_write_zeroes_seg); if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEG, val_u32)) return -EMSGSIZE; return 0; } static int vdpa_dev_blk_ro_config_fill(struct sk_buff *msg, u64 features) { u8 ro; ro = ((features & BIT_ULL(VIRTIO_BLK_F_RO)) == 0) ? 0 : 1; if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_READ_ONLY, ro)) return -EMSGSIZE; return 0; } static int vdpa_dev_blk_flush_config_fill(struct sk_buff *msg, u64 features) { u8 flush; flush = ((features & BIT_ULL(VIRTIO_BLK_F_FLUSH)) == 0) ? 0 : 1; if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_FLUSH, flush)) return -EMSGSIZE; return 0; } static int vdpa_dev_blk_config_fill(struct vdpa_device *vdev, struct sk_buff *msg) { struct virtio_blk_config config = {}; u64 features_device; vdev->config->get_config(vdev, 0, &config, sizeof(config)); features_device = vdev->config->get_device_features(vdev); if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_FEATURES, features_device, VDPA_ATTR_PAD)) return -EMSGSIZE; if (vdpa_dev_blk_capacity_config_fill(msg, &config)) return -EMSGSIZE; if (vdpa_dev_blk_seg_size_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_block_size_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_seg_max_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_mq_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_topology_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_discard_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_write_zeroes_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_ro_config_fill(msg, features_device)) return -EMSGSIZE; if (vdpa_dev_blk_flush_config_fill(msg, features_device)) return -EMSGSIZE; return 0; } static int vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { u64 features_driver; u8 status = 0; u32 device_id; void *hdr; int err; down_read(&vdev->cf_lock); hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_DEV_CONFIG_GET); if (!hdr) { err = -EMSGSIZE; goto out; } if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) { err = -EMSGSIZE; goto msg_err; } device_id = vdev->config->get_device_id(vdev); if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) { err = -EMSGSIZE; goto msg_err; } /* only read driver features after the feature negotiation is done */ status = vdev->config->get_status(vdev); if (status & VIRTIO_CONFIG_S_FEATURES_OK) { features_driver = vdev->config->get_driver_features(vdev); if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features_driver, VDPA_ATTR_PAD)) { err = -EMSGSIZE; goto msg_err; } } switch (device_id) { case VIRTIO_ID_NET: err = vdpa_dev_net_config_fill(vdev, msg); break; case VIRTIO_ID_BLOCK: err = vdpa_dev_blk_config_fill(vdev, msg); break; default: err = -EOPNOTSUPP; break; } if (err) goto msg_err; up_read(&vdev->cf_lock); genlmsg_end(msg, hdr); return 0; msg_err: genlmsg_cancel(msg, hdr); out: up_read(&vdev->cf_lock); return err; } static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg, struct genl_info *info, u32 index) { struct virtio_net_config config = {}; u64 features; u8 status; int err; status = vdev->config->get_status(vdev); if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) { NL_SET_ERR_MSG_MOD(info->extack, "feature negotiation not complete"); return -EAGAIN; } vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config)); features = vdev->config->get_driver_features(vdev); if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features, VDPA_ATTR_PAD)) return -EMSGSIZE; err = vdpa_dev_net_mq_config_fill(msg, features, &config); if (err) return err; if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index)) return -EMSGSIZE; err = vdev->config->get_vendor_vq_stats(vdev, index, msg, info->extack); if (err) return err; return 0; } static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg, struct genl_info *info, u32 index) { int err; down_read(&vdev->cf_lock); if (!vdev->config->get_vendor_vq_stats) { err = -EOPNOTSUPP; goto out; } err = vdpa_fill_stats_rec(vdev, msg, info, index); out: up_read(&vdev->cf_lock); return err; } static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg, struct genl_info *info, u32 index) { u32 device_id; void *hdr; int err; u32 portid = info->snd_portid; u32 seq = info->snd_seq; u32 flags = 0; hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_DEV_VSTATS_GET); if (!hdr) return -EMSGSIZE; if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) { err = -EMSGSIZE; goto undo_msg; } device_id = vdev->config->get_device_id(vdev); if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) { err = -EMSGSIZE; goto undo_msg; } switch (device_id) { case VIRTIO_ID_NET: if (index > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) { NL_SET_ERR_MSG_MOD(info->extack, "queue index exceeds max value"); err = -ERANGE; break; } err = vendor_stats_fill(vdev, msg, info, index); break; default: err = -EOPNOTSUPP; break; } genlmsg_end(msg, hdr); return err; undo_msg: genlmsg_cancel(msg, hdr); return err; } static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_device *vdev; struct sk_buff *msg; const char *devname; struct device *dev; int err; if (!info->attrs[VDPA_ATTR_DEV_NAME]) return -EINVAL; devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; down_read(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); err = -ENODEV; goto dev_err; } vdev = container_of(dev, struct vdpa_device, dev); if (!vdev->mdev) { NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device"); err = -EINVAL; goto mdev_err; } err = vdpa_dev_config_fill(vdev, msg, info->snd_portid, info->snd_seq, 0, info->extack); if (!err) err = genlmsg_reply(msg, info); mdev_err: put_device(dev); dev_err: up_read(&vdpa_dev_lock); if (err) nlmsg_free(msg); return err; } static int vdpa_dev_net_device_attr_set(struct vdpa_device *vdev, struct genl_info *info) { struct vdpa_dev_set_config set_config = {}; struct vdpa_mgmt_dev *mdev = vdev->mdev; struct nlattr **nl_attrs = info->attrs; const u8 *macaddr; int err = -EOPNOTSUPP; down_write(&vdev->cf_lock); if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) { set_config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]); if (is_valid_ether_addr(macaddr)) { ether_addr_copy(set_config.net.mac, macaddr); if (mdev->ops->dev_set_attr) { err = mdev->ops->dev_set_attr(mdev, vdev, &set_config); } else { NL_SET_ERR_MSG_FMT_MOD(info->extack, "Operation not supported by the device."); } } else { NL_SET_ERR_MSG_FMT_MOD(info->extack, "Invalid MAC address"); } } up_write(&vdev->cf_lock); return err; } static int vdpa_nl_cmd_dev_attr_set_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_device *vdev; struct device *dev; const char *name; u64 classes; int err = 0; if (!info->attrs[VDPA_ATTR_DEV_NAME]) return -EINVAL; name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); down_write(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, name, vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); err = -ENODEV; goto dev_err; } vdev = container_of(dev, struct vdpa_device, dev); if (!vdev->mdev) { NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device"); err = -EINVAL; goto mdev_err; } classes = vdpa_mgmtdev_get_classes(vdev->mdev, NULL); if (classes & BIT_ULL(VIRTIO_ID_NET)) { err = vdpa_dev_net_device_attr_set(vdev, info); } else { NL_SET_ERR_MSG_FMT_MOD(info->extack, "%s device not supported", name); } mdev_err: put_device(dev); dev_err: up_write(&vdpa_dev_lock); return err; } static int vdpa_dev_config_dump(struct device *dev, void *data) { struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev); struct vdpa_dev_dump_info *info = data; int err; if (!vdev->mdev) return 0; if (info->idx < info->start_idx) { info->idx++; return 0; } err = vdpa_dev_config_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid, info->cb->nlh->nlmsg_seq, NLM_F_MULTI, info->cb->extack); if (err) return err; info->idx++; return 0; } static int vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { struct vdpa_dev_dump_info info; info.msg = msg; info.cb = cb; info.start_idx = cb->args[0]; info.idx = 0; down_read(&vdpa_dev_lock); bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_config_dump); up_read(&vdpa_dev_lock); cb->args[0] = info.idx; return msg->len; } static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_device *vdev; struct sk_buff *msg; const char *devname; struct device *dev; u32 index; int err; if (!info->attrs[VDPA_ATTR_DEV_NAME]) return -EINVAL; if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]) return -EINVAL; devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]); down_read(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); err = -ENODEV; goto dev_err; } vdev = container_of(dev, struct vdpa_device, dev); if (!vdev->mdev) { NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device"); err = -EINVAL; goto mdev_err; } err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index); if (err) goto mdev_err; err = genlmsg_reply(msg, info); put_device(dev); up_read(&vdpa_dev_lock); return err; mdev_err: put_device(dev); dev_err: nlmsg_free(msg); up_read(&vdpa_dev_lock); return err; } static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING }, [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING }, [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING }, [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR, [VDPA_ATTR_DEV_NET_CFG_MAX_VQP] = { .type = NLA_U16 }, /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */ [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68), [VDPA_ATTR_DEV_QUEUE_INDEX] = { .type = NLA_U32 }, [VDPA_ATTR_DEV_FEATURES] = { .type = NLA_U64 }, }; static const struct genl_ops vdpa_nl_ops[] = { { .cmd = VDPA_CMD_MGMTDEV_GET, .doit = vdpa_nl_cmd_mgmtdev_get_doit, .dumpit = vdpa_nl_cmd_mgmtdev_get_dumpit, }, { .cmd = VDPA_CMD_DEV_NEW, .doit = vdpa_nl_cmd_dev_add_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = VDPA_CMD_DEV_DEL, .doit = vdpa_nl_cmd_dev_del_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = VDPA_CMD_DEV_GET, .doit = vdpa_nl_cmd_dev_get_doit, .dumpit = vdpa_nl_cmd_dev_get_dumpit, }, { .cmd = VDPA_CMD_DEV_CONFIG_GET, .doit = vdpa_nl_cmd_dev_config_get_doit, .dumpit = vdpa_nl_cmd_dev_config_get_dumpit, }, { .cmd = VDPA_CMD_DEV_VSTATS_GET, .doit = vdpa_nl_cmd_dev_stats_get_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = VDPA_CMD_DEV_ATTR_SET, .doit = vdpa_nl_cmd_dev_attr_set_doit, .flags = GENL_ADMIN_PERM, }, }; static struct genl_family vdpa_nl_family __ro_after_init = { .name = VDPA_GENL_NAME, .version = VDPA_GENL_VERSION, .maxattr = VDPA_ATTR_MAX, .policy = vdpa_nl_policy, .netnsok = false, .module = THIS_MODULE, .ops = vdpa_nl_ops, .n_ops = ARRAY_SIZE(vdpa_nl_ops), .resv_start_op = VDPA_CMD_DEV_VSTATS_GET + 1, }; static int vdpa_init(void) { int err; err = bus_register(&vdpa_bus); if (err) return err; err = genl_register_family(&vdpa_nl_family); if (err) goto err; return 0; err: bus_unregister(&vdpa_bus); return err; } static void __exit vdpa_exit(void) { genl_unregister_family(&vdpa_nl_family); bus_unregister(&vdpa_bus); ida_destroy(&vdpa_index_ida); } core_initcall(vdpa_init); module_exit(vdpa_exit); MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>"); MODULE_DESCRIPTION("vDPA bus"); MODULE_LICENSE("GPL v2"); |
2466 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KERNEL_PRINTK__ #define __KERNEL_PRINTK__ #include <linux/stdarg.h> #include <linux/init.h> #include <linux/kern_levels.h> #include <linux/linkage.h> #include <linux/ratelimit_types.h> #include <linux/once_lite.h> struct console; extern const char linux_banner[]; extern const char linux_proc_banner[]; extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ #define PRINTK_MAX_SINGLE_HEADER_LEN 2 static inline int printk_get_level(const char *buffer) { if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { switch (buffer[1]) { case '0' ... '7': case 'c': /* KERN_CONT */ return buffer[1]; } } return 0; } static inline const char *printk_skip_level(const char *buffer) { if (printk_get_level(buffer)) return buffer + 2; return buffer; } static inline const char *printk_skip_headers(const char *buffer) { while (printk_get_level(buffer)) buffer = printk_skip_level(buffer); return buffer; } /* printk's without a loglevel use this.. */ #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT /* We show everything that is MORE important than this.. */ #define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ #define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ #define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ #define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ /* * Default used to be hard-coded at 7, quiet used to be hardcoded at 4, * we're now allowing both to be set from kernel config. */ #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT #define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET int match_devname_and_update_preferred_console(const char *match, const char *name, const short idx); extern int console_printk[]; #define console_loglevel (console_printk[0]) #define default_message_loglevel (console_printk[1]) #define minimum_console_loglevel (console_printk[2]) #define default_console_loglevel (console_printk[3]) extern void console_verbose(void); /* strlen("ratelimit") + 1 */ #define DEVKMSG_STR_MAX_SIZE 10 extern char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE]; struct ctl_table; extern int suppress_printk; struct va_format { const char *fmt; va_list *va; }; /* * FW_BUG * Add this to a message where you are sure the firmware is buggy or behaves * really stupid or out of spec. Be aware that the responsible BIOS developer * should be able to fix this issue or at least get a concrete idea of the * problem by reading your message without the need of looking at the kernel * code. * * Use it for definite and high priority BIOS bugs. * * FW_WARN * Use it for not that clear (e.g. could the kernel messed up things already?) * and medium priority BIOS bugs. * * FW_INFO * Use this one if you want to tell the user or vendor about something * suspicious, but generally harmless related to the firmware. * * Use it for information or very low priority BIOS bugs. */ #define FW_BUG "[Firmware Bug]: " #define FW_WARN "[Firmware Warn]: " #define FW_INFO "[Firmware Info]: " /* * HW_ERR * Add this to a message for hardware errors, so that user can report * it to hardware vendor instead of LKML or software vendor. */ #define HW_ERR "[Hardware Error]: " /* * DEPRECATED * Add this to a message whenever you want to warn user space about the use * of a deprecated aspect of an API so they can stop using it */ #define DEPRECATED "[Deprecated]: " /* * Dummy printk for disabled debugging statements to use whilst maintaining * gcc's format checking. */ #define no_printk(fmt, ...) \ ({ \ if (0) \ _printk(fmt, ##__VA_ARGS__); \ 0; \ }) #ifdef CONFIG_EARLY_PRINTK extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); #else static inline __printf(1, 2) __cold void early_printk(const char *s, ...) { } #endif struct dev_printk_info; #ifdef CONFIG_PRINTK asmlinkage __printf(4, 0) int vprintk_emit(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args); asmlinkage __printf(1, 0) int vprintk(const char *fmt, va_list args); asmlinkage __printf(1, 2) __cold int _printk(const char *fmt, ...); /* * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ ! */ __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); extern void __printk_deferred_enter(void); extern void __printk_deferred_exit(void); /* * The printk_deferred_enter/exit macros are available only as a hack for * some code paths that need to defer all printk console printing. Interrupts * must be disabled for the deferred duration. */ #define printk_deferred_enter() __printk_deferred_enter() #define printk_deferred_exit() __printk_deferred_exit() /* * Please don't use printk_ratelimit(), because it shares ratelimiting state * with all other unrelated printk_ratelimit() callsites. Instead use * printk_ratelimited() or plain old __ratelimit(). */ extern int __printk_ratelimit(const char *func); #define printk_ratelimit() __printk_ratelimit(__func__) extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); extern int printk_delay_msec; extern int dmesg_restrict; extern void wake_up_klogd(void); char *log_buf_addr_get(void); u32 log_buf_len_get(void); void log_buf_vmcoreinfo_setup(void); void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); void console_try_replay_all(void); void printk_legacy_allow_panic_sync(void); extern bool nbcon_device_try_acquire(struct console *con); extern void nbcon_device_release(struct console *con); void nbcon_atomic_flush_unsafe(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) { return 0; } static inline __printf(1, 2) __cold int _printk(const char *s, ...) { return 0; } static inline __printf(1, 2) __cold int _printk_deferred(const char *s, ...) { return 0; } static inline void printk_deferred_enter(void) { } static inline void printk_deferred_exit(void) { } static inline int printk_ratelimit(void) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec) { return false; } static inline void wake_up_klogd(void) { } static inline char *log_buf_addr_get(void) { return NULL; } static inline u32 log_buf_len_get(void) { return 0; } static inline void log_buf_vmcoreinfo_setup(void) { } static inline void setup_log_buf(int early) { } static inline __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...) { } static inline void dump_stack_print_info(const char *log_lvl) { } static inline void show_regs_print_info(const char *log_lvl) { } static inline void dump_stack_lvl(const char *log_lvl) { } static inline void dump_stack(void) { } static inline void printk_trigger_flush(void) { } static inline void console_try_replay_all(void) { } static inline void printk_legacy_allow_panic_sync(void) { } static inline bool nbcon_device_try_acquire(struct console *con) { return false; } static inline void nbcon_device_release(struct console *con) { } static inline void nbcon_atomic_flush_unsafe(void) { } #endif bool this_cpu_in_panic(void); #ifdef CONFIG_SMP extern int __printk_cpu_sync_try_get(void); extern void __printk_cpu_sync_wait(void); extern void __printk_cpu_sync_put(void); #else #define __printk_cpu_sync_try_get() true #define __printk_cpu_sync_wait() #define __printk_cpu_sync_put() #endif /* CONFIG_SMP */ /** * printk_cpu_sync_get_irqsave() - Disable interrupts and acquire the printk * cpu-reentrant spinning lock. * @flags: Stack-allocated storage for saving local interrupt state, * to be passed to printk_cpu_sync_put_irqrestore(). * * If the lock is owned by another CPU, spin until it becomes available. * Interrupts are restored while spinning. * * CAUTION: This function must be used carefully. It does not behave like a * typical lock. Here are important things to watch out for... * * * This function is reentrant on the same CPU. Therefore the calling * code must not assume exclusive access to data if code accessing the * data can run reentrant or within NMI context on the same CPU. * * * If there exists usage of this function from NMI context, it becomes * unsafe to perform any type of locking or spinning to wait for other * CPUs after calling this function from any context. This includes * using spinlocks or any other busy-waiting synchronization methods. */ #define printk_cpu_sync_get_irqsave(flags) \ for (;;) { \ local_irq_save(flags); \ if (__printk_cpu_sync_try_get()) \ break; \ local_irq_restore(flags); \ __printk_cpu_sync_wait(); \ } /** * printk_cpu_sync_put_irqrestore() - Release the printk cpu-reentrant spinning * lock and restore interrupts. * @flags: Caller's saved interrupt state, from printk_cpu_sync_get_irqsave(). */ #define printk_cpu_sync_put_irqrestore(flags) \ do { \ __printk_cpu_sync_put(); \ local_irq_restore(flags); \ } while (0) extern int kptr_restrict; /** * pr_fmt - used by the pr_*() macros to generate the printk format string * @fmt: format string passed from a pr_*() macro * * This macro can be used to generate a unified format string for pr_*() * macros. A common use is to prefix all pr_*() messages in a file with a common * string. For example, defining this at the top of a source file: * * #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt * * would prefix all pr_info, pr_emerg... messages in the file with the module * name. */ #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif struct module; #ifdef CONFIG_PRINTK_INDEX struct pi_entry { const char *fmt; const char *func; const char *file; unsigned int line; /* * While printk and pr_* have the level stored in the string at compile * time, some subsystems dynamically add it at runtime through the * format string. For these dynamic cases, we allow the subsystem to * tell us the level at compile time. * * NULL indicates that the level, if any, is stored in fmt. */ const char *level; /* * The format string used by various subsystem specific printk() * wrappers to prefix the message. * * Note that the static prefix defined by the pr_fmt() macro is stored * directly in the message format (@fmt), not here. */ const char *subsys_fmt_prefix; } __packed; #define __printk_index_emit(_fmt, _level, _subsys_fmt_prefix) \ do { \ if (__builtin_constant_p(_fmt) && __builtin_constant_p(_level)) { \ /* * We check __builtin_constant_p multiple times here * for the same input because GCC will produce an error * if we try to assign a static variable to fmt if it * is not a constant, even with the outer if statement. */ \ static const struct pi_entry _entry \ __used = { \ .fmt = __builtin_constant_p(_fmt) ? (_fmt) : NULL, \ .func = __func__, \ .file = __FILE__, \ .line = __LINE__, \ .level = __builtin_constant_p(_level) ? (_level) : NULL, \ .subsys_fmt_prefix = _subsys_fmt_prefix,\ }; \ static const struct pi_entry *_entry_ptr \ __used __section(".printk_index") = &_entry; \ } \ } while (0) #else /* !CONFIG_PRINTK_INDEX */ #define __printk_index_emit(...) do {} while (0) #endif /* CONFIG_PRINTK_INDEX */ /* * Some subsystems have their own custom printk that applies a va_format to a * generic format, for example, to include a device number or other metadata * alongside the format supplied by the caller. * * In order to store these in the way they would be emitted by the printk * infrastructure, the subsystem provides us with the start, fixed string, and * any subsequent text in the format string. * * We take a variable argument list as pr_fmt/dev_fmt/etc are sometimes passed * as multiple arguments (eg: `"%s: ", "blah"`), and we must only take the * first one. * * subsys_fmt_prefix must be known at compile time, or compilation will fail * (since this is a mistake). If fmt or level is not known at compile time, no * index entry will be made (since this can legitimately happen). */ #define printk_index_subsys_emit(subsys_fmt_prefix, level, fmt, ...) \ __printk_index_emit(fmt, level, subsys_fmt_prefix) #define printk_index_wrap(_p_func, _fmt, ...) \ ({ \ __printk_index_emit(_fmt, NULL, NULL); \ _p_func(_fmt, ##__VA_ARGS__); \ }) /** * printk - print a kernel message * @fmt: format string * * This is printk(). It can be called from any context. We want it to work. * * If printk indexing is enabled, _printk() is called from printk_index_wrap. * Otherwise, printk is simply #defined to _printk. * * We try to grab the console_lock. If we succeed, it's easy - we log the * output and call the console drivers. If we fail to get the semaphore, we * place the output into the log buffer and return. The current holder of * the console_sem will notice the new output in console_unlock(); and will * send it to the consoles before releasing the lock. * * One effect of this deferred printing is that code which calls printk() and * then changes console_loglevel may break. This is because console_loglevel * is inspected when the actual printing occurs. * * See also: * printf(3) * * See the vsnprintf() documentation for format string extensions over C99. */ #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__) #define printk_deferred(fmt, ...) \ printk_index_wrap(_printk_deferred, fmt, ##__VA_ARGS__) /** * pr_emerg - Print an emergency-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_EMERG loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_emerg(fmt, ...) \ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) /** * pr_alert - Print an alert-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_ALERT loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_alert(fmt, ...) \ printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) /** * pr_crit - Print a critical-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_CRIT loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_crit(fmt, ...) \ printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) /** * pr_err - Print an error-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_ERR loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_err(fmt, ...) \ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) /** * pr_warn - Print a warning-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_WARNING loglevel. It uses pr_fmt() * to generate the format string. */ #define pr_warn(fmt, ...) \ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) /** * pr_notice - Print a notice-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_NOTICE loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_notice(fmt, ...) \ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) /** * pr_info - Print an info-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_INFO loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_info(fmt, ...) \ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /** * pr_cont - Continues a previous log message in the same line. * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_CONT loglevel. It should only be * used when continuing a log message with no newline ('\n') enclosed. Otherwise * it defaults back to KERN_DEFAULT loglevel. */ #define pr_cont(fmt, ...) \ printk(KERN_CONT fmt, ##__VA_ARGS__) /** * pr_devel - Print a debug-level message conditionally * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_DEBUG loglevel if DEBUG is * defined. Otherwise it does nothing. * * It uses pr_fmt() to generate the format string. */ #ifdef DEBUG #define pr_devel(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) #include <linux/dynamic_debug.h> /** * pr_debug - Print a debug-level message conditionally * @fmt: format string * @...: arguments for the format string * * This macro expands to dynamic_pr_debug() if CONFIG_DYNAMIC_DEBUG is * set. Otherwise, if DEBUG is defined, it's equivalent to a printk with * KERN_DEBUG loglevel. If DEBUG is not defined it does nothing. * * It uses pr_fmt() to generate the format string (dynamic_pr_debug() uses * pr_fmt() internally). */ #define pr_debug(fmt, ...) \ dynamic_pr_debug(fmt, ##__VA_ARGS__) #elif defined(DEBUG) #define pr_debug(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* * Print a one-time message (analogous to WARN_ONCE() et al): */ #ifdef CONFIG_PRINTK #define printk_once(fmt, ...) \ DO_ONCE_LITE(printk, fmt, ##__VA_ARGS__) #define printk_deferred_once(fmt, ...) \ DO_ONCE_LITE(printk_deferred, fmt, ##__VA_ARGS__) #else #define printk_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #define printk_deferred_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #endif #define pr_emerg_once(fmt, ...) \ printk_once(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert_once(fmt, ...) \ printk_once(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit_once(fmt, ...) \ printk_once(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) #define pr_err_once(fmt, ...) \ printk_once(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn_once(fmt, ...) \ printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice_once(fmt, ...) \ printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_once(fmt, ...) \ printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* no pr_cont_once, don't do that... */ #if defined(DEBUG) #define pr_devel_once(fmt, ...) \ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel_once(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(DEBUG) #define pr_debug_once(fmt, ...) \ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug_once(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* * ratelimited messages with local ratelimit_state, * no local ratelimit_state used in the !PRINTK case */ #ifdef CONFIG_PRINTK #define printk_ratelimited(fmt, ...) \ ({ \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ \ if (__ratelimit(&_rs)) \ printk(fmt, ##__VA_ARGS__); \ }) #else #define printk_ratelimited(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #endif #define pr_emerg_ratelimited(fmt, ...) \ printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert_ratelimited(fmt, ...) \ printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit_ratelimited(fmt, ...) \ printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) #define pr_err_ratelimited(fmt, ...) \ printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn_ratelimited(fmt, ...) \ printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice_ratelimited(fmt, ...) \ printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_ratelimited(fmt, ...) \ printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* no pr_cont_ratelimited, don't do that... */ #if defined(DEBUG) #define pr_devel_ratelimited(fmt, ...) \ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel_ratelimited(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) /* descriptor check is first to prevent flooding with "callbacks suppressed" */ #define pr_debug_ratelimited(fmt, ...) \ do { \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, pr_fmt(fmt)); \ if (DYNAMIC_DEBUG_BRANCH(descriptor) && \ __ratelimit(&_rs)) \ __dynamic_pr_debug(&descriptor, pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) #elif defined(DEBUG) #define pr_debug_ratelimited(fmt, ...) \ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug_ratelimited(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif extern const struct file_operations kmsg_fops; enum { DUMP_PREFIX_NONE, DUMP_PREFIX_ADDRESS, DUMP_PREFIX_OFFSET }; extern int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, int groupsize, char *linebuf, size_t linebuflen, bool ascii); #ifdef CONFIG_PRINTK extern void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); #else static inline void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { } static inline void print_hex_dump_bytes(const char *prefix_str, int prefix_type, const void *buf, size_t len) { } #endif #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) #define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) #elif defined(DEBUG) #define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) #else static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { } #endif /** * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params * @prefix_str: string to prefix each line with; * caller supplies trailing spaces for alignment if desired * @prefix_type: controls whether prefix of an offset, address, or none * is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE) * @buf: data blob to dump * @len: number of bytes in the @buf * * Calls print_hex_dump(), with log level of KERN_DEBUG, * rowsize of 16, groupsize of 1, and ASCII output included. */ #define print_hex_dump_bytes(prefix_str, prefix_type, buf, len) \ print_hex_dump_debug(prefix_str, prefix_type, 16, 1, buf, len, true) #endif |
2 2 2 25 27 7 8 1 5 1 1 1 1 1 1 23 18 20 19 27 27 85 7 7 6 6 5 5 1 1 21 21 37 19 19 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Software async crypto daemon. * * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> * * Added AEAD support to cryptd. * Authors: Tadeusz Struk (tadeusz.struk@intel.com) * Adrian Hoban <adrian.hoban@intel.com> * Gabriele Paoloni <gabriele.paoloni@intel.com> * Aidan O'Mahony (aidan.o.mahony@intel.com) * Copyright (c) 2010, Intel Corporation. */ #include <crypto/internal/hash.h> #include <crypto/internal/aead.h> #include <crypto/internal/skcipher.h> #include <crypto/cryptd.h> #include <linux/refcount.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/module.h> #include <linux/scatterlist.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/workqueue.h> static unsigned int cryptd_max_cpu_qlen = 1000; module_param(cryptd_max_cpu_qlen, uint, 0); MODULE_PARM_DESC(cryptd_max_cpu_qlen, "Set cryptd Max queue depth"); static struct workqueue_struct *cryptd_wq; struct cryptd_cpu_queue { struct crypto_queue queue; struct work_struct work; }; struct cryptd_queue { /* * Protected by disabling BH to allow enqueueing from softinterrupt and * dequeuing from kworker (cryptd_queue_worker()). */ struct cryptd_cpu_queue __percpu *cpu_queue; }; struct cryptd_instance_ctx { struct crypto_spawn spawn; struct cryptd_queue *queue; }; struct skcipherd_instance_ctx { struct crypto_skcipher_spawn spawn; struct cryptd_queue *queue; }; struct hashd_instance_ctx { struct crypto_shash_spawn spawn; struct cryptd_queue *queue; }; struct aead_instance_ctx { struct crypto_aead_spawn aead_spawn; struct cryptd_queue *queue; }; struct cryptd_skcipher_ctx { refcount_t refcnt; struct crypto_skcipher *child; }; struct cryptd_skcipher_request_ctx { struct skcipher_request req; }; struct cryptd_hash_ctx { refcount_t refcnt; struct crypto_shash *child; }; struct cryptd_hash_request_ctx { crypto_completion_t complete; void *data; struct shash_desc desc; }; struct cryptd_aead_ctx { refcount_t refcnt; struct crypto_aead *child; }; struct cryptd_aead_request_ctx { struct aead_request req; }; static void cryptd_queue_worker(struct work_struct *work); static int cryptd_init_queue(struct cryptd_queue *queue, unsigned int max_cpu_qlen) { int cpu; struct cryptd_cpu_queue *cpu_queue; queue->cpu_queue = alloc_percpu(struct cryptd_cpu_queue); if (!queue->cpu_queue) return -ENOMEM; for_each_possible_cpu(cpu) { cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); INIT_WORK(&cpu_queue->work, cryptd_queue_worker); } pr_info("cryptd: max_cpu_qlen set to %d\n", max_cpu_qlen); return 0; } static void cryptd_fini_queue(struct cryptd_queue *queue) { int cpu; struct cryptd_cpu_queue *cpu_queue; for_each_possible_cpu(cpu) { cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); BUG_ON(cpu_queue->queue.qlen); } free_percpu(queue->cpu_queue); } static int cryptd_enqueue_request(struct cryptd_queue *queue, struct crypto_async_request *request) { int err; struct cryptd_cpu_queue *cpu_queue; refcount_t *refcnt; local_bh_disable(); cpu_queue = this_cpu_ptr(queue->cpu_queue); err = crypto_enqueue_request(&cpu_queue->queue, request); refcnt = crypto_tfm_ctx(request->tfm); if (err == -ENOSPC) goto out; queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work); if (!refcount_read(refcnt)) goto out; refcount_inc(refcnt); out: local_bh_enable(); return err; } /* Called in workqueue context, do one real cryption work (via * req->complete) and reschedule itself if there are more work to * do. */ static void cryptd_queue_worker(struct work_struct *work) { struct cryptd_cpu_queue *cpu_queue; struct crypto_async_request *req, *backlog; cpu_queue = container_of(work, struct cryptd_cpu_queue, work); /* * Only handle one request at a time to avoid hogging crypto workqueue. */ local_bh_disable(); backlog = crypto_get_backlog(&cpu_queue->queue); req = crypto_dequeue_request(&cpu_queue->queue); local_bh_enable(); if (!req) return; if (backlog) crypto_request_complete(backlog, -EINPROGRESS); crypto_request_complete(req, 0); if (cpu_queue->queue.qlen) queue_work(cryptd_wq, &cpu_queue->work); } static inline struct cryptd_queue *cryptd_get_queue(struct crypto_tfm *tfm) { struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); struct cryptd_instance_ctx *ictx = crypto_instance_ctx(inst); return ictx->queue; } static void cryptd_type_and_mask(struct crypto_attr_type *algt, u32 *type, u32 *mask) { /* * cryptd is allowed to wrap internal algorithms, but in that case the * resulting cryptd instance will be marked as internal as well. */ *type = algt->type & CRYPTO_ALG_INTERNAL; *mask = algt->mask & CRYPTO_ALG_INTERNAL; /* No point in cryptd wrapping an algorithm that's already async. */ *mask |= CRYPTO_ALG_ASYNC; *mask |= crypto_algt_inherited_mask(algt); } static int cryptd_init_instance(struct crypto_instance *inst, struct crypto_alg *alg) { if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, "cryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) return -ENAMETOOLONG; memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); inst->alg.cra_priority = alg->cra_priority + 50; inst->alg.cra_blocksize = alg->cra_blocksize; inst->alg.cra_alignmask = alg->cra_alignmask; return 0; } static int cryptd_skcipher_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent); struct crypto_skcipher *child = ctx->child; crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & CRYPTO_TFM_REQ_MASK); return crypto_skcipher_setkey(child, key, keylen); } static struct skcipher_request *cryptd_skcipher_prepare( struct skcipher_request *req, int err) { struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); struct skcipher_request *subreq = &rctx->req; struct cryptd_skcipher_ctx *ctx; struct crypto_skcipher *child; req->base.complete = subreq->base.complete; req->base.data = subreq->base.data; if (unlikely(err == -EINPROGRESS)) return NULL; ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); child = ctx->child; skcipher_request_set_tfm(subreq, child); skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, req->iv); return subreq; } static void cryptd_skcipher_complete(struct skcipher_request *req, int err, crypto_completion_t complete) { struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_request *subreq = &rctx->req; int refcnt = refcount_read(&ctx->refcnt); local_bh_disable(); skcipher_request_complete(req, err); local_bh_enable(); if (unlikely(err == -EINPROGRESS)) { subreq->base.complete = req->base.complete; subreq->base.data = req->base.data; req->base.complete = complete; req->base.data = req; } else if (refcnt && refcount_dec_and_test(&ctx->refcnt)) crypto_free_skcipher(tfm); } static void cryptd_skcipher_encrypt(void *data, int err) { struct skcipher_request *req = data; struct skcipher_request *subreq; subreq = cryptd_skcipher_prepare(req, err); if (likely(subreq)) err = crypto_skcipher_encrypt(subreq); cryptd_skcipher_complete(req, err, cryptd_skcipher_encrypt); } static void cryptd_skcipher_decrypt(void *data, int err) { struct skcipher_request *req = data; struct skcipher_request *subreq; subreq = cryptd_skcipher_prepare(req, err); if (likely(subreq)) err = crypto_skcipher_decrypt(subreq); cryptd_skcipher_complete(req, err, cryptd_skcipher_decrypt); } static int cryptd_skcipher_enqueue(struct skcipher_request *req, crypto_completion_t compl) { struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->req; struct cryptd_queue *queue; queue = cryptd_get_queue(crypto_skcipher_tfm(tfm)); subreq->base.complete = req->base.complete; subreq->base.data = req->base.data; req->base.complete = compl; req->base.data = req; return cryptd_enqueue_request(queue, &req->base); } static int cryptd_skcipher_encrypt_enqueue(struct skcipher_request *req) { return cryptd_skcipher_enqueue(req, cryptd_skcipher_encrypt); } static int cryptd_skcipher_decrypt_enqueue(struct skcipher_request *req) { return cryptd_skcipher_enqueue(req, cryptd_skcipher_decrypt); } static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm) { struct skcipher_instance *inst = skcipher_alg_instance(tfm); struct skcipherd_instance_ctx *ictx = skcipher_instance_ctx(inst); struct crypto_skcipher_spawn *spawn = &ictx->spawn; struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_skcipher *cipher; cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; crypto_skcipher_set_reqsize( tfm, sizeof(struct cryptd_skcipher_request_ctx) + crypto_skcipher_reqsize(cipher)); return 0; } static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm) { struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); crypto_free_skcipher(ctx->child); } static void cryptd_skcipher_free(struct skcipher_instance *inst) { struct skcipherd_instance_ctx *ctx = skcipher_instance_ctx(inst); crypto_drop_skcipher(&ctx->spawn); kfree(inst); } static int cryptd_create_skcipher(struct crypto_template *tmpl, struct rtattr **tb, struct crypto_attr_type *algt, struct cryptd_queue *queue) { struct skcipherd_instance_ctx *ctx; struct skcipher_instance *inst; struct skcipher_alg_common *alg; u32 type; u32 mask; int err; cryptd_type_and_mask(algt, &type, &mask); inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; ctx = skcipher_instance_ctx(inst); ctx->queue = queue; err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst), crypto_attr_alg_name(tb[1]), type, mask); if (err) goto err_free_inst; alg = crypto_spawn_skcipher_alg_common(&ctx->spawn); err = cryptd_init_instance(skcipher_crypto_instance(inst), &alg->base); if (err) goto err_free_inst; inst->alg.base.cra_flags |= CRYPTO_ALG_ASYNC | (alg->base.cra_flags & CRYPTO_ALG_INTERNAL); inst->alg.ivsize = alg->ivsize; inst->alg.chunksize = alg->chunksize; inst->alg.min_keysize = alg->min_keysize; inst->alg.max_keysize = alg->max_keysize; inst->alg.base.cra_ctxsize = sizeof(struct cryptd_skcipher_ctx); inst->alg.init = cryptd_skcipher_init_tfm; inst->alg.exit = cryptd_skcipher_exit_tfm; inst->alg.setkey = cryptd_skcipher_setkey; inst->alg.encrypt = cryptd_skcipher_encrypt_enqueue; inst->alg.decrypt = cryptd_skcipher_decrypt_enqueue; inst->free = cryptd_skcipher_free; err = skcipher_register_instance(tmpl, inst); if (err) { err_free_inst: cryptd_skcipher_free(inst); } return err; } static int cryptd_hash_init_tfm(struct crypto_ahash *tfm) { struct ahash_instance *inst = ahash_alg_instance(tfm); struct hashd_instance_ctx *ictx = ahash_instance_ctx(inst); struct crypto_shash_spawn *spawn = &ictx->spawn; struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); struct crypto_shash *hash; hash = crypto_spawn_shash(spawn); if (IS_ERR(hash)) return PTR_ERR(hash); ctx->child = hash; crypto_ahash_set_reqsize(tfm, sizeof(struct cryptd_hash_request_ctx) + crypto_shash_descsize(hash)); return 0; } static int cryptd_hash_clone_tfm(struct crypto_ahash *ntfm, struct crypto_ahash *tfm) { struct cryptd_hash_ctx *nctx = crypto_ahash_ctx(ntfm); struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); struct crypto_shash *hash; hash = crypto_clone_shash(ctx->child); if (IS_ERR(hash)) return PTR_ERR(hash); nctx->child = hash; return 0; } static void cryptd_hash_exit_tfm(struct crypto_ahash *tfm) { struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); crypto_free_shash(ctx->child); } static int cryptd_hash_setkey(struct crypto_ahash *parent, const u8 *key, unsigned int keylen) { struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(parent); struct crypto_shash *child = ctx->child; crypto_shash_clear_flags(child, CRYPTO_TFM_REQ_MASK); crypto_shash_set_flags(child, crypto_ahash_get_flags(parent) & CRYPTO_TFM_REQ_MASK); return crypto_shash_setkey(child, key, keylen); } static int cryptd_hash_enqueue(struct ahash_request *req, crypto_completion_t compl) { struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct cryptd_queue *queue = cryptd_get_queue(crypto_ahash_tfm(tfm)); rctx->complete = req->base.complete; rctx->data = req->base.data; req->base.complete = compl; req->base.data = req; return cryptd_enqueue_request(queue, &req->base); } static struct shash_desc *cryptd_hash_prepare(struct ahash_request *req, int err) { struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); req->base.complete = rctx->complete; req->base.data = rctx->data; if (unlikely(err == -EINPROGRESS)) return NULL; return &rctx->desc; } static void cryptd_hash_complete(struct ahash_request *req, int err, crypto_completion_t complete) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); int refcnt = refcount_read(&ctx->refcnt); local_bh_disable(); ahash_request_complete(req, err); local_bh_enable(); if (err == -EINPROGRESS) { req->base.complete = complete; req->base.data = req; } else if (refcnt && refcount_dec_and_test(&ctx->refcnt)) crypto_free_ahash(tfm); } static void cryptd_hash_init(void *data, int err) { struct ahash_request *req = data; struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); struct crypto_shash *child = ctx->child; struct shash_desc *desc; desc = cryptd_hash_prepare(req, err); if (unlikely(!desc)) goto out; desc->tfm = child; err = crypto_shash_init(desc); out: cryptd_hash_complete(req, err, cryptd_hash_init); } static int cryptd_hash_init_enqueue(struct ahash_request *req) { return cryptd_hash_enqueue(req, cryptd_hash_init); } static void cryptd_hash_update(void *data, int err) { struct ahash_request *req = data; struct shash_desc *desc; desc = cryptd_hash_prepare(req, err); if (likely(desc)) err = shash_ahash_update(req, desc); cryptd_hash_complete(req, err, cryptd_hash_update); } static int cryptd_hash_update_enqueue(struct ahash_request *req) { return cryptd_hash_enqueue(req, cryptd_hash_update); } static void cryptd_hash_final(void *data, int err) { struct ahash_request *req = data; struct shash_desc *desc; desc = cryptd_hash_prepare(req, err); if (likely(desc)) err = crypto_shash_final(desc, req->result); cryptd_hash_complete(req, err, cryptd_hash_final); } static int cryptd_hash_final_enqueue(struct ahash_request *req) { return cryptd_hash_enqueue(req, cryptd_hash_final); } static void cryptd_hash_finup(void *data, int err) { struct ahash_request *req = data; struct shash_desc *desc; desc = cryptd_hash_prepare(req, err); if (likely(desc)) err = shash_ahash_finup(req, desc); cryptd_hash_complete(req, err, cryptd_hash_finup); } static int cryptd_hash_finup_enqueue(struct ahash_request *req) { return cryptd_hash_enqueue(req, cryptd_hash_finup); } static void cryptd_hash_digest(void *data, int err) { struct ahash_request *req = data; struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); struct crypto_shash *child = ctx->child; struct shash_desc *desc; desc = cryptd_hash_prepare(req, err); if (unlikely(!desc)) goto out; desc->tfm = child; err = shash_ahash_digest(req, desc); out: cryptd_hash_complete(req, err, cryptd_hash_digest); } static int cryptd_hash_digest_enqueue(struct ahash_request *req) { return cryptd_hash_enqueue(req, cryptd_hash_digest); } static int cryptd_hash_export(struct ahash_request *req, void *out) { struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); return crypto_shash_export(&rctx->desc, out); } static int cryptd_hash_import(struct ahash_request *req, const void *in) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); struct shash_desc *desc = cryptd_shash_desc(req); desc->tfm = ctx->child; return crypto_shash_import(desc, in); } static void cryptd_hash_free(struct ahash_instance *inst) { struct hashd_instance_ctx *ctx = ahash_instance_ctx(inst); crypto_drop_shash(&ctx->spawn); kfree(inst); } static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, struct crypto_attr_type *algt, struct cryptd_queue *queue) { struct hashd_instance_ctx *ctx; struct ahash_instance *inst; struct shash_alg *alg; u32 type; u32 mask; int err; cryptd_type_and_mask(algt, &type, &mask); inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; ctx = ahash_instance_ctx(inst); ctx->queue = queue; err = crypto_grab_shash(&ctx->spawn, ahash_crypto_instance(inst), crypto_attr_alg_name(tb[1]), type, mask); if (err) goto err_free_inst; alg = crypto_spawn_shash_alg(&ctx->spawn); err = cryptd_init_instance(ahash_crypto_instance(inst), &alg->base); if (err) goto err_free_inst; inst->alg.halg.base.cra_flags |= CRYPTO_ALG_ASYNC | (alg->base.cra_flags & (CRYPTO_ALG_INTERNAL| CRYPTO_ALG_OPTIONAL_KEY)); inst->alg.halg.digestsize = alg->digestsize; inst->alg.halg.statesize = alg->statesize; inst->alg.halg.base.cra_ctxsize = sizeof(struct cryptd_hash_ctx); inst->alg.init_tfm = cryptd_hash_init_tfm; inst->alg.clone_tfm = cryptd_hash_clone_tfm; inst->alg.exit_tfm = cryptd_hash_exit_tfm; inst->alg.init = cryptd_hash_init_enqueue; inst->alg.update = cryptd_hash_update_enqueue; inst->alg.final = cryptd_hash_final_enqueue; inst->alg.finup = cryptd_hash_finup_enqueue; inst->alg.export = cryptd_hash_export; inst->alg.import = cryptd_hash_import; if (crypto_shash_alg_has_setkey(alg)) inst->alg.setkey = cryptd_hash_setkey; inst->alg.digest = cryptd_hash_digest_enqueue; inst->free = cryptd_hash_free; err = ahash_register_instance(tmpl, inst); if (err) { err_free_inst: cryptd_hash_free(inst); } return err; } static int cryptd_aead_setkey(struct crypto_aead *parent, const u8 *key, unsigned int keylen) { struct cryptd_aead_ctx *ctx = crypto_aead_ctx(parent); struct crypto_aead *child = ctx->child; return crypto_aead_setkey(child, key, keylen); } static int cryptd_aead_setauthsize(struct crypto_aead *parent, unsigned int authsize) { struct cryptd_aead_ctx *ctx = crypto_aead_ctx(parent); struct crypto_aead *child = ctx->child; return crypto_aead_setauthsize(child, authsize); } static void cryptd_aead_crypt(struct aead_request *req, struct crypto_aead *child, int err, int (*crypt)(struct aead_request *req), crypto_completion_t compl) { struct cryptd_aead_request_ctx *rctx; struct aead_request *subreq; struct cryptd_aead_ctx *ctx; struct crypto_aead *tfm; int refcnt; rctx = aead_request_ctx(req); subreq = &rctx->req; req->base.complete = subreq->base.complete; req->base.data = subreq->base.data; tfm = crypto_aead_reqtfm(req); if (unlikely(err == -EINPROGRESS)) goto out; aead_request_set_tfm(subreq, child); aead_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, req->iv); aead_request_set_ad(subreq, req->assoclen); err = crypt(subreq); out: ctx = crypto_aead_ctx(tfm); refcnt = refcount_read(&ctx->refcnt); local_bh_disable(); aead_request_complete(req, err); local_bh_enable(); if (err == -EINPROGRESS) { subreq->base.complete = req->base.complete; subreq->base.data = req->base.data; req->base.complete = compl; req->base.data = req; } else if (refcnt && refcount_dec_and_test(&ctx->refcnt)) crypto_free_aead(tfm); } static void cryptd_aead_encrypt(void *data, int err) { struct aead_request *req = data; struct cryptd_aead_ctx *ctx; struct crypto_aead *child; ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); child = ctx->child; cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->encrypt, cryptd_aead_encrypt); } static void cryptd_aead_decrypt(void *data, int err) { struct aead_request *req = data; struct cryptd_aead_ctx *ctx; struct crypto_aead *child; ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); child = ctx->child; cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->decrypt, cryptd_aead_decrypt); } static int cryptd_aead_enqueue(struct aead_request *req, crypto_completion_t compl) { struct cryptd_aead_request_ctx *rctx = aead_request_ctx(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cryptd_queue *queue = cryptd_get_queue(crypto_aead_tfm(tfm)); struct aead_request *subreq = &rctx->req; subreq->base.complete = req->base.complete; subreq->base.data = req->base.data; req->base.complete = compl; req->base.data = req; return cryptd_enqueue_request(queue, &req->base); } static int cryptd_aead_encrypt_enqueue(struct aead_request *req) { return cryptd_aead_enqueue(req, cryptd_aead_encrypt ); } static int cryptd_aead_decrypt_enqueue(struct aead_request *req) { return cryptd_aead_enqueue(req, cryptd_aead_decrypt ); } static int cryptd_aead_init_tfm(struct crypto_aead *tfm) { struct aead_instance *inst = aead_alg_instance(tfm); struct aead_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_aead_spawn *spawn = &ictx->aead_spawn; struct cryptd_aead_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_aead *cipher; cipher = crypto_spawn_aead(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; crypto_aead_set_reqsize( tfm, sizeof(struct cryptd_aead_request_ctx) + crypto_aead_reqsize(cipher)); return 0; } static void cryptd_aead_exit_tfm(struct crypto_aead *tfm) { struct cryptd_aead_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_aead(ctx->child); } static void cryptd_aead_free(struct aead_instance *inst) { struct aead_instance_ctx *ctx = aead_instance_ctx(inst); crypto_drop_aead(&ctx->aead_spawn); kfree(inst); } static int cryptd_create_aead(struct crypto_template *tmpl, struct rtattr **tb, struct crypto_attr_type *algt, struct cryptd_queue *queue) { struct aead_instance_ctx *ctx; struct aead_instance *inst; struct aead_alg *alg; u32 type; u32 mask; int err; cryptd_type_and_mask(algt, &type, &mask); inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; ctx = aead_instance_ctx(inst); ctx->queue = queue; err = crypto_grab_aead(&ctx->aead_spawn, aead_crypto_instance(inst), crypto_attr_alg_name(tb[1]), type, mask); if (err) goto err_free_inst; alg = crypto_spawn_aead_alg(&ctx->aead_spawn); err = cryptd_init_instance(aead_crypto_instance(inst), &alg->base); if (err) goto err_free_inst; inst->alg.base.cra_flags |= CRYPTO_ALG_ASYNC | (alg->base.cra_flags & CRYPTO_ALG_INTERNAL); inst->alg.base.cra_ctxsize = sizeof(struct cryptd_aead_ctx); inst->alg.ivsize = crypto_aead_alg_ivsize(alg); inst->alg.maxauthsize = crypto_aead_alg_maxauthsize(alg); inst->alg.init = cryptd_aead_init_tfm; inst->alg.exit = cryptd_aead_exit_tfm; inst->alg.setkey = cryptd_aead_setkey; inst->alg.setauthsize = cryptd_aead_setauthsize; inst->alg.encrypt = cryptd_aead_encrypt_enqueue; inst->alg.decrypt = cryptd_aead_decrypt_enqueue; inst->free = cryptd_aead_free; err = aead_register_instance(tmpl, inst); if (err) { err_free_inst: cryptd_aead_free(inst); } return err; } static struct cryptd_queue queue; static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb) { struct crypto_attr_type *algt; algt = crypto_get_attr_type(tb); if (IS_ERR(algt)) return PTR_ERR(algt); switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_LSKCIPHER: return cryptd_create_skcipher(tmpl, tb, algt, &queue); case CRYPTO_ALG_TYPE_HASH: return cryptd_create_hash(tmpl, tb, algt, &queue); case CRYPTO_ALG_TYPE_AEAD: return cryptd_create_aead(tmpl, tb, algt, &queue); } return -EINVAL; } static struct crypto_template cryptd_tmpl = { .name = "cryptd", .create = cryptd_create, .module = THIS_MODULE, }; struct cryptd_skcipher *cryptd_alloc_skcipher(const char *alg_name, u32 type, u32 mask) { char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; struct cryptd_skcipher_ctx *ctx; struct crypto_skcipher *tfm; if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME) return ERR_PTR(-EINVAL); tfm = crypto_alloc_skcipher(cryptd_alg_name, type, mask); if (IS_ERR(tfm)) return ERR_CAST(tfm); if (tfm->base.__crt_alg->cra_module != THIS_MODULE) { crypto_free_skcipher(tfm); return ERR_PTR(-EINVAL); } ctx = crypto_skcipher_ctx(tfm); refcount_set(&ctx->refcnt, 1); return container_of(tfm, struct cryptd_skcipher, base); } EXPORT_SYMBOL_GPL(cryptd_alloc_skcipher); struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm) { struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base); return ctx->child; } EXPORT_SYMBOL_GPL(cryptd_skcipher_child); bool cryptd_skcipher_queued(struct cryptd_skcipher *tfm) { struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base); return refcount_read(&ctx->refcnt) - 1; } EXPORT_SYMBOL_GPL(cryptd_skcipher_queued); void cryptd_free_skcipher(struct cryptd_skcipher *tfm) { struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base); if (refcount_dec_and_test(&ctx->refcnt)) crypto_free_skcipher(&tfm->base); } EXPORT_SYMBOL_GPL(cryptd_free_skcipher); struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, u32 type, u32 mask) { char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; struct cryptd_hash_ctx *ctx; struct crypto_ahash *tfm; if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME) return ERR_PTR(-EINVAL); tfm = crypto_alloc_ahash(cryptd_alg_name, type, mask); if (IS_ERR(tfm)) return ERR_CAST(tfm); if (tfm->base.__crt_alg->cra_module != THIS_MODULE) { crypto_free_ahash(tfm); return ERR_PTR(-EINVAL); } ctx = crypto_ahash_ctx(tfm); refcount_set(&ctx->refcnt, 1); return __cryptd_ahash_cast(tfm); } EXPORT_SYMBOL_GPL(cryptd_alloc_ahash); struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm) { struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); return ctx->child; } EXPORT_SYMBOL_GPL(cryptd_ahash_child); struct shash_desc *cryptd_shash_desc(struct ahash_request *req) { struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); return &rctx->desc; } EXPORT_SYMBOL_GPL(cryptd_shash_desc); bool cryptd_ahash_queued(struct cryptd_ahash *tfm) { struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); return refcount_read(&ctx->refcnt) - 1; } EXPORT_SYMBOL_GPL(cryptd_ahash_queued); void cryptd_free_ahash(struct cryptd_ahash *tfm) { struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); if (refcount_dec_and_test(&ctx->refcnt)) crypto_free_ahash(&tfm->base); } EXPORT_SYMBOL_GPL(cryptd_free_ahash); struct cryptd_aead *cryptd_alloc_aead(const char *alg_name, u32 type, u32 mask) { char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; struct cryptd_aead_ctx *ctx; struct crypto_aead *tfm; if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME) return ERR_PTR(-EINVAL); tfm = crypto_alloc_aead(cryptd_alg_name, type, mask); if (IS_ERR(tfm)) return ERR_CAST(tfm); if (tfm->base.__crt_alg->cra_module != THIS_MODULE) { crypto_free_aead(tfm); return ERR_PTR(-EINVAL); } ctx = crypto_aead_ctx(tfm); refcount_set(&ctx->refcnt, 1); return __cryptd_aead_cast(tfm); } EXPORT_SYMBOL_GPL(cryptd_alloc_aead); struct crypto_aead *cryptd_aead_child(struct cryptd_aead *tfm) { struct cryptd_aead_ctx *ctx; ctx = crypto_aead_ctx(&tfm->base); return ctx->child; } EXPORT_SYMBOL_GPL(cryptd_aead_child); bool cryptd_aead_queued(struct cryptd_aead *tfm) { struct cryptd_aead_ctx *ctx = crypto_aead_ctx(&tfm->base); return refcount_read(&ctx->refcnt) - 1; } EXPORT_SYMBOL_GPL(cryptd_aead_queued); void cryptd_free_aead(struct cryptd_aead *tfm) { struct cryptd_aead_ctx *ctx = crypto_aead_ctx(&tfm->base); if (refcount_dec_and_test(&ctx->refcnt)) crypto_free_aead(&tfm->base); } EXPORT_SYMBOL_GPL(cryptd_free_aead); static int __init cryptd_init(void) { int err; cryptd_wq = alloc_workqueue("cryptd", WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, 1); if (!cryptd_wq) return -ENOMEM; err = cryptd_init_queue(&queue, cryptd_max_cpu_qlen); if (err) goto err_destroy_wq; err = crypto_register_template(&cryptd_tmpl); if (err) goto err_fini_queue; return 0; err_fini_queue: cryptd_fini_queue(&queue); err_destroy_wq: destroy_workqueue(cryptd_wq); return err; } static void __exit cryptd_exit(void) { destroy_workqueue(cryptd_wq); cryptd_fini_queue(&queue); crypto_unregister_template(&cryptd_tmpl); } subsys_initcall(cryptd_init); module_exit(cryptd_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Software async crypto daemon"); MODULE_ALIAS_CRYPTO("cryptd"); |
31 2045 2067 2067 2070 215 1991 971 979 2082 1947 1960 31 31 31 31 31 31 11 31 31 30 1 67 31 31 154 152 151 53 73 30 3 66 1 2059 2082 2060 2072 1951 1942 10 1941 30 2063 1950 2064 2064 1470 1558 2082 1 2069 2062 2082 154 2209 2222 1949 345 397 1013 1018 1013 1011 968 1442 2209 1947 1765 1 4 4 4 4 3 1 1 2 4 2 2 5 1 2 3 81 36 2 50 49 74 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner * * High-resolution kernel timers * * In contrast to the low-resolution timeout API, aka timer wheel, * hrtimers provide finer resolution and accuracy depending on system * configuration and capabilities. * * Started by: Thomas Gleixner and Ingo Molnar * * Credits: * Based on the original timer wheel code * * Help, testing, suggestions, bugfixes, improvements were * provided by: * * George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel * et. al. */ #include <linux/cpu.h> #include <linux/export.h> #include <linux/percpu.h> #include <linux/hrtimer.h> #include <linux/notifier.h> #include <linux/syscalls.h> #include <linux/interrupt.h> #include <linux/tick.h> #include <linux/err.h> #include <linux/debugobjects.h> #include <linux/sched/signal.h> #include <linux/sched/sysctl.h> #include <linux/sched/rt.h> #include <linux/sched/deadline.h> #include <linux/sched/nohz.h> #include <linux/sched/debug.h> #include <linux/sched/isolation.h> #include <linux/timer.h> #include <linux/freezer.h> #include <linux/compat.h> #include <linux/uaccess.h> #include <trace/events/timer.h> #include "tick-internal.h" /* * Masks for selecting the soft and hard context timers from * cpu_base->active */ #define MASK_SHIFT (HRTIMER_BASE_MONOTONIC_SOFT) #define HRTIMER_ACTIVE_HARD ((1U << MASK_SHIFT) - 1) #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) /* * The timer bases: * * There are more clockids than hrtimer bases. Thus, we index * into the timer bases by the hrtimer_base_type enum. When trying * to reach a base using a clockid, hrtimer_clockid_to_base() * is used to convert from clockid to the proper hrtimer_base_type. */ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = { .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), .clock_base = { { .index = HRTIMER_BASE_MONOTONIC, .clockid = CLOCK_MONOTONIC, .get_time = &ktime_get, }, { .index = HRTIMER_BASE_REALTIME, .clockid = CLOCK_REALTIME, .get_time = &ktime_get_real, }, { .index = HRTIMER_BASE_BOOTTIME, .clockid = CLOCK_BOOTTIME, .get_time = &ktime_get_boottime, }, { .index = HRTIMER_BASE_TAI, .clockid = CLOCK_TAI, .get_time = &ktime_get_clocktai, }, { .index = HRTIMER_BASE_MONOTONIC_SOFT, .clockid = CLOCK_MONOTONIC, .get_time = &ktime_get, }, { .index = HRTIMER_BASE_REALTIME_SOFT, .clockid = CLOCK_REALTIME, .get_time = &ktime_get_real, }, { .index = HRTIMER_BASE_BOOTTIME_SOFT, .clockid = CLOCK_BOOTTIME, .get_time = &ktime_get_boottime, }, { .index = HRTIMER_BASE_TAI_SOFT, .clockid = CLOCK_TAI, .get_time = &ktime_get_clocktai, }, } }; static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { /* Make sure we catch unsupported clockids */ [0 ... MAX_CLOCKS - 1] = HRTIMER_MAX_CLOCK_BASES, [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, [CLOCK_TAI] = HRTIMER_BASE_TAI, }; /* * Functions and macros which are different for UP/SMP systems are kept in a * single place */ #ifdef CONFIG_SMP /* * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base() * such that hrtimer_callback_running() can unconditionally dereference * timer->base->cpu_base */ static struct hrtimer_cpu_base migration_cpu_base = { .clock_base = { { .cpu_base = &migration_cpu_base, .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq, &migration_cpu_base.lock), }, }, }; #define migration_base migration_cpu_base.clock_base[0] static inline bool is_migration_base(struct hrtimer_clock_base *base) { return base == &migration_base; } /* * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock * means that all timers which are tied to this base via timer->base are * locked, and the base itself is locked too. * * So __run_timers/migrate_timers can safely modify all timers which could * be found on the lists/queues. * * When the timer's base is locked, and the timer removed from list, it is * possible to set timer->base = &migration_base and drop the lock: the timer * remains locked. */ static struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) __acquires(&timer->base->lock) { struct hrtimer_clock_base *base; for (;;) { base = READ_ONCE(timer->base); if (likely(base != &migration_base)) { raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); if (likely(base == timer->base)) return base; /* The timer has migrated to another CPU: */ raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags); } cpu_relax(); } } /* * We do not migrate the timer when it is expiring before the next * event on the target cpu. When high resolution is enabled, we cannot * reprogram the target cpu hardware and we would cause it to fire * late. To keep it simple, we handle the high resolution enabled and * disabled case similar. * * Called with cpu_base->lock of target cpu held. */ static int hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) { ktime_t expires; expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); return expires < new_base->cpu_base->expires_next; } static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned) { #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) if (static_branch_likely(&timers_migration_enabled) && !pinned) return &per_cpu(hrtimer_bases, get_nohz_timer_target()); #endif return base; } /* * We switch the timer base to a power-optimized selected CPU target, * if: * - NO_HZ_COMMON is enabled * - timer migration is enabled * - the timer callback is not running * - the timer is not the first expiring timer on the new target * * If one of the above requirements is not fulfilled we move the timer * to the current CPU or leave it on the previously assigned CPU if * the timer callback is currently running. */ static inline struct hrtimer_clock_base * switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, int pinned) { struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base; struct hrtimer_clock_base *new_base; int basenum = base->index; this_cpu_base = this_cpu_ptr(&hrtimer_bases); new_cpu_base = get_target_base(this_cpu_base, pinned); again: new_base = &new_cpu_base->clock_base[basenum]; if (base != new_base) { /* * We are trying to move timer to new_base. * However we can't change timer's base while it is running, * so we keep it on the same CPU. No hassle vs. reprogramming * the event source in the high resolution case. The softirq * code will take care of this when the timer function has * completed. There is no conflict as we hold the lock until * the timer is enqueued. */ if (unlikely(hrtimer_callback_running(timer))) return base; /* See the comment in lock_hrtimer_base() */ WRITE_ONCE(timer->base, &migration_base); raw_spin_unlock(&base->cpu_base->lock); raw_spin_lock(&new_base->cpu_base->lock); if (new_cpu_base != this_cpu_base && hrtimer_check_target(timer, new_base)) { raw_spin_unlock(&new_base->cpu_base->lock); raw_spin_lock(&base->cpu_base->lock); new_cpu_base = this_cpu_base; WRITE_ONCE(timer->base, base); goto again; } WRITE_ONCE(timer->base, new_base); } else { if (new_cpu_base != this_cpu_base && hrtimer_check_target(timer, new_base)) { new_cpu_base = this_cpu_base; goto again; } } return new_base; } #else /* CONFIG_SMP */ static inline bool is_migration_base(struct hrtimer_clock_base *base) { return false; } static inline struct hrtimer_clock_base * lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) __acquires(&timer->base->cpu_base->lock) { struct hrtimer_clock_base *base = timer->base; raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); return base; } # define switch_hrtimer_base(t, b, p) (b) #endif /* !CONFIG_SMP */ /* * Functions for the union type storage format of ktime_t which are * too large for inlining: */ #if BITS_PER_LONG < 64 /* * Divide a ktime value by a nanosecond value */ s64 __ktime_divns(const ktime_t kt, s64 div) { int sft = 0; s64 dclc; u64 tmp; dclc = ktime_to_ns(kt); tmp = dclc < 0 ? -dclc : dclc; /* Make sure the divisor is less than 2^32: */ while (div >> 32) { sft++; div >>= 1; } tmp >>= sft; do_div(tmp, (u32) div); return dclc < 0 ? -tmp : tmp; } EXPORT_SYMBOL_GPL(__ktime_divns); #endif /* BITS_PER_LONG >= 64 */ /* * Add two ktime values and do a safety check for overflow: */ ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) { ktime_t res = ktime_add_unsafe(lhs, rhs); /* * We use KTIME_SEC_MAX here, the maximum timeout which we can * return to user space in a timespec: */ if (res < 0 || res < lhs || res < rhs) res = ktime_set(KTIME_SEC_MAX, 0); return res; } EXPORT_SYMBOL_GPL(ktime_add_safe); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS static const struct debug_obj_descr hrtimer_debug_descr; static void *hrtimer_debug_hint(void *addr) { return ((struct hrtimer *) addr)->function; } /* * fixup_init is called when: * - an active object is initialized */ static bool hrtimer_fixup_init(void *addr, enum debug_obj_state state) { struct hrtimer *timer = addr; switch (state) { case ODEBUG_STATE_ACTIVE: hrtimer_cancel(timer); debug_object_init(timer, &hrtimer_debug_descr); return true; default: return false; } } /* * fixup_activate is called when: * - an active object is activated * - an unknown non-static object is activated */ static bool hrtimer_fixup_activate(void *addr, enum debug_obj_state state) { switch (state) { case ODEBUG_STATE_ACTIVE: WARN_ON(1); fallthrough; default: return false; } } /* * fixup_free is called when: * - an active object is freed */ static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state) { struct hrtimer *timer = addr; switch (state) { case ODEBUG_STATE_ACTIVE: hrtimer_cancel(timer); debug_object_free(timer, &hrtimer_debug_descr); return true; default: return false; } } static const struct debug_obj_descr hrtimer_debug_descr = { .name = "hrtimer", .debug_hint = hrtimer_debug_hint, .fixup_init = hrtimer_fixup_init, .fixup_activate = hrtimer_fixup_activate, .fixup_free = hrtimer_fixup_free, }; static inline void debug_hrtimer_init(struct hrtimer *timer) { debug_object_init(timer, &hrtimer_debug_descr); } static inline void debug_hrtimer_activate(struct hrtimer *timer, enum hrtimer_mode mode) { debug_object_activate(timer, &hrtimer_debug_descr); } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { debug_object_deactivate(timer, &hrtimer_debug_descr); } static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode); void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { debug_object_init_on_stack(timer, &hrtimer_debug_descr); __hrtimer_init(timer, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_init_on_stack); static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode); void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode) { debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr); __hrtimer_init_sleeper(sl, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack); void destroy_hrtimer_on_stack(struct hrtimer *timer) { debug_object_free(timer, &hrtimer_debug_descr); } EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); #else static inline void debug_hrtimer_init(struct hrtimer *timer) { } static inline void debug_hrtimer_activate(struct hrtimer *timer, enum hrtimer_mode mode) { } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } #endif static inline void debug_init(struct hrtimer *timer, clockid_t clockid, enum hrtimer_mode mode) { debug_hrtimer_init(timer); trace_hrtimer_init(timer, clockid, mode); } static inline void debug_activate(struct hrtimer *timer, enum hrtimer_mode mode) { debug_hrtimer_activate(timer, mode); trace_hrtimer_start(timer, mode); } static inline void debug_deactivate(struct hrtimer *timer) { debug_hrtimer_deactivate(timer); trace_hrtimer_cancel(timer); } static struct hrtimer_clock_base * __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) { unsigned int idx; if (!*active) return NULL; idx = __ffs(*active); *active &= ~(1U << idx); return &cpu_base->clock_base[idx]; } #define for_each_active_base(base, cpu_base, active) \ while ((base = __next_base((cpu_base), &(active)))) static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, const struct hrtimer *exclude, unsigned int active, ktime_t expires_next) { struct hrtimer_clock_base *base; ktime_t expires; for_each_active_base(base, cpu_base, active) { struct timerqueue_node *next; struct hrtimer *timer; next = timerqueue_getnext(&base->active); timer = container_of(next, struct hrtimer, node); if (timer == exclude) { /* Get to the next timer in the queue. */ next = timerqueue_iterate_next(next); if (!next) continue; timer = container_of(next, struct hrtimer, node); } expires = ktime_sub(hrtimer_get_expires(timer), base->offset); if (expires < expires_next) { expires_next = expires; /* Skip cpu_base update if a timer is being excluded. */ if (exclude) continue; if (timer->is_soft) cpu_base->softirq_next_timer = timer; else cpu_base->next_timer = timer; } } /* * clock_was_set() might have changed base->offset of any of * the clock bases so the result might be negative. Fix it up * to prevent a false positive in clockevents_program_event(). */ if (expires_next < 0) expires_next = 0; return expires_next; } /* * Recomputes cpu_base::*next_timer and returns the earliest expires_next * but does not set cpu_base::*expires_next, that is done by * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating * cpu_base::*expires_next right away, reprogramming logic would no longer * work. * * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases, * those timers will get run whenever the softirq gets handled, at the end of * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases. * * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases. * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD. * * @active_mask must be one of: * - HRTIMER_ACTIVE_ALL, * - HRTIMER_ACTIVE_SOFT, or * - HRTIMER_ACTIVE_HARD. */ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask) { unsigned int active; struct hrtimer *next_timer = NULL; ktime_t expires_next = KTIME_MAX; if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) { active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; cpu_base->softirq_next_timer = NULL; expires_next = __hrtimer_next_event_base(cpu_base, NULL, active, KTIME_MAX); next_timer = cpu_base->softirq_next_timer; } if (active_mask & HRTIMER_ACTIVE_HARD) { active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; cpu_base->next_timer = next_timer; expires_next = __hrtimer_next_event_base(cpu_base, NULL, active, expires_next); } return expires_next; } static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base) { ktime_t expires_next, soft = KTIME_MAX; /* * If the soft interrupt has already been activated, ignore the * soft bases. They will be handled in the already raised soft * interrupt. */ if (!cpu_base->softirq_activated) { soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); /* * Update the soft expiry time. clock_settime() might have * affected it. */ cpu_base->softirq_expires_next = soft; } expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); /* * If a softirq timer is expiring first, update cpu_base->next_timer * and program the hardware with the soft expiry time. */ if (expires_next > soft) { cpu_base->next_timer = cpu_base->softirq_next_timer; expires_next = soft; } return expires_next; } static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) { ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, offs_real, offs_boot, offs_tai); base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai; return now; } /* * Is the high resolution mode active ? */ static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) { return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? cpu_base->hres_active : 0; } static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, struct hrtimer *next_timer, ktime_t expires_next) { cpu_base->expires_next = expires_next; /* * If hres is not active, hardware does not have to be * reprogrammed yet. * * If a hang was detected in the last timer interrupt then we * leave the hang delay active in the hardware. We want the * system to make progress. That also prevents the following * scenario: * T1 expires 50ms from now * T2 expires 5s from now * * T1 is removed, so this code is called and would reprogram * the hardware to 5s from now. Any hrtimer_start after that * will not reprogram the hardware due to hang_detected being * set. So we'd effectively block all timers until the T2 event * fires. */ if (!hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) return; tick_program_event(expires_next, 1); } /* * Reprogram the event source with checking both queues for the * next event * Called with interrupts disabled and base->lock held */ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { ktime_t expires_next; expires_next = hrtimer_update_next_event(cpu_base); if (skip_equal && expires_next == cpu_base->expires_next) return; __hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next); } /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS /* * High resolution timer enabled ? */ static bool hrtimer_hres_enabled __read_mostly = true; unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; EXPORT_SYMBOL_GPL(hrtimer_resolution); /* * Enable / Disable high resolution mode */ static int __init setup_hrtimer_hres(char *str) { return (kstrtobool(str, &hrtimer_hres_enabled) == 0); } __setup("highres=", setup_hrtimer_hres); /* * hrtimer_high_res_enabled - query, if the highres mode is enabled */ static inline int hrtimer_is_hres_enabled(void) { return hrtimer_hres_enabled; } static void retrigger_next_event(void *arg); /* * Switch to high resolution mode */ static void hrtimer_switch_to_hres(void) { struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); if (tick_init_highres()) { pr_warn("Could not switch to high resolution mode on CPU %u\n", base->cpu); return; } base->hres_active = 1; hrtimer_resolution = HIGH_RES_NSEC; tick_setup_sched_timer(true); /* "Retrigger" the interrupt to get things going */ retrigger_next_event(NULL); } #else static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline void hrtimer_switch_to_hres(void) { } #endif /* CONFIG_HIGH_RES_TIMERS */ /* * Retrigger next event is called after clock was set with interrupts * disabled through an SMP function call or directly from low level * resume code. * * This is only invoked when: * - CONFIG_HIGH_RES_TIMERS is enabled. * - CONFIG_NOHZ_COMMON is enabled * * For the other cases this function is empty and because the call sites * are optimized out it vanishes as well, i.e. no need for lots of * #ifdeffery. */ static void retrigger_next_event(void *arg) { struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); /* * When high resolution mode or nohz is active, then the offsets of * CLOCK_REALTIME/TAI/BOOTTIME have to be updated. Otherwise the * next tick will take care of that. * * If high resolution mode is active then the next expiring timer * must be reevaluated and the clock event device reprogrammed if * necessary. * * In the NOHZ case the update of the offset and the reevaluation * of the next expiring timer is enough. The return from the SMP * function call will take care of the reprogramming in case the * CPU was in a NOHZ idle sleep. */ if (!hrtimer_hres_active(base) && !tick_nohz_active) return; raw_spin_lock(&base->lock); hrtimer_update_base(base); if (hrtimer_hres_active(base)) hrtimer_force_reprogram(base, 0); else hrtimer_update_next_event(base); raw_spin_unlock(&base->lock); } /* * When a timer is enqueued and expires earlier than the already enqueued * timers, we have to check, whether it expires earlier than the timer for * which the clock event device was armed. * * Called with interrupts disabled and base->cpu_base.lock held */ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *base = timer->base; ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); /* * CLOCK_REALTIME timer might be requested with an absolute * expiry time which is less than base->offset. Set it to 0. */ if (expires < 0) expires = 0; if (timer->is_soft) { /* * soft hrtimer could be started on a remote CPU. In this * case softirq_expires_next needs to be updated on the * remote CPU. The soft hrtimer will not expire before the * first hard hrtimer on the remote CPU - * hrtimer_check_target() prevents this case. */ struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base; if (timer_cpu_base->softirq_activated) return; if (!ktime_before(expires, timer_cpu_base->softirq_expires_next)) return; timer_cpu_base->softirq_next_timer = timer; timer_cpu_base->softirq_expires_next = expires; if (!ktime_before(expires, timer_cpu_base->expires_next) || !reprogram) return; } /* * If the timer is not on the current cpu, we cannot reprogram * the other cpus clock event device. */ if (base->cpu_base != cpu_base) return; if (expires >= cpu_base->expires_next) return; /* * If the hrtimer interrupt is running, then it will reevaluate the * clock bases and reprogram the clock event device. */ if (cpu_base->in_hrtirq) return; cpu_base->next_timer = timer; __hrtimer_reprogram(cpu_base, timer, expires); } static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, unsigned int active) { struct hrtimer_clock_base *base; unsigned int seq; ktime_t expires; /* * Update the base offsets unconditionally so the following * checks whether the SMP function call is required works. * * The update is safe even when the remote CPU is in the hrtimer * interrupt or the hrtimer soft interrupt and expiring affected * bases. Either it will see the update before handling a base or * it will see it when it finishes the processing and reevaluates * the next expiring timer. */ seq = cpu_base->clock_was_set_seq; hrtimer_update_base(cpu_base); /* * If the sequence did not change over the update then the * remote CPU already handled it. */ if (seq == cpu_base->clock_was_set_seq) return false; /* * If the remote CPU is currently handling an hrtimer interrupt, it * will reevaluate the first expiring timer of all clock bases * before reprogramming. Nothing to do here. */ if (cpu_base->in_hrtirq) return false; /* * Walk the affected clock bases and check whether the first expiring * timer in a clock base is moving ahead of the first expiring timer of * @cpu_base. If so, the IPI must be invoked because per CPU clock * event devices cannot be remotely reprogrammed. */ active &= cpu_base->active_bases; for_each_active_base(base, cpu_base, active) { struct timerqueue_node *next; next = timerqueue_getnext(&base->active); expires = ktime_sub(next->expires, base->offset); if (expires < cpu_base->expires_next) return true; /* Extra check for softirq clock bases */ if (base->clockid < HRTIMER_BASE_MONOTONIC_SOFT) continue; if (cpu_base->softirq_activated) continue; if (expires < cpu_base->softirq_expires_next) return true; } return false; } /* * Clock was set. This might affect CLOCK_REALTIME, CLOCK_TAI and * CLOCK_BOOTTIME (for late sleep time injection). * * This requires to update the offsets for these clocks * vs. CLOCK_MONOTONIC. When high resolution timers are enabled, then this * also requires to eventually reprogram the per CPU clock event devices * when the change moves an affected timer ahead of the first expiring * timer on that CPU. Obviously remote per CPU clock event devices cannot * be reprogrammed. The other reason why an IPI has to be sent is when the * system is in !HIGH_RES and NOHZ mode. The NOHZ mode updates the offsets * in the tick, which obviously might be stopped, so this has to bring out * the remote CPU which might sleep in idle to get this sorted. */ void clock_was_set(unsigned int bases) { struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases); cpumask_var_t mask; int cpu; if (!hrtimer_hres_active(cpu_base) && !tick_nohz_active) goto out_timerfd; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { on_each_cpu(retrigger_next_event, NULL, 1); goto out_timerfd; } /* Avoid interrupting CPUs if possible */ cpus_read_lock(); for_each_online_cpu(cpu) { unsigned long flags; cpu_base = &per_cpu(hrtimer_bases, cpu); raw_spin_lock_irqsave(&cpu_base->lock, flags); if (update_needs_ipi(cpu_base, bases)) cpumask_set_cpu(cpu, mask); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); } preempt_disable(); smp_call_function_many(mask, retrigger_next_event, NULL, 1); preempt_enable(); cpus_read_unlock(); free_cpumask_var(mask); out_timerfd: timerfd_clock_was_set(); } static void clock_was_set_work(struct work_struct *work) { clock_was_set(CLOCK_SET_WALL); } static DECLARE_WORK(hrtimer_work, clock_was_set_work); /* * Called from timekeeping code to reprogram the hrtimer interrupt device * on all cpus and to notify timerfd. */ void clock_was_set_delayed(void) { schedule_work(&hrtimer_work); } /* * Called during resume either directly from via timekeeping_resume() * or in the case of s2idle from tick_unfreeze() to ensure that the * hrtimers are up to date. */ void hrtimers_resume_local(void) { lockdep_assert_irqs_disabled(); /* Retrigger on the local CPU */ retrigger_next_event(NULL); } /* * Counterpart to lock_hrtimer_base above: */ static inline void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) __releases(&timer->base->cpu_base->lock) { raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); } /** * hrtimer_forward() - forward the timer expiry * @timer: hrtimer to forward * @now: forward past this time * @interval: the interval to forward * * Forward the timer expiry so it will expire in the future. * * .. note:: * This only updates the timer expiry value and does not requeue the timer. * * There is also a variant of the function hrtimer_forward_now(). * * Context: Can be safely called from the callback function of @timer. If called * from other contexts @timer must neither be enqueued nor running the * callback and the caller needs to take care of serialization. * * Return: The number of overruns are returned. */ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) { u64 orun = 1; ktime_t delta; delta = ktime_sub(now, hrtimer_get_expires(timer)); if (delta < 0) return 0; if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED)) return 0; if (interval < hrtimer_resolution) interval = hrtimer_resolution; if (unlikely(delta >= interval)) { s64 incr = ktime_to_ns(interval); orun = ktime_divns(delta, incr); hrtimer_add_expires_ns(timer, incr * orun); if (hrtimer_get_expires_tv64(timer) > now) return orun; /* * This (and the ktime_add() below) is the * correction for exact: */ orun++; } hrtimer_add_expires(timer, interval); return orun; } EXPORT_SYMBOL_GPL(hrtimer_forward); /* * enqueue_hrtimer - internal function to (re)start a timer * * The timer is inserted in expiry order. Insertion into the * red black tree is O(log(n)). Must hold the base lock. * * Returns 1 when the new timer is the leftmost timer in the tree. */ static int enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, enum hrtimer_mode mode) { debug_activate(timer, mode); WARN_ON_ONCE(!base->cpu_base->online); base->cpu_base->active_bases |= 1 << base->index; /* Pairs with the lockless read in hrtimer_is_queued() */ WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED); return timerqueue_add(&base->active, &timer->node); } /* * __remove_hrtimer - internal function to remove a timer * * Caller must hold the base lock. * * High resolution timer mode reprograms the clock event device when the * timer is the one which expires next. The caller can disable this by setting * reprogram to zero. This is useful, when the context does a reprogramming * anyway (e.g. timer interrupt) */ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, u8 newstate, int reprogram) { struct hrtimer_cpu_base *cpu_base = base->cpu_base; u8 state = timer->state; /* Pairs with the lockless read in hrtimer_is_queued() */ WRITE_ONCE(timer->state, newstate); if (!(state & HRTIMER_STATE_ENQUEUED)) return; if (!timerqueue_del(&base->active, &timer->node)) cpu_base->active_bases &= ~(1 << base->index); /* * Note: If reprogram is false we do not update * cpu_base->next_timer. This happens when we remove the first * timer on a remote cpu. No harm as we never dereference * cpu_base->next_timer. So the worst thing what can happen is * an superfluous call to hrtimer_force_reprogram() on the * remote cpu later on if the same timer gets enqueued again. */ if (reprogram && timer == cpu_base->next_timer) hrtimer_force_reprogram(cpu_base, 1); } /* * remove hrtimer, called with base lock held */ static inline int remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart, bool keep_local) { u8 state = timer->state; if (state & HRTIMER_STATE_ENQUEUED) { bool reprogram; /* * Remove the timer and force reprogramming when high * resolution mode is active and the timer is on the current * CPU. If we remove a timer on another CPU, reprogramming is * skipped. The interrupt event on this CPU is fired and * reprogramming happens in the interrupt handler. This is a * rare case and less expensive than a smp call. */ debug_deactivate(timer); reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); /* * If the timer is not restarted then reprogramming is * required if the timer is local. If it is local and about * to be restarted, avoid programming it twice (on removal * and a moment later when it's requeued). */ if (!restart) state = HRTIMER_STATE_INACTIVE; else reprogram &= !keep_local; __remove_hrtimer(timer, base, state, reprogram); return 1; } return 0; } static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) { #ifdef CONFIG_TIME_LOW_RES /* * CONFIG_TIME_LOW_RES indicates that the system has no way to return * granular time values. For relative timers we add hrtimer_resolution * (i.e. one jiffy) to prevent short timeouts. */ timer->is_rel = mode & HRTIMER_MODE_REL; if (timer->is_rel) tim = ktime_add_safe(tim, hrtimer_resolution); #endif return tim; } static void hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram) { ktime_t expires; /* * Find the next SOFT expiration. */ expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); /* * reprogramming needs to be triggered, even if the next soft * hrtimer expires at the same time than the next hard * hrtimer. cpu_base->softirq_expires_next needs to be updated! */ if (expires == KTIME_MAX) return; /* * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event() * cpu_base->*expires_next is only set by hrtimer_reprogram() */ hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram); } static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode, struct hrtimer_clock_base *base) { struct hrtimer_clock_base *new_base; bool force_local, first; /* * If the timer is on the local cpu base and is the first expiring * timer then this might end up reprogramming the hardware twice * (on removal and on enqueue). To avoid that by prevent the * reprogram on removal, keep the timer local to the current CPU * and enforce reprogramming after it is queued no matter whether * it is the new first expiring timer again or not. */ force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases); force_local &= base->cpu_base->next_timer == timer; /* * Remove an active timer from the queue. In case it is not queued * on the current CPU, make sure that remove_hrtimer() updates the * remote data correctly. * * If it's on the current CPU and the first expiring timer, then * skip reprogramming, keep the timer local and enforce * reprogramming later if it was the first expiring timer. This * avoids programming the underlying clock event twice (once at * removal and once after enqueue). */ remove_hrtimer(timer, base, true, force_local); if (mode & HRTIMER_MODE_REL) tim = ktime_add_safe(tim, base->get_time()); tim = hrtimer_update_lowres(timer, tim, mode); hrtimer_set_expires_range_ns(timer, tim, delta_ns); /* Switch the timer base, if necessary: */ if (!force_local) { new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); } else { new_base = base; } first = enqueue_hrtimer(timer, new_base, mode); if (!force_local) return first; /* * Timer was forced to stay on the current CPU to avoid * reprogramming on removal and enqueue. Force reprogram the * hardware by evaluating the new first expiring timer. */ hrtimer_force_reprogram(new_base->cpu_base, 1); return 0; } /** * hrtimer_start_range_ns - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time * @delta_ns: "slack" range for the timer * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); * softirq based mode is considered for debug purpose only! */ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode) { struct hrtimer_clock_base *base; unsigned long flags; if (WARN_ON_ONCE(!timer->function)) return; /* * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard * expiry mode because unmarked timers are moved to softirq expiry. */ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); else WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard); base = lock_hrtimer_base(timer, &flags); if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) hrtimer_reprogram(timer, true); unlock_hrtimer_base(timer, &flags); } EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); /** * hrtimer_try_to_cancel - try to deactivate a timer * @timer: hrtimer to stop * * Returns: * * * 0 when the timer was not active * * 1 when the timer was active * * -1 when the timer is currently executing the callback function and * cannot be stopped */ int hrtimer_try_to_cancel(struct hrtimer *timer) { struct hrtimer_clock_base *base; unsigned long flags; int ret = -1; /* * Check lockless first. If the timer is not active (neither * enqueued nor running the callback, nothing to do here. The * base lock does not serialize against a concurrent enqueue, * so we can avoid taking it. */ if (!hrtimer_active(timer)) return 0; base = lock_hrtimer_base(timer, &flags); if (!hrtimer_callback_running(timer)) ret = remove_hrtimer(timer, base, false, false); unlock_hrtimer_base(timer, &flags); return ret; } EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); #ifdef CONFIG_PREEMPT_RT static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { spin_lock_init(&base->softirq_expiry_lock); } static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) __acquires(&base->softirq_expiry_lock) { spin_lock(&base->softirq_expiry_lock); } static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) __releases(&base->softirq_expiry_lock) { spin_unlock(&base->softirq_expiry_lock); } /* * The counterpart to hrtimer_cancel_wait_running(). * * If there is a waiter for cpu_base->expiry_lock, then it was waiting for * the timer callback to finish. Drop expiry_lock and reacquire it. That * allows the waiter to acquire the lock and make progress. */ static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, unsigned long flags) { if (atomic_read(&cpu_base->timer_waiters)) { raw_spin_unlock_irqrestore(&cpu_base->lock, flags); spin_unlock(&cpu_base->softirq_expiry_lock); spin_lock(&cpu_base->softirq_expiry_lock); raw_spin_lock_irq(&cpu_base->lock); } } /* * This function is called on PREEMPT_RT kernels when the fast path * deletion of a timer failed because the timer callback function was * running. * * This prevents priority inversion: if the soft irq thread is preempted * in the middle of a timer callback, then calling del_timer_sync() can * lead to two issues: * * - If the caller is on a remote CPU then it has to spin wait for the timer * handler to complete. This can result in unbound priority inversion. * * - If the caller originates from the task which preempted the timer * handler on the same CPU, then spin waiting for the timer handler to * complete is never going to end. */ void hrtimer_cancel_wait_running(const struct hrtimer *timer) { /* Lockless read. Prevent the compiler from reloading it below */ struct hrtimer_clock_base *base = READ_ONCE(timer->base); /* * Just relax if the timer expires in hard interrupt context or if * it is currently on the migration base. */ if (!timer->is_soft || is_migration_base(base)) { cpu_relax(); return; } /* * Mark the base as contended and grab the expiry lock, which is * held by the softirq across the timer callback. Drop the lock * immediately so the softirq can expire the next timer. In theory * the timer could already be running again, but that's more than * unlikely and just causes another wait loop. */ atomic_inc(&base->cpu_base->timer_waiters); spin_lock_bh(&base->cpu_base->softirq_expiry_lock); atomic_dec(&base->cpu_base->timer_waiters); spin_unlock_bh(&base->cpu_base->softirq_expiry_lock); } #else static inline void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { } static inline void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { } static inline void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { } static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base, unsigned long flags) { } #endif /** * hrtimer_cancel - cancel a timer and wait for the handler to finish. * @timer: the timer to be cancelled * * Returns: * 0 when the timer was not active * 1 when the timer was active */ int hrtimer_cancel(struct hrtimer *timer) { int ret; do { ret = hrtimer_try_to_cancel(timer); if (ret < 0) hrtimer_cancel_wait_running(timer); } while (ret < 0); return ret; } EXPORT_SYMBOL_GPL(hrtimer_cancel); /** * __hrtimer_get_remaining - get remaining time for the timer * @timer: the timer to read * @adjust: adjust relative timers when CONFIG_TIME_LOW_RES=y */ ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust) { unsigned long flags; ktime_t rem; lock_hrtimer_base(timer, &flags); if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust) rem = hrtimer_expires_remaining_adjusted(timer); else rem = hrtimer_expires_remaining(timer); unlock_hrtimer_base(timer, &flags); return rem; } EXPORT_SYMBOL_GPL(__hrtimer_get_remaining); #ifdef CONFIG_NO_HZ_COMMON /** * hrtimer_get_next_event - get the time until next expiry event * * Returns the next expiry time or KTIME_MAX if no timer is pending. */ u64 hrtimer_get_next_event(void) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); u64 expires = KTIME_MAX; unsigned long flags; raw_spin_lock_irqsave(&cpu_base->lock, flags); if (!hrtimer_hres_active(cpu_base)) expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); return expires; } /** * hrtimer_next_event_without - time until next expiry event w/o one timer * @exclude: timer to exclude * * Returns the next expiry time over all timers except for the @exclude one or * KTIME_MAX if none of them is pending. */ u64 hrtimer_next_event_without(const struct hrtimer *exclude) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); u64 expires = KTIME_MAX; unsigned long flags; raw_spin_lock_irqsave(&cpu_base->lock, flags); if (hrtimer_hres_active(cpu_base)) { unsigned int active; if (!cpu_base->softirq_activated) { active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; expires = __hrtimer_next_event_base(cpu_base, exclude, active, KTIME_MAX); } active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; expires = __hrtimer_next_event_base(cpu_base, exclude, active, expires); } raw_spin_unlock_irqrestore(&cpu_base->lock, flags); return expires; } #endif static inline int hrtimer_clockid_to_base(clockid_t clock_id) { if (likely(clock_id < MAX_CLOCKS)) { int base = hrtimer_clock_to_base_table[clock_id]; if (likely(base != HRTIMER_MAX_CLOCK_BASES)) return base; } WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); return HRTIMER_BASE_MONOTONIC; } static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { bool softtimer = !!(mode & HRTIMER_MODE_SOFT); struct hrtimer_cpu_base *cpu_base; int base; /* * On PREEMPT_RT enabled kernels hrtimers which are not explicitly * marked for hard interrupt expiry mode are moved into soft * interrupt context for latency reasons and because the callbacks * can invoke functions which might sleep on RT, e.g. spin_lock(). */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD)) softtimer = true; memset(timer, 0, sizeof(struct hrtimer)); cpu_base = raw_cpu_ptr(&hrtimer_bases); /* * POSIX magic: Relative CLOCK_REALTIME timers are not affected by * clock modifications, so they needs to become CLOCK_MONOTONIC to * ensure POSIX compliance. */ if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) clock_id = CLOCK_MONOTONIC; base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; base += hrtimer_clockid_to_base(clock_id); timer->is_soft = softtimer; timer->is_hard = !!(mode & HRTIMER_MODE_HARD); timer->base = &cpu_base->clock_base[base]; timerqueue_init(&timer->node); } /** * hrtimer_init - initialize a timer to the given clock * @timer: the timer to be initialized * @clock_id: the clock to be used * @mode: The modes which are relevant for initialization: * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT, * HRTIMER_MODE_REL_SOFT * * The PINNED variants of the above can be handed in, * but the PINNED bit is ignored as pinning happens * when the hrtimer is started */ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { debug_init(timer, clock_id, mode); __hrtimer_init(timer, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_init); /* * A timer is active, when it is enqueued into the rbtree or the * callback function is running or it's in the state of being migrated * to another cpu. * * It is important for this function to not return a false negative. */ bool hrtimer_active(const struct hrtimer *timer) { struct hrtimer_clock_base *base; unsigned int seq; do { base = READ_ONCE(timer->base); seq = raw_read_seqcount_begin(&base->seq); if (timer->state != HRTIMER_STATE_INACTIVE || base->running == timer) return true; } while (read_seqcount_retry(&base->seq, seq) || base != READ_ONCE(timer->base)); return false; } EXPORT_SYMBOL_GPL(hrtimer_active); /* * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3 * distinct sections: * * - queued: the timer is queued * - callback: the timer is being ran * - post: the timer is inactive or (re)queued * * On the read side we ensure we observe timer->state and cpu_base->running * from the same section, if anything changed while we looked at it, we retry. * This includes timer->base changing because sequence numbers alone are * insufficient for that. * * The sequence numbers are required because otherwise we could still observe * a false negative if the read side got smeared over multiple consecutive * __run_hrtimer() invocations. */ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, struct hrtimer_clock_base *base, struct hrtimer *timer, ktime_t *now, unsigned long flags) __must_hold(&cpu_base->lock) { enum hrtimer_restart (*fn)(struct hrtimer *); bool expires_in_hardirq; int restart; lockdep_assert_held(&cpu_base->lock); debug_deactivate(timer); base->running = timer; /* * Separate the ->running assignment from the ->state assignment. * * As with a regular write barrier, this ensures the read side in * hrtimer_active() cannot observe base->running == NULL && * timer->state == INACTIVE. */ raw_write_seqcount_barrier(&base->seq); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); fn = timer->function; /* * Clear the 'is relative' flag for the TIME_LOW_RES case. If the * timer is restarted with a period then it becomes an absolute * timer. If its not restarted it does not matter. */ if (IS_ENABLED(CONFIG_TIME_LOW_RES)) timer->is_rel = false; /* * The timer is marked as running in the CPU base, so it is * protected against migration to a different CPU even if the lock * is dropped. */ raw_spin_unlock_irqrestore(&cpu_base->lock, flags); trace_hrtimer_expire_entry(timer, now); expires_in_hardirq = lockdep_hrtimer_enter(timer); restart = fn(timer); lockdep_hrtimer_exit(expires_in_hardirq); trace_hrtimer_expire_exit(timer); raw_spin_lock_irq(&cpu_base->lock); /* * Note: We clear the running state after enqueue_hrtimer and * we do not reprogram the event hardware. Happens either in * hrtimer_start_range_ns() or in hrtimer_interrupt() * * Note: Because we dropped the cpu_base->lock above, * hrtimer_start_range_ns() can have popped in and enqueued the timer * for us already. */ if (restart != HRTIMER_NORESTART && !(timer->state & HRTIMER_STATE_ENQUEUED)) enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS); /* * Separate the ->running assignment from the ->state assignment. * * As with a regular write barrier, this ensures the read side in * hrtimer_active() cannot observe base->running.timer == NULL && * timer->state == INACTIVE. */ raw_write_seqcount_barrier(&base->seq); WARN_ON_ONCE(base->running != timer); base->running = NULL; } static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, unsigned long flags, unsigned int active_mask) { struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases & active_mask; for_each_active_base(base, cpu_base, active) { struct timerqueue_node *node; ktime_t basenow; basenow = ktime_add(now, base->offset); while ((node = timerqueue_getnext(&base->active))) { struct hrtimer *timer; timer = container_of(node, struct hrtimer, node); /* * The immediate goal for using the softexpires is * minimizing wakeups, not running timers at the * earliest interrupt after their soft expiration. * This allows us to avoid using a Priority Search * Tree, which can answer a stabbing query for * overlapping intervals and instead use the simple * BST we already have. * We don't add extra wakeups by delaying timers that * are right-of a not yet expired timer, because that * timer will have to trigger a wakeup anyway. */ if (basenow < hrtimer_get_softexpires_tv64(timer)) break; __run_hrtimer(cpu_base, base, timer, &basenow, flags); if (active_mask == HRTIMER_ACTIVE_SOFT) hrtimer_sync_wait_running(cpu_base, flags); } } } static __latent_entropy void hrtimer_run_softirq(void) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); unsigned long flags; ktime_t now; hrtimer_cpu_base_lock_expiry(cpu_base); raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT); cpu_base->softirq_activated = 0; hrtimer_update_softirq_timer(cpu_base, true); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); hrtimer_cpu_base_unlock_expiry(cpu_base); } #ifdef CONFIG_HIGH_RES_TIMERS /* * High resolution timer interrupt * Called with interrupts disabled */ void hrtimer_interrupt(struct clock_event_device *dev) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); ktime_t expires_next, now, entry_time, delta; unsigned long flags; int retries = 0; BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; dev->next_event = KTIME_MAX; raw_spin_lock_irqsave(&cpu_base->lock, flags); entry_time = now = hrtimer_update_base(cpu_base); retry: cpu_base->in_hrtirq = 1; /* * We set expires_next to KTIME_MAX here with cpu_base->lock * held to prevent that a timer is enqueued in our queue via * the migration code. This does not affect enqueueing of * timers which run their callback and need to be requeued on * this CPU. */ cpu_base->expires_next = KTIME_MAX; if (!ktime_before(now, cpu_base->softirq_expires_next)) { cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->softirq_activated = 1; raise_softirq_irqoff(HRTIMER_SOFTIRQ); } __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); /* Reevaluate the clock bases for the [soft] next expiry */ expires_next = hrtimer_update_next_event(cpu_base); /* * Store the new expiry value so the migration code can verify * against it. */ cpu_base->expires_next = expires_next; cpu_base->in_hrtirq = 0; raw_spin_unlock_irqrestore(&cpu_base->lock, flags); /* Reprogramming necessary ? */ if (!tick_program_event(expires_next, 0)) { cpu_base->hang_detected = 0; return; } /* * The next timer was already expired due to: * - tracing * - long lasting callbacks * - being scheduled away when running in a VM * * We need to prevent that we loop forever in the hrtimer * interrupt routine. We give it 3 attempts to avoid * overreacting on some spurious event. * * Acquire base lock for updating the offsets and retrieving * the current time. */ raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); cpu_base->nr_retries++; if (++retries < 3) goto retry; /* * Give the system a chance to do something else than looping * here. We stored the entry time, so we know exactly how long * we spent here. We schedule the next event this amount of * time away. */ cpu_base->nr_hangs++; cpu_base->hang_detected = 1; raw_spin_unlock_irqrestore(&cpu_base->lock, flags); delta = ktime_sub(now, entry_time); if ((unsigned int)delta > cpu_base->max_hang_time) cpu_base->max_hang_time = (unsigned int) delta; /* * Limit it to a sensible value as we enforce a longer * delay. Give the CPU at least 100ms to catch up. */ if (delta > 100 * NSEC_PER_MSEC) expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC); else expires_next = ktime_add(now, delta); tick_program_event(expires_next, 1); pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta)); } #endif /* !CONFIG_HIGH_RES_TIMERS */ /* * Called from run_local_timers in hardirq context every jiffy */ void hrtimer_run_queues(void) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); unsigned long flags; ktime_t now; if (hrtimer_hres_active(cpu_base)) return; /* * This _is_ ugly: We have to check periodically, whether we * can switch to highres and / or nohz mode. The clocksource * switch happens with xtime_lock held. Notification from * there only sets the check bit in the tick_oneshot code, * otherwise we might deadlock vs. xtime_lock. */ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) { hrtimer_switch_to_hres(); return; } raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); if (!ktime_before(now, cpu_base->softirq_expires_next)) { cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->softirq_activated = 1; raise_softirq_irqoff(HRTIMER_SOFTIRQ); } __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); } /* * Sleep related functions: */ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) { struct hrtimer_sleeper *t = container_of(timer, struct hrtimer_sleeper, timer); struct task_struct *task = t->task; t->task = NULL; if (task) wake_up_process(task); return HRTIMER_NORESTART; } /** * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer * @sl: sleeper to be started * @mode: timer mode abs/rel * * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context) */ void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, enum hrtimer_mode mode) { /* * Make the enqueue delivery mode check work on RT. If the sleeper * was initialized for hard interrupt delivery, force the mode bit. * This is a special case for hrtimer_sleepers because * hrtimer_init_sleeper() determines the delivery mode on RT so the * fiddling with this decision is avoided at the call sites. */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard) mode |= HRTIMER_MODE_HARD; hrtimer_start_expires(&sl->timer, mode); } EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires); static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode) { /* * On PREEMPT_RT enabled kernels hrtimers which are not explicitly * marked for hard interrupt expiry mode are moved into soft * interrupt context either for latency reasons or because the * hrtimer callback takes regular spinlocks or invokes other * functions which are not suitable for hard interrupt context on * PREEMPT_RT. * * The hrtimer_sleeper callback is RT compatible in hard interrupt * context, but there is a latency concern: Untrusted userspace can * spawn many threads which arm timers for the same expiry time on * the same CPU. That causes a latency spike due to the wakeup of * a gazillion threads. * * OTOH, privileged real-time user space applications rely on the * low latency of hard interrupt wakeups. If the current task is in * a real-time scheduling class, mark the mode for hard interrupt * expiry. */ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { if (rt_or_dl_task_policy(current) && !(mode & HRTIMER_MODE_SOFT)) mode |= HRTIMER_MODE_HARD; } __hrtimer_init(&sl->timer, clock_id, mode); sl->timer.function = hrtimer_wakeup; sl->task = current; } /** * hrtimer_init_sleeper - initialize sleeper to the given clock * @sl: sleeper to be initialized * @clock_id: the clock to be used * @mode: timer mode abs/rel */ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode) { debug_init(&sl->timer, clock_id, mode); __hrtimer_init_sleeper(sl, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) { switch(restart->nanosleep.type) { #ifdef CONFIG_COMPAT_32BIT_TIME case TT_COMPAT: if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp)) return -EFAULT; break; #endif case TT_NATIVE: if (put_timespec64(ts, restart->nanosleep.rmtp)) return -EFAULT; break; default: BUG(); } return -ERESTART_RESTARTBLOCK; } static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) { struct restart_block *restart; do { set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); hrtimer_sleeper_start_expires(t, mode); if (likely(t->task)) schedule(); hrtimer_cancel(&t->timer); mode = HRTIMER_MODE_ABS; } while (t->task && !signal_pending(current)); __set_current_state(TASK_RUNNING); if (!t->task) return 0; restart = ¤t->restart_block; if (restart->nanosleep.type != TT_NONE) { ktime_t rem = hrtimer_expires_remaining(&t->timer); struct timespec64 rmt; if (rem <= 0) return 0; rmt = ktime_to_timespec64(rem); return nanosleep_copyout(restart, &rmt); } return -ERESTART_RESTARTBLOCK; } static long __sched hrtimer_nanosleep_restart(struct restart_block *restart) { struct hrtimer_sleeper t; int ret; hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid, HRTIMER_MODE_ABS); hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); ret = do_nanosleep(&t, HRTIMER_MODE_ABS); destroy_hrtimer_on_stack(&t.timer); return ret; } long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, const clockid_t clockid) { struct restart_block *restart; struct hrtimer_sleeper t; int ret = 0; hrtimer_init_sleeper_on_stack(&t, clockid, mode); hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns); ret = do_nanosleep(&t, mode); if (ret != -ERESTART_RESTARTBLOCK) goto out; /* Absolute timers do not update the rmtp value and restart: */ if (mode == HRTIMER_MODE_ABS) { ret = -ERESTARTNOHAND; goto out; } restart = ¤t->restart_block; restart->nanosleep.clockid = t.timer.base->clockid; restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); set_restart_fn(restart, hrtimer_nanosleep_restart); out: destroy_hrtimer_on_stack(&t.timer); return ret; } #ifdef CONFIG_64BIT SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp, struct __kernel_timespec __user *, rmtp) { struct timespec64 tu; if (get_timespec64(&tu, rqtp)) return -EFAULT; if (!timespec64_valid(&tu)) return -EINVAL; current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, CLOCK_MONOTONIC); } #endif #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, struct old_timespec32 __user *, rmtp) { struct timespec64 tu; if (get_old_timespec32(&tu, rqtp)) return -EFAULT; if (!timespec64_valid(&tu)) return -EINVAL; current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, CLOCK_MONOTONIC); } #endif /* * Functions related to boot-time initialization: */ int hrtimers_prepare_cpu(unsigned int cpu) { struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i]; clock_b->cpu_base = cpu_base; seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock); timerqueue_init_head(&clock_b->active); } cpu_base->cpu = cpu; cpu_base->active_bases = 0; cpu_base->hres_active = 0; cpu_base->hang_detected = 0; cpu_base->next_timer = NULL; cpu_base->softirq_next_timer = NULL; cpu_base->expires_next = KTIME_MAX; cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->online = 1; hrtimer_cpu_base_init_expiry_lock(cpu_base); return 0; } #ifdef CONFIG_HOTPLUG_CPU static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, struct hrtimer_clock_base *new_base) { struct hrtimer *timer; struct timerqueue_node *node; while ((node = timerqueue_getnext(&old_base->active))) { timer = container_of(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); debug_deactivate(timer); /* * Mark it as ENQUEUED not INACTIVE otherwise the * timer could be seen as !active and just vanish away * under us on another CPU */ __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0); timer->base = new_base; /* * Enqueue the timers on the new cpu. This does not * reprogram the event device in case the timer * expires before the earliest on this CPU, but we run * hrtimer_interrupt after we migrated everything to * sort out already expired timers and reprogram the * event device. */ enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS); } } int hrtimers_cpu_dying(unsigned int dying_cpu) { int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER)); struct hrtimer_cpu_base *old_base, *new_base; old_base = this_cpu_ptr(&hrtimer_bases); new_base = &per_cpu(hrtimer_bases, ncpu); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ raw_spin_lock(&old_base->lock); raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], &new_base->clock_base[i]); } /* * The migration might have changed the first expiring softirq * timer on this CPU. Update it. */ __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT); /* Tell the other CPU to retrigger the next event */ smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); raw_spin_unlock(&new_base->lock); old_base->online = 0; raw_spin_unlock(&old_base->lock); return 0; } #endif /* CONFIG_HOTPLUG_CPU */ void __init hrtimers_init(void) { hrtimers_prepare_cpu(smp_processor_id()); open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); } /** * schedule_hrtimeout_range_clock - sleep until timeout * @expires: timeout value (ktime_t) * @delta: slack in expires timeout (ktime_t) * @mode: timer mode * @clock_id: timer clock to be used */ int __sched schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, const enum hrtimer_mode mode, clockid_t clock_id) { struct hrtimer_sleeper t; /* * Optimize when a zero timeout value is given. It does not * matter whether this is an absolute or a relative time. */ if (expires && *expires == 0) { __set_current_state(TASK_RUNNING); return 0; } /* * A NULL parameter means "infinite" */ if (!expires) { schedule(); return -EINTR; } hrtimer_init_sleeper_on_stack(&t, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_sleeper_start_expires(&t, mode); if (likely(t.task)) schedule(); hrtimer_cancel(&t.timer); destroy_hrtimer_on_stack(&t.timer); __set_current_state(TASK_RUNNING); return !t.task ? 0 : -EINTR; } EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); /** * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) * @delta: slack in expires timeout (ktime_t) * @mode: timer mode * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless * the current task state has been set (see set_current_state()). * * The @delta argument gives the kernel the freedom to schedule the * actual wakeup to a time that is both power and performance friendly * for regular (non RT/DL) tasks. * The kernel give the normal best effort behavior for "@expires+@delta", * but may decide to fire the timer earlier, but no earlier than @expires. * * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to * pass before the routine returns unless the current task is explicitly * woken up, (e.g. by wake_up_process()). * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is * delivered to the current task or the current task is explicitly woken * up. * * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. * * Returns 0 when the timer has expired. If the task was woken before the * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or * by an explicit wakeup, it returns -EINTR. */ int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta, const enum hrtimer_mode mode) { return schedule_hrtimeout_range_clock(expires, delta, mode, CLOCK_MONOTONIC); } EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); /** * schedule_hrtimeout - sleep until timeout * @expires: timeout value (ktime_t) * @mode: timer mode * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless * the current task state has been set (see set_current_state()). * * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to * pass before the routine returns unless the current task is explicitly * woken up, (e.g. by wake_up_process()). * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is * delivered to the current task or the current task is explicitly woken * up. * * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. * * Returns 0 when the timer has expired. If the task was woken before the * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or * by an explicit wakeup, it returns -EINTR. */ int __sched schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode) { return schedule_hrtimeout_range(expires, 0, mode); } EXPORT_SYMBOL_GPL(schedule_hrtimeout); |
1 1 3 3 1 2 3 3 2 2 2 2 3 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_skbprio.c SKB Priority Queue. * * Authors: Nishanth Devarajan, <ndev2021@gmail.com> * Cody Doucette, <doucette@bu.edu> * original idea by Michel Machado, Cody Doucette, and Qiaobin Fu */ #include <linux/string.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> #include <net/sch_generic.h> #include <net/inet_ecn.h> /* SKB Priority Queue * ================================= * * Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes * packets according to their skb->priority field. Under congestion, * Skbprio drops already-enqueued lower priority packets to make space * available for higher priority packets; it was conceived as a solution * for denial-of-service defenses that need to route packets with different * priorities as a mean to overcome DoS attacks. */ struct skbprio_sched_data { /* Queue state. */ struct sk_buff_head qdiscs[SKBPRIO_MAX_PRIORITY]; struct gnet_stats_queue qstats[SKBPRIO_MAX_PRIORITY]; u16 highest_prio; u16 lowest_prio; }; static u16 calc_new_high_prio(const struct skbprio_sched_data *q) { int prio; for (prio = q->highest_prio - 1; prio >= q->lowest_prio; prio--) { if (!skb_queue_empty(&q->qdiscs[prio])) return prio; } /* SKB queue is empty, return 0 (default highest priority setting). */ return 0; } static u16 calc_new_low_prio(const struct skbprio_sched_data *q) { int prio; for (prio = q->lowest_prio + 1; prio <= q->highest_prio; prio++) { if (!skb_queue_empty(&q->qdiscs[prio])) return prio; } /* SKB queue is empty, return SKBPRIO_MAX_PRIORITY - 1 * (default lowest priority setting). */ return SKBPRIO_MAX_PRIORITY - 1; } static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1; struct skbprio_sched_data *q = qdisc_priv(sch); struct sk_buff_head *qdisc; struct sk_buff_head *lp_qdisc; struct sk_buff *to_drop; u16 prio, lp; /* Obtain the priority of @skb. */ prio = min(skb->priority, max_priority); qdisc = &q->qdiscs[prio]; /* sch->limit can change under us from skbprio_change() */ if (sch->q.qlen < READ_ONCE(sch->limit)) { __skb_queue_tail(qdisc, skb); qdisc_qstats_backlog_inc(sch, skb); q->qstats[prio].backlog += qdisc_pkt_len(skb); /* Check to update highest and lowest priorities. */ if (prio > q->highest_prio) q->highest_prio = prio; if (prio < q->lowest_prio) q->lowest_prio = prio; sch->q.qlen++; return NET_XMIT_SUCCESS; } /* If this packet has the lowest priority, drop it. */ lp = q->lowest_prio; if (prio <= lp) { q->qstats[prio].drops++; q->qstats[prio].overlimits++; return qdisc_drop(skb, sch, to_free); } __skb_queue_tail(qdisc, skb); qdisc_qstats_backlog_inc(sch, skb); q->qstats[prio].backlog += qdisc_pkt_len(skb); /* Drop the packet at the tail of the lowest priority qdisc. */ lp_qdisc = &q->qdiscs[lp]; to_drop = __skb_dequeue_tail(lp_qdisc); BUG_ON(!to_drop); qdisc_qstats_backlog_dec(sch, to_drop); qdisc_drop(to_drop, sch, to_free); q->qstats[lp].backlog -= qdisc_pkt_len(to_drop); q->qstats[lp].drops++; q->qstats[lp].overlimits++; /* Check to update highest and lowest priorities. */ if (skb_queue_empty(lp_qdisc)) { if (q->lowest_prio == q->highest_prio) { /* The incoming packet is the only packet in queue. */ BUG_ON(sch->q.qlen != 1); q->lowest_prio = prio; q->highest_prio = prio; } else { q->lowest_prio = calc_new_low_prio(q); } } if (prio > q->highest_prio) q->highest_prio = prio; return NET_XMIT_CN; } static struct sk_buff *skbprio_dequeue(struct Qdisc *sch) { struct skbprio_sched_data *q = qdisc_priv(sch); struct sk_buff_head *hpq = &q->qdiscs[q->highest_prio]; struct sk_buff *skb = __skb_dequeue(hpq); if (unlikely(!skb)) return NULL; sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); q->qstats[q->highest_prio].backlog -= qdisc_pkt_len(skb); /* Update highest priority field. */ if (skb_queue_empty(hpq)) { if (q->lowest_prio == q->highest_prio) { BUG_ON(sch->q.qlen); q->highest_prio = 0; q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1; } else { q->highest_prio = calc_new_high_prio(q); } } return skb; } static int skbprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct tc_skbprio_qopt *ctl = nla_data(opt); if (opt->nla_len != nla_attr_size(sizeof(*ctl))) return -EINVAL; WRITE_ONCE(sch->limit, ctl->limit); return 0; } static int skbprio_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct skbprio_sched_data *q = qdisc_priv(sch); int prio; /* Initialise all queues, one for each possible priority. */ for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++) __skb_queue_head_init(&q->qdiscs[prio]); memset(&q->qstats, 0, sizeof(q->qstats)); q->highest_prio = 0; q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1; sch->limit = 64; if (!opt) return 0; return skbprio_change(sch, opt, extack); } static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct tc_skbprio_qopt opt; opt.limit = READ_ONCE(sch->limit); if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) return -1; return skb->len; } static void skbprio_reset(struct Qdisc *sch) { struct skbprio_sched_data *q = qdisc_priv(sch); int prio; for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++) __skb_queue_purge(&q->qdiscs[prio]); memset(&q->qstats, 0, sizeof(q->qstats)); q->highest_prio = 0; q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1; } static void skbprio_destroy(struct Qdisc *sch) { struct skbprio_sched_data *q = qdisc_priv(sch); int prio; for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++) __skb_queue_purge(&q->qdiscs[prio]); } static struct Qdisc *skbprio_leaf(struct Qdisc *sch, unsigned long arg) { return NULL; } static unsigned long skbprio_find(struct Qdisc *sch, u32 classid) { return 0; } static int skbprio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { tcm->tcm_handle |= TC_H_MIN(cl); return 0; } static int skbprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct gnet_dump *d) { struct skbprio_sched_data *q = qdisc_priv(sch); if (gnet_stats_copy_queue(d, NULL, &q->qstats[cl - 1], q->qstats[cl - 1].qlen) < 0) return -1; return 0; } static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) { unsigned int i; if (arg->stop) return; for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) { if (!tc_qdisc_stats_dump(sch, i + 1, arg)) break; } } static const struct Qdisc_class_ops skbprio_class_ops = { .leaf = skbprio_leaf, .find = skbprio_find, .dump = skbprio_dump_class, .dump_stats = skbprio_dump_class_stats, .walk = skbprio_walk, }; static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = { .cl_ops = &skbprio_class_ops, .id = "skbprio", .priv_size = sizeof(struct skbprio_sched_data), .enqueue = skbprio_enqueue, .dequeue = skbprio_dequeue, .peek = qdisc_peek_dequeued, .init = skbprio_init, .reset = skbprio_reset, .change = skbprio_change, .dump = skbprio_dump, .destroy = skbprio_destroy, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("skbprio"); static int __init skbprio_module_init(void) { return register_qdisc(&skbprio_qdisc_ops); } static void __exit skbprio_module_exit(void) { unregister_qdisc(&skbprio_qdisc_ops); } module_init(skbprio_module_init) module_exit(skbprio_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SKB priority based scheduling qdisc"); |
1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 | /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2020 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/sched/signal.h> #include <linux/sched/mm.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/count_zeros.h> #include <rdma/ib_umem_odp.h> #include "uverbs.h" static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) { bool make_dirty = umem->writable && dirty; struct scatterlist *sg; unsigned int i; if (dirty) ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt, DMA_BIDIRECTIONAL, 0); for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) unpin_user_page_range_dirty_lock(sg_page(sg), DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty); sg_free_append_table(&umem->sgt_append); } /** * ib_umem_find_best_pgsz - Find best HW page size to use for this MR * * @umem: umem struct * @pgsz_bitmap: bitmap of HW supported page sizes * @virt: IOVA * * This helper is intended for HW that support multiple page * sizes but can do only a single page size in an MR. * * Returns 0 if the umem requires page sizes not supported by * the driver to be mapped. Drivers always supporting PAGE_SIZE * or smaller will never see a 0 result. */ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, unsigned long virt) { struct scatterlist *sg; unsigned long va, pgoff; dma_addr_t mask; int i; umem->iova = va = virt; if (umem->is_odp) { unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift); /* ODP must always be self consistent. */ if (!(pgsz_bitmap & page_size)) return 0; return page_size; } /* The best result is the smallest page size that results in the minimum * number of required pages. Compute the largest page size that could * work based on VA address bits that don't change. */ mask = pgsz_bitmap & GENMASK(BITS_PER_LONG - 1, bits_per((umem->length - 1 + virt) ^ virt)); /* offset into first SGL */ pgoff = umem->address & ~PAGE_MASK; for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) { /* Walk SGL and reduce max page size if VA/PA bits differ * for any address. */ mask |= (sg_dma_address(sg) + pgoff) ^ va; va += sg_dma_len(sg) - pgoff; /* Except for the last entry, the ending iova alignment sets * the maximum possible page size as the low bits of the iova * must be zero when starting the next chunk. */ if (i != (umem->sgt_append.sgt.nents - 1)) mask |= va; pgoff = 0; } /* The mask accumulates 1's in each position where the VA and physical * address differ, thus the length of trailing 0 is the largest page * size that can pass the VA through to the physical. */ if (mask) pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0); return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0; } EXPORT_SYMBOL(ib_umem_find_best_pgsz); /** * ib_umem_get - Pin and DMA map userspace memory. * * @device: IB device to connect UMEM * @addr: userspace virtual address to start at * @size: length of region to pin * @access: IB_ACCESS_xxx flags for memory being pinned */ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, size_t size, int access) { struct ib_umem *umem; struct page **page_list; unsigned long lock_limit; unsigned long new_pinned; unsigned long cur_base; unsigned long dma_attr = 0; struct mm_struct *mm; unsigned long npages; int pinned, ret; unsigned int gup_flags = FOLL_LONGTERM; /* * If the combination of the addr and size requested for this memory * region causes an integer overflow, return error. */ if (((addr + size) < addr) || PAGE_ALIGN(addr + size) < (addr + size)) return ERR_PTR(-EINVAL); if (!can_do_mlock()) return ERR_PTR(-EPERM); if (access & IB_ACCESS_ON_DEMAND) return ERR_PTR(-EOPNOTSUPP); umem = kzalloc(sizeof(*umem), GFP_KERNEL); if (!umem) return ERR_PTR(-ENOMEM); umem->ibdev = device; umem->length = size; umem->address = addr; /* * Drivers should call ib_umem_find_best_pgsz() to set the iova * correctly. */ umem->iova = addr; umem->writable = ib_access_writable(access); umem->owning_mm = mm = current->mm; mmgrab(mm); page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { ret = -ENOMEM; goto umem_kfree; } npages = ib_umem_num_pages(umem); if (npages == 0 || npages > UINT_MAX) { ret = -EINVAL; goto out; } lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; new_pinned = atomic64_add_return(npages, &mm->pinned_vm); if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) { atomic64_sub(npages, &mm->pinned_vm); ret = -ENOMEM; goto out; } cur_base = addr & PAGE_MASK; if (umem->writable) gup_flags |= FOLL_WRITE; while (npages) { cond_resched(); pinned = pin_user_pages_fast(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), gup_flags, page_list); if (pinned < 0) { ret = pinned; goto umem_release; } cur_base += pinned * PAGE_SIZE; npages -= pinned; ret = sg_alloc_append_table_from_pages( &umem->sgt_append, page_list, pinned, 0, pinned << PAGE_SHIFT, ib_dma_max_seg_size(device), npages, GFP_KERNEL); if (ret) { unpin_user_pages_dirty_lock(page_list, pinned, 0); goto umem_release; } } if (access & IB_ACCESS_RELAXED_ORDERING) dma_attr |= DMA_ATTR_WEAK_ORDERING; ret = ib_dma_map_sgtable_attrs(device, &umem->sgt_append.sgt, DMA_BIDIRECTIONAL, dma_attr); if (ret) goto umem_release; goto out; umem_release: __ib_umem_release(device, umem, 0); atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm); out: free_page((unsigned long) page_list); umem_kfree: if (ret) { mmdrop(umem->owning_mm); kfree(umem); } return ret ? ERR_PTR(ret) : umem; } EXPORT_SYMBOL(ib_umem_get); /** * ib_umem_release - release memory pinned with ib_umem_get * @umem: umem struct to release */ void ib_umem_release(struct ib_umem *umem) { if (!umem) return; if (umem->is_dmabuf) return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem)); if (umem->is_odp) return ib_umem_odp_release(to_ib_umem_odp(umem)); __ib_umem_release(umem->ibdev, umem, 1); atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm); mmdrop(umem->owning_mm); kfree(umem); } EXPORT_SYMBOL(ib_umem_release); /* * Copy from the given ib_umem's pages to the given buffer. * * umem - the umem to copy from * offset - offset to start copying from * dst - destination buffer * length - buffer length * * Returns 0 on success, or an error code. */ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length) { size_t end = offset + length; int ret; if (offset > umem->length || length > umem->length - offset) { pr_err("%s not in range. offset: %zd umem length: %zd end: %zd\n", __func__, offset, umem->length, end); return -EINVAL; } ret = sg_pcopy_to_buffer(umem->sgt_append.sgt.sgl, umem->sgt_append.sgt.orig_nents, dst, length, offset + ib_umem_offset(umem)); if (ret < 0) return ret; else if (ret != length) return -EINVAL; else return 0; } EXPORT_SYMBOL(ib_umem_copy_from); |
217 144 145 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | // SPDX-License-Identifier: GPL-2.0-or-later /* * x86 instruction attribute tables * * Written by Masami Hiramatsu <mhiramat@redhat.com> */ #include <asm/insn.h> /* __ignore_sync_check__ */ /* Attribute tables are generated from opcode map */ #include "inat-tables.c" /* Attribute search APIs */ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) { return inat_primary_table[opcode]; } int inat_get_last_prefix_id(insn_byte_t last_pfx) { insn_attr_t lpfx_attr; lpfx_attr = inat_get_opcode_attribute(last_pfx); return inat_last_prefix_id(lpfx_attr); } insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, insn_attr_t esc_attr) { const insn_attr_t *table; int n; n = inat_escape_id(esc_attr); table = inat_escape_tables[n][0]; if (!table) return 0; if (inat_has_variant(table[opcode]) && lpfx_id) { table = inat_escape_tables[n][lpfx_id]; if (!table) return 0; } return table[opcode]; } insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, insn_attr_t grp_attr) { const insn_attr_t *table; int n; n = inat_group_id(grp_attr); table = inat_group_tables[n][0]; if (!table) return inat_group_common_attribute(grp_attr); if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { table = inat_group_tables[n][lpfx_id]; if (!table) return inat_group_common_attribute(grp_attr); } return table[X86_MODRM_REG(modrm)] | inat_group_common_attribute(grp_attr); } insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, insn_byte_t vex_p) { const insn_attr_t *table; if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) return 0; /* At first, this checks the master table */ table = inat_avx_tables[vex_m][0]; if (!table) return 0; if (!inat_is_group(table[opcode]) && vex_p) { /* If this is not a group, get attribute directly */ table = inat_avx_tables[vex_m][vex_p]; if (!table) return 0; } return table[opcode]; } |
5 2 2 2 75 71 5 5 5 5 1 1 1 1 2 2 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | // SPDX-License-Identifier: GPL-2.0 #include "cgroup-internal.h" #include <linux/sched/task.h> #include <linux/slab.h> #include <linux/nsproxy.h> #include <linux/proc_ns.h> /* cgroup namespaces */ static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns) { return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES); } static void dec_cgroup_namespaces(struct ucounts *ucounts) { dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES); } static struct cgroup_namespace *alloc_cgroup_ns(void) { struct cgroup_namespace *new_ns; int ret; new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL_ACCOUNT); if (!new_ns) return ERR_PTR(-ENOMEM); ret = ns_alloc_inum(&new_ns->ns); if (ret) { kfree(new_ns); return ERR_PTR(ret); } refcount_set(&new_ns->ns.count, 1); new_ns->ns.ops = &cgroupns_operations; return new_ns; } void free_cgroup_ns(struct cgroup_namespace *ns) { put_css_set(ns->root_cset); dec_cgroup_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); kfree(ns); } EXPORT_SYMBOL(free_cgroup_ns); struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, struct cgroup_namespace *old_ns) { struct cgroup_namespace *new_ns; struct ucounts *ucounts; struct css_set *cset; BUG_ON(!old_ns); if (!(flags & CLONE_NEWCGROUP)) { get_cgroup_ns(old_ns); return old_ns; } /* Allow only sysadmin to create cgroup namespace. */ if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); ucounts = inc_cgroup_namespaces(user_ns); if (!ucounts) return ERR_PTR(-ENOSPC); /* It is not safe to take cgroup_mutex here */ spin_lock_irq(&css_set_lock); cset = task_css_set(current); get_css_set(cset); spin_unlock_irq(&css_set_lock); new_ns = alloc_cgroup_ns(); if (IS_ERR(new_ns)) { put_css_set(cset); dec_cgroup_namespaces(ucounts); return new_ns; } new_ns->user_ns = get_user_ns(user_ns); new_ns->ucounts = ucounts; new_ns->root_cset = cset; return new_ns; } static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns) { return container_of(ns, struct cgroup_namespace, ns); } static int cgroupns_install(struct nsset *nsset, struct ns_common *ns) { struct nsproxy *nsproxy = nsset->nsproxy; struct cgroup_namespace *cgroup_ns = to_cg_ns(ns); if (!ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN) || !ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; /* Don't need to do anything if we are attaching to our own cgroupns. */ if (cgroup_ns == nsproxy->cgroup_ns) return 0; get_cgroup_ns(cgroup_ns); put_cgroup_ns(nsproxy->cgroup_ns); nsproxy->cgroup_ns = cgroup_ns; return 0; } static struct ns_common *cgroupns_get(struct task_struct *task) { struct cgroup_namespace *ns = NULL; struct nsproxy *nsproxy; task_lock(task); nsproxy = task->nsproxy; if (nsproxy) { ns = nsproxy->cgroup_ns; get_cgroup_ns(ns); } task_unlock(task); return ns ? &ns->ns : NULL; } static void cgroupns_put(struct ns_common *ns) { put_cgroup_ns(to_cg_ns(ns)); } static struct user_namespace *cgroupns_owner(struct ns_common *ns) { return to_cg_ns(ns)->user_ns; } const struct proc_ns_operations cgroupns_operations = { .name = "cgroup", .type = CLONE_NEWCGROUP, .get = cgroupns_get, .put = cgroupns_put, .install = cgroupns_install, .owner = cgroupns_owner, }; |
2 2 2 1 1 1 1 1 1 9 2 7 3 1 2 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 | /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/module.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_socket.h> #include <net/inet_sock.h> #include <net/tcp.h> struct nft_socket { enum nft_socket_keys key:8; u8 level; /* cgroupv2 level to extract */ u8 level_user; /* cgroupv2 level provided by userspace */ u8 len; union { u8 dreg; }; }; static void nft_socket_wildcard(const struct nft_pktinfo *pkt, struct nft_regs *regs, struct sock *sk, u32 *dest) { switch (nft_pf(pkt)) { case NFPROTO_IPV4: nft_reg_store8(dest, inet_sk(sk)->inet_rcv_saddr == 0); break; #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: nft_reg_store8(dest, ipv6_addr_any(&sk->sk_v6_rcv_saddr)); break; #endif default: regs->verdict.code = NFT_BREAK; return; } } #ifdef CONFIG_SOCK_CGROUP_DATA static noinline bool nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level) { struct cgroup *cgrp; u64 cgid; if (!sk_fullsock(sk)) return false; cgrp = cgroup_ancestor(sock_cgroup_ptr(&sk->sk_cgrp_data), level); if (!cgrp) return false; cgid = cgroup_id(cgrp); memcpy(dest, &cgid, sizeof(u64)); return true; } /* process context only, uses current->nsproxy. */ static noinline int nft_socket_cgroup_subtree_level(void) { struct cgroup *cgrp = cgroup_get_from_path("/"); int level; if (IS_ERR(cgrp)) return PTR_ERR(cgrp); level = cgrp->level; cgroup_put(cgrp); if (WARN_ON_ONCE(level > 255)) return -ERANGE; if (WARN_ON_ONCE(level < 0)) return -EINVAL; return level; } #endif static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) { const struct net_device *indev = nft_in(pkt); const struct sk_buff *skb = pkt->skb; struct sock *sk = NULL; if (!indev) return NULL; switch (nft_pf(pkt)) { case NFPROTO_IPV4: sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev); break; #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev); break; #endif default: WARN_ON_ONCE(1); break; } return sk; } static void nft_socket_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_socket *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; struct sock *sk = skb->sk; u32 *dest = ®s->data[priv->dreg]; if (sk && !net_eq(nft_net(pkt), sock_net(sk))) sk = NULL; if (!sk) sk = nft_socket_do_lookup(pkt); if (!sk) { regs->verdict.code = NFT_BREAK; return; } switch(priv->key) { case NFT_SOCKET_TRANSPARENT: nft_reg_store8(dest, inet_sk_transparent(sk)); break; case NFT_SOCKET_MARK: if (sk_fullsock(sk)) { *dest = READ_ONCE(sk->sk_mark); } else { regs->verdict.code = NFT_BREAK; goto out_put_sk; } break; case NFT_SOCKET_WILDCARD: if (!sk_fullsock(sk)) { regs->verdict.code = NFT_BREAK; goto out_put_sk; } nft_socket_wildcard(pkt, regs, sk, dest); break; #ifdef CONFIG_SOCK_CGROUP_DATA case NFT_SOCKET_CGROUPV2: if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) { regs->verdict.code = NFT_BREAK; goto out_put_sk; } break; #endif default: WARN_ON(1); regs->verdict.code = NFT_BREAK; } out_put_sk: if (sk != skb->sk) sock_gen_put(sk); } static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = { [NFTA_SOCKET_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_SOCKET_DREG] = { .type = NLA_U32 }, [NFTA_SOCKET_LEVEL] = NLA_POLICY_MAX(NLA_BE32, 255), }; static int nft_socket_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_socket *priv = nft_expr_priv(expr); unsigned int len; if (!tb[NFTA_SOCKET_DREG] || !tb[NFTA_SOCKET_KEY]) return -EINVAL; switch(ctx->family) { case NFPROTO_IPV4: #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: #endif case NFPROTO_INET: break; default: return -EOPNOTSUPP; } priv->key = ntohl(nla_get_be32(tb[NFTA_SOCKET_KEY])); switch(priv->key) { case NFT_SOCKET_TRANSPARENT: case NFT_SOCKET_WILDCARD: len = sizeof(u8); break; case NFT_SOCKET_MARK: len = sizeof(u32); break; #ifdef CONFIG_SOCK_CGROUP_DATA case NFT_SOCKET_CGROUPV2: { unsigned int level; int err; if (!tb[NFTA_SOCKET_LEVEL]) return -EINVAL; level = ntohl(nla_get_be32(tb[NFTA_SOCKET_LEVEL])); if (level > 255) return -EOPNOTSUPP; err = nft_socket_cgroup_subtree_level(); if (err < 0) return err; priv->level_user = level; level += err; /* Implies a giant cgroup tree */ if (WARN_ON_ONCE(level > 255)) return -EOPNOTSUPP; priv->level = level; len = sizeof(u64); break; } #endif default: return -EOPNOTSUPP; } priv->len = len; return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); } static int nft_socket_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_socket *priv = nft_expr_priv(expr); if (nla_put_be32(skb, NFTA_SOCKET_KEY, htonl(priv->key))) return -1; if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg)) return -1; if (priv->key == NFT_SOCKET_CGROUPV2 && nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user))) return -1; return 0; } static bool nft_socket_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { const struct nft_socket *priv = nft_expr_priv(expr); const struct nft_socket *socket; if (!nft_reg_track_cmp(track, expr, priv->dreg)) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } socket = nft_expr_priv(track->regs[priv->dreg].selector); if (priv->key != socket->key || priv->dreg != socket->dreg || priv->level != socket->level) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } if (!track->regs[priv->dreg].bitwise) return true; return nft_expr_reduce_bitwise(track, expr); } static int nft_socket_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && ctx->family != NFPROTO_INET) return -EOPNOTSUPP; return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT)); } static struct nft_expr_type nft_socket_type; static const struct nft_expr_ops nft_socket_ops = { .type = &nft_socket_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_socket)), .eval = nft_socket_eval, .init = nft_socket_init, .dump = nft_socket_dump, .validate = nft_socket_validate, .reduce = nft_socket_reduce, }; static struct nft_expr_type nft_socket_type __read_mostly = { .name = "socket", .ops = &nft_socket_ops, .policy = nft_socket_policy, .maxattr = NFTA_SOCKET_MAX, .owner = THIS_MODULE, }; static int __init nft_socket_module_init(void) { return nft_register_expr(&nft_socket_type); } static void __exit nft_socket_module_exit(void) { nft_unregister_expr(&nft_socket_type); } module_init(nft_socket_module_init); module_exit(nft_socket_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Máté Eckl"); MODULE_DESCRIPTION("nf_tables socket match module"); MODULE_ALIAS_NFT_EXPR("socket"); |
8 8 8 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 | // SPDX-License-Identifier: GPL-2.0 /* * linux/drivers/char/misc.c * * Generic misc open routine by Johan Myreen * * Based on code from Linus * * Teemu Rantanen's Microsoft Busmouse support and Derrick Cole's * changes incorporated into 0.97pl4 * by Peter Cervasio (pete%q106fm.uucp@wupost.wustl.edu) (08SEP92) * See busmouse.c for particulars. * * Made things a lot mode modular - easy to compile in just one or two * of the misc drivers, as they are now completely independent. Linus. * * Support for loadable modules. 8-Sep-95 Philip Blundell <pjb27@cam.ac.uk> * * Fixed a failing symbol register to free the device registration * Alan Cox <alan@lxorguk.ukuu.org.uk> 21-Jan-96 * * Dynamic minors and /proc/mice by Alessandro Rubini. 26-Mar-96 * * Renamed to misc and miscdevice to be more accurate. Alan Cox 26-Mar-96 * * Handling of mouse minor numbers for kerneld: * Idea by Jacques Gelinas <jack@solucorp.qc.ca>, * adapted by Bjorn Ekwall <bj0rn@blox.se> * corrected by Alan Cox <alan@lxorguk.ukuu.org.uk> * * Changes for kmod (from kerneld): * Cyrus Durgin <cider@speakeasy.org> * * Added devfs support. Richard Gooch <rgooch@atnf.csiro.au> 10-Jan-1998 */ #include <linux/module.h> #include <linux/fs.h> #include <linux/errno.h> #include <linux/miscdevice.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/mutex.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/init.h> #include <linux/device.h> #include <linux/tty.h> #include <linux/kmod.h> #include <linux/gfp.h> /* * Head entry for the doubly linked miscdevice list */ static LIST_HEAD(misc_list); static DEFINE_MUTEX(misc_mtx); /* * Assigned numbers, used for dynamic minors */ #define DYNAMIC_MINORS 128 /* like dynamic majors */ static DEFINE_IDA(misc_minors_ida); static int misc_minor_alloc(void) { int ret; ret = ida_alloc_max(&misc_minors_ida, DYNAMIC_MINORS - 1, GFP_KERNEL); if (ret >= 0) { ret = DYNAMIC_MINORS - ret - 1; } else { ret = ida_alloc_range(&misc_minors_ida, MISC_DYNAMIC_MINOR + 1, MINORMASK, GFP_KERNEL); } return ret; } static void misc_minor_free(int minor) { if (minor < DYNAMIC_MINORS) ida_free(&misc_minors_ida, DYNAMIC_MINORS - minor - 1); else if (minor > MISC_DYNAMIC_MINOR) ida_free(&misc_minors_ida, minor); } #ifdef CONFIG_PROC_FS static void *misc_seq_start(struct seq_file *seq, loff_t *pos) { mutex_lock(&misc_mtx); return seq_list_start(&misc_list, *pos); } static void *misc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { return seq_list_next(v, &misc_list, pos); } static void misc_seq_stop(struct seq_file *seq, void *v) { mutex_unlock(&misc_mtx); } static int misc_seq_show(struct seq_file *seq, void *v) { const struct miscdevice *p = list_entry(v, struct miscdevice, list); seq_printf(seq, "%3i %s\n", p->minor, p->name ? p->name : ""); return 0; } static const struct seq_operations misc_seq_ops = { .start = misc_seq_start, .next = misc_seq_next, .stop = misc_seq_stop, .show = misc_seq_show, }; #endif static int misc_open(struct inode *inode, struct file *file) { int minor = iminor(inode); struct miscdevice *c = NULL, *iter; int err = -ENODEV; const struct file_operations *new_fops = NULL; mutex_lock(&misc_mtx); list_for_each_entry(iter, &misc_list, list) { if (iter->minor != minor) continue; c = iter; new_fops = fops_get(iter->fops); break; } if (!new_fops) { mutex_unlock(&misc_mtx); request_module("char-major-%d-%d", MISC_MAJOR, minor); mutex_lock(&misc_mtx); list_for_each_entry(iter, &misc_list, list) { if (iter->minor != minor) continue; c = iter; new_fops = fops_get(iter->fops); break; } if (!new_fops) goto fail; } /* * Place the miscdevice in the file's * private_data so it can be used by the * file operations, including f_op->open below */ file->private_data = c; err = 0; replace_fops(file, new_fops); if (file->f_op->open) err = file->f_op->open(inode, file); fail: mutex_unlock(&misc_mtx); return err; } static char *misc_devnode(const struct device *dev, umode_t *mode) { const struct miscdevice *c = dev_get_drvdata(dev); if (mode && c->mode) *mode = c->mode; if (c->nodename) return kstrdup(c->nodename, GFP_KERNEL); return NULL; } static const struct class misc_class = { .name = "misc", .devnode = misc_devnode, }; static const struct file_operations misc_fops = { .owner = THIS_MODULE, .open = misc_open, .llseek = noop_llseek, }; /** * misc_register - register a miscellaneous device * @misc: device structure * * Register a miscellaneous device with the kernel. If the minor * number is set to %MISC_DYNAMIC_MINOR a minor number is assigned * and placed in the minor field of the structure. For other cases * the minor number requested is used. * * The structure passed is linked into the kernel and may not be * destroyed until it has been unregistered. By default, an open() * syscall to the device sets file->private_data to point to the * structure. Drivers don't need open in fops for this. * * A zero is returned on success and a negative errno code for * failure. */ int misc_register(struct miscdevice *misc) { dev_t dev; int err = 0; bool is_dynamic = (misc->minor == MISC_DYNAMIC_MINOR); INIT_LIST_HEAD(&misc->list); mutex_lock(&misc_mtx); if (is_dynamic) { int i = misc_minor_alloc(); if (i < 0) { err = -EBUSY; goto out; } misc->minor = i; } else { struct miscdevice *c; list_for_each_entry(c, &misc_list, list) { if (c->minor == misc->minor) { err = -EBUSY; goto out; } } } dev = MKDEV(MISC_MAJOR, misc->minor); misc->this_device = device_create_with_groups(&misc_class, misc->parent, dev, misc, misc->groups, "%s", misc->name); if (IS_ERR(misc->this_device)) { if (is_dynamic) { misc_minor_free(misc->minor); misc->minor = MISC_DYNAMIC_MINOR; } err = PTR_ERR(misc->this_device); goto out; } /* * Add it to the front, so that later devices can "override" * earlier defaults */ list_add(&misc->list, &misc_list); out: mutex_unlock(&misc_mtx); return err; } EXPORT_SYMBOL(misc_register); /** * misc_deregister - unregister a miscellaneous device * @misc: device to unregister * * Unregister a miscellaneous device that was previously * successfully registered with misc_register(). */ void misc_deregister(struct miscdevice *misc) { if (WARN_ON(list_empty(&misc->list))) return; mutex_lock(&misc_mtx); list_del(&misc->list); device_destroy(&misc_class, MKDEV(MISC_MAJOR, misc->minor)); misc_minor_free(misc->minor); mutex_unlock(&misc_mtx); } EXPORT_SYMBOL(misc_deregister); static int __init misc_init(void) { int err; struct proc_dir_entry *ret; ret = proc_create_seq("misc", 0, NULL, &misc_seq_ops); err = class_register(&misc_class); if (err) goto fail_remove; err = -EIO; if (register_chrdev(MISC_MAJOR, "misc", &misc_fops)) goto fail_printk; return 0; fail_printk: pr_err("unable to get major %d for misc devices\n", MISC_MAJOR); class_unregister(&misc_class); fail_remove: if (ret) remove_proc_entry("misc", NULL); return err; } subsys_initcall(misc_init); |
3 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 21 21 3 3 3 3 3 3 4 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2016 Facebook * Copyright (C) 2013-2014 Jens Axboe */ #include <linux/sched.h> #include <linux/random.h> #include <linux/sbitmap.h> #include <linux/seq_file.h> static int init_alloc_hint(struct sbitmap *sb, gfp_t flags) { unsigned depth = sb->depth; sb->alloc_hint = alloc_percpu_gfp(unsigned int, flags); if (!sb->alloc_hint) return -ENOMEM; if (depth && !sb->round_robin) { int i; for_each_possible_cpu(i) *per_cpu_ptr(sb->alloc_hint, i) = get_random_u32_below(depth); } return 0; } static inline unsigned update_alloc_hint_before_get(struct sbitmap *sb, unsigned int depth) { unsigned hint; hint = this_cpu_read(*sb->alloc_hint); if (unlikely(hint >= depth)) { hint = depth ? get_random_u32_below(depth) : 0; this_cpu_write(*sb->alloc_hint, hint); } return hint; } static inline void update_alloc_hint_after_get(struct sbitmap *sb, unsigned int depth, unsigned int hint, unsigned int nr) { if (nr == -1) { /* If the map is full, a hint won't do us much good. */ this_cpu_write(*sb->alloc_hint, 0); } else if (nr == hint || unlikely(sb->round_robin)) { /* Only update the hint if we used it. */ hint = nr + 1; if (hint >= depth - 1) hint = 0; this_cpu_write(*sb->alloc_hint, hint); } } /* * See if we have deferred clears that we can batch move */ static inline bool sbitmap_deferred_clear(struct sbitmap_word *map, unsigned int depth, unsigned int alloc_hint, bool wrap) { unsigned long mask, word_mask; guard(raw_spinlock_irqsave)(&map->swap_lock); if (!map->cleared) { if (depth == 0) return false; word_mask = (~0UL) >> (BITS_PER_LONG - depth); /* * The current behavior is to always retry after moving * ->cleared to word, and we change it to retry in case * of any free bits. To avoid an infinite loop, we need * to take wrap & alloc_hint into account, otherwise a * soft lockup may occur. */ if (!wrap && alloc_hint) word_mask &= ~((1UL << alloc_hint) - 1); return (READ_ONCE(map->word) & word_mask) != word_mask; } /* * First get a stable cleared mask, setting the old mask to 0. */ mask = xchg(&map->cleared, 0); /* * Now clear the masked bits in our free word */ atomic_long_andnot(mask, (atomic_long_t *)&map->word); BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(map->word)); return true; } int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, gfp_t flags, int node, bool round_robin, bool alloc_hint) { unsigned int bits_per_word; int i; if (shift < 0) shift = sbitmap_calculate_shift(depth); bits_per_word = 1U << shift; if (bits_per_word > BITS_PER_LONG) return -EINVAL; sb->shift = shift; sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); sb->round_robin = round_robin; if (depth == 0) { sb->map = NULL; return 0; } if (alloc_hint) { if (init_alloc_hint(sb, flags)) return -ENOMEM; } else { sb->alloc_hint = NULL; } sb->map = kvzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node); if (!sb->map) { free_percpu(sb->alloc_hint); return -ENOMEM; } for (i = 0; i < sb->map_nr; i++) raw_spin_lock_init(&sb->map[i].swap_lock); return 0; } EXPORT_SYMBOL_GPL(sbitmap_init_node); void sbitmap_resize(struct sbitmap *sb, unsigned int depth) { unsigned int bits_per_word = 1U << sb->shift; unsigned int i; for (i = 0; i < sb->map_nr; i++) sbitmap_deferred_clear(&sb->map[i], 0, 0, 0); sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); } EXPORT_SYMBOL_GPL(sbitmap_resize); static int __sbitmap_get_word(unsigned long *word, unsigned long depth, unsigned int hint, bool wrap) { int nr; /* don't wrap if starting from 0 */ wrap = wrap && hint; while (1) { nr = find_next_zero_bit(word, depth, hint); if (unlikely(nr >= depth)) { /* * We started with an offset, and we didn't reset the * offset to 0 in a failure case, so start from 0 to * exhaust the map. */ if (hint && wrap) { hint = 0; continue; } return -1; } if (!test_and_set_bit_lock(nr, word)) break; hint = nr + 1; if (hint >= depth - 1) hint = 0; } return nr; } static int sbitmap_find_bit_in_word(struct sbitmap_word *map, unsigned int depth, unsigned int alloc_hint, bool wrap) { int nr; do { nr = __sbitmap_get_word(&map->word, depth, alloc_hint, wrap); if (nr != -1) break; if (!sbitmap_deferred_clear(map, depth, alloc_hint, wrap)) break; } while (1); return nr; } static int sbitmap_find_bit(struct sbitmap *sb, unsigned int depth, unsigned int index, unsigned int alloc_hint, bool wrap) { unsigned int i; int nr = -1; for (i = 0; i < sb->map_nr; i++) { nr = sbitmap_find_bit_in_word(&sb->map[index], min_t(unsigned int, __map_depth(sb, index), depth), alloc_hint, wrap); if (nr != -1) { nr += index << sb->shift; break; } /* Jump to next index. */ alloc_hint = 0; if (++index >= sb->map_nr) index = 0; } return nr; } static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) { unsigned int index; index = SB_NR_TO_INDEX(sb, alloc_hint); /* * Unless we're doing round robin tag allocation, just use the * alloc_hint to find the right word index. No point in looping * twice in find_next_zero_bit() for that case. */ if (sb->round_robin) alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); else alloc_hint = 0; return sbitmap_find_bit(sb, UINT_MAX, index, alloc_hint, !sb->round_robin); } int sbitmap_get(struct sbitmap *sb) { int nr; unsigned int hint, depth; if (WARN_ON_ONCE(unlikely(!sb->alloc_hint))) return -1; depth = READ_ONCE(sb->depth); hint = update_alloc_hint_before_get(sb, depth); nr = __sbitmap_get(sb, hint); update_alloc_hint_after_get(sb, depth, hint, nr); return nr; } EXPORT_SYMBOL_GPL(sbitmap_get); static int __sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, unsigned long shallow_depth) { unsigned int index; index = SB_NR_TO_INDEX(sb, alloc_hint); alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); return sbitmap_find_bit(sb, shallow_depth, index, alloc_hint, true); } int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) { int nr; unsigned int hint, depth; if (WARN_ON_ONCE(unlikely(!sb->alloc_hint))) return -1; depth = READ_ONCE(sb->depth); hint = update_alloc_hint_before_get(sb, depth); nr = __sbitmap_get_shallow(sb, hint, shallow_depth); update_alloc_hint_after_get(sb, depth, hint, nr); return nr; } EXPORT_SYMBOL_GPL(sbitmap_get_shallow); bool sbitmap_any_bit_set(const struct sbitmap *sb) { unsigned int i; for (i = 0; i < sb->map_nr; i++) { if (sb->map[i].word & ~sb->map[i].cleared) return true; } return false; } EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set) { unsigned int i, weight = 0; for (i = 0; i < sb->map_nr; i++) { const struct sbitmap_word *word = &sb->map[i]; unsigned int word_depth = __map_depth(sb, i); if (set) weight += bitmap_weight(&word->word, word_depth); else weight += bitmap_weight(&word->cleared, word_depth); } return weight; } static unsigned int sbitmap_cleared(const struct sbitmap *sb) { return __sbitmap_weight(sb, false); } unsigned int sbitmap_weight(const struct sbitmap *sb) { return __sbitmap_weight(sb, true) - sbitmap_cleared(sb); } EXPORT_SYMBOL_GPL(sbitmap_weight); void sbitmap_show(struct sbitmap *sb, struct seq_file *m) { seq_printf(m, "depth=%u\n", sb->depth); seq_printf(m, "busy=%u\n", sbitmap_weight(sb)); seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb)); seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); seq_printf(m, "map_nr=%u\n", sb->map_nr); } EXPORT_SYMBOL_GPL(sbitmap_show); static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) { if ((offset & 0xf) == 0) { if (offset != 0) seq_putc(m, '\n'); seq_printf(m, "%08x:", offset); } if ((offset & 0x1) == 0) seq_putc(m, ' '); seq_printf(m, "%02x", byte); } void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) { u8 byte = 0; unsigned int byte_bits = 0; unsigned int offset = 0; int i; for (i = 0; i < sb->map_nr; i++) { unsigned long word = READ_ONCE(sb->map[i].word); unsigned long cleared = READ_ONCE(sb->map[i].cleared); unsigned int word_bits = __map_depth(sb, i); word &= ~cleared; while (word_bits > 0) { unsigned int bits = min(8 - byte_bits, word_bits); byte |= (word & (BIT(bits) - 1)) << byte_bits; byte_bits += bits; if (byte_bits == 8) { emit_byte(m, offset, byte); byte = 0; byte_bits = 0; offset++; } word >>= bits; word_bits -= bits; } } if (byte_bits) { emit_byte(m, offset, byte); offset++; } if (offset) seq_putc(m, '\n'); } EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { unsigned int wake_batch; unsigned int shallow_depth; /* * Each full word of the bitmap has bits_per_word bits, and there might * be a partial word. There are depth / bits_per_word full words and * depth % bits_per_word bits left over. In bitwise arithmetic: * * bits_per_word = 1 << shift * depth / bits_per_word = depth >> shift * depth % bits_per_word = depth & ((1 << shift) - 1) * * Each word can be limited to sbq->min_shallow_depth bits. */ shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); depth = ((depth >> sbq->sb.shift) * shallow_depth + min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, SBQ_WAKE_BATCH); return wake_batch; } int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, int shift, bool round_robin, gfp_t flags, int node) { int ret; int i; ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node, round_robin, true); if (ret) return ret; sbq->min_shallow_depth = UINT_MAX; sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); atomic_set(&sbq->wake_index, 0); atomic_set(&sbq->ws_active, 0); atomic_set(&sbq->completion_cnt, 0); atomic_set(&sbq->wakeup_cnt, 0); sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); if (!sbq->ws) { sbitmap_free(&sbq->sb); return -ENOMEM; } for (i = 0; i < SBQ_WAIT_QUEUES; i++) init_waitqueue_head(&sbq->ws[i].wait); return 0; } EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { unsigned int wake_batch; wake_batch = sbq_calc_wake_batch(sbq, depth); if (sbq->wake_batch != wake_batch) WRITE_ONCE(sbq->wake_batch, wake_batch); } void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, unsigned int users) { unsigned int wake_batch; unsigned int depth = (sbq->sb.depth + users - 1) / users; wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES, 1, SBQ_WAKE_BATCH); WRITE_ONCE(sbq->wake_batch, wake_batch); } EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch); void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) { sbitmap_queue_update_wake_batch(sbq, depth); sbitmap_resize(&sbq->sb, depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_resize); int __sbitmap_queue_get(struct sbitmap_queue *sbq) { return sbitmap_get(&sbq->sb); } EXPORT_SYMBOL_GPL(__sbitmap_queue_get); unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, unsigned int *offset) { struct sbitmap *sb = &sbq->sb; unsigned int hint, depth; unsigned long index, nr; int i; if (unlikely(sb->round_robin)) return 0; depth = READ_ONCE(sb->depth); hint = update_alloc_hint_before_get(sb, depth); index = SB_NR_TO_INDEX(sb, hint); for (i = 0; i < sb->map_nr; i++) { struct sbitmap_word *map = &sb->map[index]; unsigned long get_mask; unsigned int map_depth = __map_depth(sb, index); unsigned long val; sbitmap_deferred_clear(map, 0, 0, 0); val = READ_ONCE(map->word); if (val == (1UL << (map_depth - 1)) - 1) goto next; nr = find_first_zero_bit(&val, map_depth); if (nr + nr_tags <= map_depth) { atomic_long_t *ptr = (atomic_long_t *) &map->word; get_mask = ((1UL << nr_tags) - 1) << nr; while (!atomic_long_try_cmpxchg(ptr, &val, get_mask | val)) ; get_mask = (get_mask & ~val) >> nr; if (get_mask) { *offset = nr + (index << sb->shift); update_alloc_hint_after_get(sb, depth, hint, *offset + nr_tags - 1); return get_mask; } } next: /* Jump to next index. */ if (++index >= sb->map_nr) index = 0; } return 0; } int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, unsigned int shallow_depth) { WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); return sbitmap_get_shallow(&sbq->sb, shallow_depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_get_shallow); void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, unsigned int min_shallow_depth) { sbq->min_shallow_depth = min_shallow_depth; sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr) { int i, wake_index, woken; if (!atomic_read(&sbq->ws_active)) return; wake_index = atomic_read(&sbq->wake_index); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[wake_index]; /* * Advance the index before checking the current queue. * It improves fairness, by ensuring the queue doesn't * need to be fully emptied before trying to wake up * from the next one. */ wake_index = sbq_index_inc(wake_index); if (waitqueue_active(&ws->wait)) { woken = wake_up_nr(&ws->wait, nr); if (woken == nr) break; nr -= woken; } } if (wake_index != atomic_read(&sbq->wake_index)) atomic_set(&sbq->wake_index, wake_index); } void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr) { unsigned int wake_batch = READ_ONCE(sbq->wake_batch); unsigned int wakeups; if (!atomic_read(&sbq->ws_active)) return; atomic_add(nr, &sbq->completion_cnt); wakeups = atomic_read(&sbq->wakeup_cnt); do { if (atomic_read(&sbq->completion_cnt) - wakeups < wake_batch) return; } while (!atomic_try_cmpxchg(&sbq->wakeup_cnt, &wakeups, wakeups + wake_batch)); __sbitmap_queue_wake_up(sbq, wake_batch); } EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); static inline void sbitmap_update_cpu_hint(struct sbitmap *sb, int cpu, int tag) { if (likely(!sb->round_robin && tag < sb->depth)) data_race(*per_cpu_ptr(sb->alloc_hint, cpu) = tag); } void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, int *tags, int nr_tags) { struct sbitmap *sb = &sbq->sb; unsigned long *addr = NULL; unsigned long mask = 0; int i; smp_mb__before_atomic(); for (i = 0; i < nr_tags; i++) { const int tag = tags[i] - offset; unsigned long *this_addr; /* since we're clearing a batch, skip the deferred map */ this_addr = &sb->map[SB_NR_TO_INDEX(sb, tag)].word; if (!addr) { addr = this_addr; } else if (addr != this_addr) { atomic_long_andnot(mask, (atomic_long_t *) addr); mask = 0; addr = this_addr; } mask |= (1UL << SB_NR_TO_BIT(sb, tag)); } if (mask) atomic_long_andnot(mask, (atomic_long_t *) addr); smp_mb__after_atomic(); sbitmap_queue_wake_up(sbq, nr_tags); sbitmap_update_cpu_hint(&sbq->sb, raw_smp_processor_id(), tags[nr_tags - 1] - offset); } void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu) { /* * Once the clear bit is set, the bit may be allocated out. * * Orders READ/WRITE on the associated instance(such as request * of blk_mq) by this bit for avoiding race with re-allocation, * and its pair is the memory barrier implied in __sbitmap_get_word. * * One invariant is that the clear bit has to be zero when the bit * is in use. */ smp_mb__before_atomic(); sbitmap_deferred_clear_bit(&sbq->sb, nr); /* * Pairs with the memory barrier in set_current_state() to ensure the * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the * waiter. See the comment on waitqueue_active(). */ smp_mb__after_atomic(); sbitmap_queue_wake_up(sbq, 1); sbitmap_update_cpu_hint(&sbq->sb, cpu, nr); } EXPORT_SYMBOL_GPL(sbitmap_queue_clear); void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) { int i, wake_index; /* * Pairs with the memory barrier in set_current_state() like in * sbitmap_queue_wake_up(). */ smp_mb(); wake_index = atomic_read(&sbq->wake_index); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[wake_index]; if (waitqueue_active(&ws->wait)) wake_up(&ws->wait); wake_index = sbq_index_inc(wake_index); } } EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) { bool first; int i; sbitmap_show(&sbq->sb, m); seq_puts(m, "alloc_hint={"); first = true; for_each_possible_cpu(i) { if (!first) seq_puts(m, ", "); first = false; seq_printf(m, "%u", *per_cpu_ptr(sbq->sb.alloc_hint, i)); } seq_puts(m, "}\n"); seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active)); seq_puts(m, "ws={\n"); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[i]; seq_printf(m, "\t{.wait=%s},\n", waitqueue_active(&ws->wait) ? "active" : "inactive"); } seq_puts(m, "}\n"); seq_printf(m, "round_robin=%d\n", sbq->sb.round_robin); seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_show); void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait) { if (!sbq_wait->sbq) { sbq_wait->sbq = sbq; atomic_inc(&sbq->ws_active); add_wait_queue(&ws->wait, &sbq_wait->wait); } } EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue); void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait) { list_del_init(&sbq_wait->wait.entry); if (sbq_wait->sbq) { atomic_dec(&sbq_wait->sbq->ws_active); sbq_wait->sbq = NULL; } } EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue); void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait, int state) { if (!sbq_wait->sbq) { atomic_inc(&sbq->ws_active); sbq_wait->sbq = sbq; } prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state); } EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait); void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait) { finish_wait(&ws->wait, &sbq_wait->wait); if (sbq_wait->sbq) { atomic_dec(&sbq->ws_active); sbq_wait->sbq = NULL; } } EXPORT_SYMBOL_GPL(sbitmap_finish_wait); |
872 873 175 5 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 | // SPDX-License-Identifier: GPL-2.0-or-later /* * vrf.c: device driver to encapsulate a VRF space * * Copyright (c) 2015 Cumulus Networks. All rights reserved. * Copyright (c) 2015 Shrijeet Mukherjee <shm@cumulusnetworks.com> * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com> * * Based on dummy, team and ipvlan drivers */ #include <linux/ethtool.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ip.h> #include <linux/init.h> #include <linux/moduleparam.h> #include <linux/netfilter.h> #include <linux/rtnetlink.h> #include <net/rtnetlink.h> #include <linux/u64_stats_sync.h> #include <linux/hashtable.h> #include <linux/spinlock_types.h> #include <linux/inetdevice.h> #include <net/arp.h> #include <net/ip.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> #include <net/route.h> #include <net/addrconf.h> #include <net/l3mdev.h> #include <net/fib_rules.h> #include <net/sch_generic.h> #include <net/netns/generic.h> #include <net/netfilter/nf_conntrack.h> #include <net/inet_dscp.h> #define DRV_NAME "vrf" #define DRV_VERSION "1.1" #define FIB_RULE_PREF 1000 /* default preference for FIB rules */ #define HT_MAP_BITS 4 #define HASH_INITVAL ((u32)0xcafef00d) struct vrf_map { DECLARE_HASHTABLE(ht, HT_MAP_BITS); spinlock_t vmap_lock; /* shared_tables: * count how many distinct tables do not comply with the strict mode * requirement. * shared_tables value must be 0 in order to enable the strict mode. * * example of the evolution of shared_tables: * | time * add vrf0 --> table 100 shared_tables = 0 | t0 * add vrf1 --> table 101 shared_tables = 0 | t1 * add vrf2 --> table 100 shared_tables = 1 | t2 * add vrf3 --> table 100 shared_tables = 1 | t3 * add vrf4 --> table 101 shared_tables = 2 v t4 * * shared_tables is a "step function" (or "staircase function") * and it is increased by one when the second vrf is associated to a * table. * * at t2, vrf0 and vrf2 are bound to table 100: shared_tables = 1. * * at t3, another dev (vrf3) is bound to the same table 100 but the * value of shared_tables is still 1. * This means that no matter how many new vrfs will register on the * table 100, the shared_tables will not increase (considering only * table 100). * * at t4, vrf4 is bound to table 101, and shared_tables = 2. * * Looking at the value of shared_tables we can immediately know if * the strict_mode can or cannot be enforced. Indeed, strict_mode * can be enforced iff shared_tables = 0. * * Conversely, shared_tables is decreased when a vrf is de-associated * from a table with exactly two associated vrfs. */ u32 shared_tables; bool strict_mode; }; struct vrf_map_elem { struct hlist_node hnode; struct list_head vrf_list; /* VRFs registered to this table */ u32 table_id; int users; int ifindex; }; static unsigned int vrf_net_id; /* per netns vrf data */ struct netns_vrf { /* protected by rtnl lock */ bool add_fib_rules; struct vrf_map vmap; struct ctl_table_header *ctl_hdr; }; struct net_vrf { struct rtable __rcu *rth; struct rt6_info __rcu *rt6; #if IS_ENABLED(CONFIG_IPV6) struct fib6_table *fib6_table; #endif u32 tb_id; struct list_head me_list; /* entry in vrf_map_elem */ int ifindex; }; static void vrf_rx_stats(struct net_device *dev, int len) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); u64_stats_inc(&dstats->rx_packets); u64_stats_add(&dstats->rx_bytes, len); u64_stats_update_end(&dstats->syncp); } static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb) { vrf_dev->stats.tx_errors++; kfree_skb(skb); } static struct vrf_map *netns_vrf_map(struct net *net) { struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id); return &nn_vrf->vmap; } static struct vrf_map *netns_vrf_map_by_dev(struct net_device *dev) { return netns_vrf_map(dev_net(dev)); } static int vrf_map_elem_get_vrf_ifindex(struct vrf_map_elem *me) { struct list_head *me_head = &me->vrf_list; struct net_vrf *vrf; if (list_empty(me_head)) return -ENODEV; vrf = list_first_entry(me_head, struct net_vrf, me_list); return vrf->ifindex; } static struct vrf_map_elem *vrf_map_elem_alloc(gfp_t flags) { struct vrf_map_elem *me; me = kmalloc(sizeof(*me), flags); if (!me) return NULL; return me; } static void vrf_map_elem_free(struct vrf_map_elem *me) { kfree(me); } static void vrf_map_elem_init(struct vrf_map_elem *me, int table_id, int ifindex, int users) { me->table_id = table_id; me->ifindex = ifindex; me->users = users; INIT_LIST_HEAD(&me->vrf_list); } static struct vrf_map_elem *vrf_map_lookup_elem(struct vrf_map *vmap, u32 table_id) { struct vrf_map_elem *me; u32 key; key = jhash_1word(table_id, HASH_INITVAL); hash_for_each_possible(vmap->ht, me, hnode, key) { if (me->table_id == table_id) return me; } return NULL; } static void vrf_map_add_elem(struct vrf_map *vmap, struct vrf_map_elem *me) { u32 table_id = me->table_id; u32 key; key = jhash_1word(table_id, HASH_INITVAL); hash_add(vmap->ht, &me->hnode, key); } static void vrf_map_del_elem(struct vrf_map_elem *me) { hash_del(&me->hnode); } static void vrf_map_lock(struct vrf_map *vmap) __acquires(&vmap->vmap_lock) { spin_lock(&vmap->vmap_lock); } static void vrf_map_unlock(struct vrf_map *vmap) __releases(&vmap->vmap_lock) { spin_unlock(&vmap->vmap_lock); } /* called with rtnl lock held */ static int vrf_map_register_dev(struct net_device *dev, struct netlink_ext_ack *extack) { struct vrf_map *vmap = netns_vrf_map_by_dev(dev); struct net_vrf *vrf = netdev_priv(dev); struct vrf_map_elem *new_me, *me; u32 table_id = vrf->tb_id; bool free_new_me = false; int users; int res; /* we pre-allocate elements used in the spin-locked section (so that we * keep the spinlock as short as possible). */ new_me = vrf_map_elem_alloc(GFP_KERNEL); if (!new_me) return -ENOMEM; vrf_map_elem_init(new_me, table_id, dev->ifindex, 0); vrf_map_lock(vmap); me = vrf_map_lookup_elem(vmap, table_id); if (!me) { me = new_me; vrf_map_add_elem(vmap, me); goto link_vrf; } /* we already have an entry in the vrf_map, so it means there is (at * least) a vrf registered on the specific table. */ free_new_me = true; if (vmap->strict_mode) { /* vrfs cannot share the same table */ NL_SET_ERR_MSG(extack, "Table is used by another VRF"); res = -EBUSY; goto unlock; } link_vrf: users = ++me->users; if (users == 2) ++vmap->shared_tables; list_add(&vrf->me_list, &me->vrf_list); res = 0; unlock: vrf_map_unlock(vmap); /* clean-up, if needed */ if (free_new_me) vrf_map_elem_free(new_me); return res; } /* called with rtnl lock held */ static void vrf_map_unregister_dev(struct net_device *dev) { struct vrf_map *vmap = netns_vrf_map_by_dev(dev); struct net_vrf *vrf = netdev_priv(dev); u32 table_id = vrf->tb_id; struct vrf_map_elem *me; int users; vrf_map_lock(vmap); me = vrf_map_lookup_elem(vmap, table_id); if (!me) goto unlock; list_del(&vrf->me_list); users = --me->users; if (users == 1) { --vmap->shared_tables; } else if (users == 0) { vrf_map_del_elem(me); /* no one will refer to this element anymore */ vrf_map_elem_free(me); } unlock: vrf_map_unlock(vmap); } /* return the vrf device index associated with the table_id */ static int vrf_ifindex_lookup_by_table_id(struct net *net, u32 table_id) { struct vrf_map *vmap = netns_vrf_map(net); struct vrf_map_elem *me; int ifindex; vrf_map_lock(vmap); if (!vmap->strict_mode) { ifindex = -EPERM; goto unlock; } me = vrf_map_lookup_elem(vmap, table_id); if (!me) { ifindex = -ENODEV; goto unlock; } ifindex = vrf_map_elem_get_vrf_ifindex(me); unlock: vrf_map_unlock(vmap); return ifindex; } /* by default VRF devices do not have a qdisc and are expected * to be created with only a single queue. */ static bool qdisc_tx_is_default(const struct net_device *dev) { struct netdev_queue *txq; struct Qdisc *qdisc; if (dev->num_tx_queues > 1) return false; txq = netdev_get_tx_queue(dev, 0); qdisc = rcu_access_pointer(txq->qdisc); return !qdisc->enqueue; } /* Local traffic destined to local address. Reinsert the packet to rx * path, similar to loopback handling. */ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, struct dst_entry *dst) { int len = skb->len; skb_orphan(skb); skb_dst_set(skb, dst); /* set pkt_type to avoid skb hitting packet taps twice - * once on Tx and again in Rx processing */ skb->pkt_type = PACKET_LOOPBACK; skb->protocol = eth_type_trans(skb, dev); if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) { vrf_rx_stats(dev, len); } else { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); u64_stats_inc(&dstats->rx_drops); u64_stats_update_end(&dstats->syncp); } return NETDEV_TX_OK; } static void vrf_nf_set_untracked(struct sk_buff *skb) { if (skb_get_nfct(skb) == 0) nf_ct_set(skb, NULL, IP_CT_UNTRACKED); } static void vrf_nf_reset_ct(struct sk_buff *skb) { if (skb_get_nfct(skb) == IP_CT_UNTRACKED) nf_reset_ct(skb); } #if IS_ENABLED(CONFIG_IPV6) static int vrf_ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; vrf_nf_reset_ct(skb); err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); if (likely(err == 1)) err = dst_output(net, sk, skb); return err; } static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { const struct ipv6hdr *iph; struct net *net = dev_net(skb->dev); struct flowi6 fl6; int ret = NET_XMIT_DROP; struct dst_entry *dst; struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst; if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr))) goto err; iph = ipv6_hdr(skb); memset(&fl6, 0, sizeof(fl6)); /* needed to match OIF rule */ fl6.flowi6_l3mdev = dev->ifindex; fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = iph->nexthdr; dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); if (IS_ERR(dst) || dst == dst_null) goto err; skb_dst_drop(skb); /* if dst.dev is the VRF device again this is locally originated traffic * destined to a local address. Short circuit to Rx path. */ if (dst->dev == dev) return vrf_local_xmit(skb, dev, dst); skb_dst_set(skb, dst); /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); ret = vrf_ip6_local_out(net, skb->sk, skb); if (unlikely(net_xmit_eval(ret))) dev->stats.tx_errors++; else ret = NET_XMIT_SUCCESS; return ret; err: vrf_tx_error(dev, skb); return NET_XMIT_DROP; } #else static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { vrf_tx_error(dev, skb); return NET_XMIT_DROP; } #endif /* based on ip_local_out; can't use it b/c the dst is switched pointing to us */ static int vrf_ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; vrf_nf_reset_ct(skb); err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); if (likely(err == 1)) err = dst_output(net, sk, skb); return err; } static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, struct net_device *vrf_dev) { struct iphdr *ip4h; int ret = NET_XMIT_DROP; struct flowi4 fl4; struct net *net = dev_net(vrf_dev); struct rtable *rt; if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr))) goto err; ip4h = ip_hdr(skb); memset(&fl4, 0, sizeof(fl4)); /* needed to match OIF rule */ fl4.flowi4_l3mdev = vrf_dev->ifindex; fl4.flowi4_iif = LOOPBACK_IFINDEX; fl4.flowi4_tos = ip4h->tos & INET_DSCP_MASK; fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; fl4.flowi4_proto = ip4h->protocol; fl4.daddr = ip4h->daddr; fl4.saddr = ip4h->saddr; rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto err; skb_dst_drop(skb); /* if dst.dev is the VRF device again this is locally originated traffic * destined to a local address. Short circuit to Rx path. */ if (rt->dst.dev == vrf_dev) return vrf_local_xmit(skb, vrf_dev, &rt->dst); skb_dst_set(skb, &rt->dst); /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); if (!ip4h->saddr) { ip4h->saddr = inet_select_addr(skb_dst(skb)->dev, 0, RT_SCOPE_LINK); } memset(IPCB(skb), 0, sizeof(*IPCB(skb))); ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); if (unlikely(net_xmit_eval(ret))) vrf_dev->stats.tx_errors++; else ret = NET_XMIT_SUCCESS; out: return ret; err: vrf_tx_error(vrf_dev, skb); goto out; } static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) { switch (skb->protocol) { case htons(ETH_P_IP): return vrf_process_v4_outbound(skb, dev); case htons(ETH_P_IPV6): return vrf_process_v6_outbound(skb, dev); default: vrf_tx_error(dev, skb); return NET_XMIT_DROP; } } static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); int len = skb->len; netdev_tx_t ret = is_ip_tx_frame(skb, dev); u64_stats_update_begin(&dstats->syncp); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { u64_stats_inc(&dstats->tx_packets); u64_stats_add(&dstats->tx_bytes, len); } else { u64_stats_inc(&dstats->tx_drops); } u64_stats_update_end(&dstats->syncp); return ret; } static void vrf_finish_direct(struct sk_buff *skb) { struct net_device *vrf_dev = skb->dev; if (!list_empty(&vrf_dev->ptype_all) && likely(skb_headroom(skb) >= ETH_HLEN)) { struct ethhdr *eth = skb_push(skb, ETH_HLEN); ether_addr_copy(eth->h_source, vrf_dev->dev_addr); eth_zero_addr(eth->h_dest); eth->h_proto = skb->protocol; dev_queue_xmit_nit(skb, vrf_dev); skb_pull(skb, ETH_HLEN); } vrf_nf_reset_ct(skb); } #if IS_ENABLED(CONFIG_IPV6) /* modelled after ip6_finish_output2 */ static int vrf_finish_output6(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; const struct in6_addr *nexthop; struct neighbour *neigh; int ret; vrf_nf_reset_ct(skb); skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; rcu_read_lock(); nexthop = rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); rcu_read_unlock(); return ret; } rcu_read_unlock(); IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } /* modelled after ip6_output */ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) { return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, NULL, skb_dst(skb)->dev, vrf_finish_output6, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } /* set dst on skb to send packet to us via dev_xmit path. Allows * packet to go through device based features such as qdisc, netfilter * hooks and packet sockets with skb->dev set to vrf device. */ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev, struct sk_buff *skb) { struct net_vrf *vrf = netdev_priv(vrf_dev); struct dst_entry *dst = NULL; struct rt6_info *rt6; rcu_read_lock(); rt6 = rcu_dereference(vrf->rt6); if (likely(rt6)) { dst = &rt6->dst; dst_hold(dst); } rcu_read_unlock(); if (unlikely(!dst)) { vrf_tx_error(vrf_dev, skb); return NULL; } skb_dst_drop(skb); skb_dst_set(skb, dst); return skb; } static int vrf_output6_direct_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { vrf_finish_direct(skb); return vrf_ip6_local_out(net, sk, skb); } static int vrf_output6_direct(struct net *net, struct sock *sk, struct sk_buff *skb) { int err = 1; skb->protocol = htons(ETH_P_IPV6); if (!(IPCB(skb)->flags & IPSKB_REROUTED)) err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, NULL, skb->dev, vrf_output6_direct_finish); if (likely(err == 1)) vrf_finish_direct(skb); return err; } static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; err = vrf_output6_direct(net, sk, skb); if (likely(err == 1)) err = vrf_ip6_local_out(net, sk, skb); return err; } static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(vrf_dev); int err; skb->dev = vrf_dev; err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); if (likely(err == 1)) err = vrf_output6_direct(net, sk, skb); if (likely(err == 1)) return skb; return NULL; } static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { /* don't divert link scope packets */ if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) return skb; vrf_nf_set_untracked(skb); if (qdisc_tx_is_default(vrf_dev) || IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) return vrf_ip6_out_direct(vrf_dev, sk, skb); return vrf_ip6_out_redirect(vrf_dev, skb); } /* holding rtnl */ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { struct rt6_info *rt6 = rtnl_dereference(vrf->rt6); struct net *net = dev_net(dev); struct dst_entry *dst; RCU_INIT_POINTER(vrf->rt6, NULL); synchronize_rcu(); /* move dev in dst's to loopback so this VRF device can be deleted * - based on dst_ifdown */ if (rt6) { dst = &rt6->dst; netdev_ref_replace(dst->dev, net->loopback_dev, &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } } static int vrf_rt6_create(struct net_device *dev) { int flags = DST_NOPOLICY | DST_NOXFRM; struct net_vrf *vrf = netdev_priv(dev); struct net *net = dev_net(dev); struct rt6_info *rt6; int rc = -ENOMEM; /* IPv6 can be CONFIG enabled and then disabled runtime */ if (!ipv6_mod_enabled()) return 0; vrf->fib6_table = fib6_new_table(net, vrf->tb_id); if (!vrf->fib6_table) goto out; /* create a dst for routing packets out a VRF device */ rt6 = ip6_dst_alloc(net, dev, flags); if (!rt6) goto out; rt6->dst.output = vrf_output6; rcu_assign_pointer(vrf->rt6, rt6); rc = 0; out: return rc; } #else static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { return skb; } static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { } static int vrf_rt6_create(struct net_device *dev) { return 0; } #endif /* modelled after ip_finish_output2 */ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = dst_rtable(dst); struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; vrf_nf_reset_ct(skb); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { skb = skb_expand_head(skb, hh_len); if (!skb) { dev->stats.tx_errors++; return -ENOMEM; } } rcu_read_lock(); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { int ret; sock_confirm_neigh(skb, neigh); /* if crossing protocols, can not use the cached header */ ret = neigh_output(neigh, skb, is_v6gw); rcu_read_unlock(); return ret; } rcu_read_unlock(); vrf_tx_error(skb->dev, skb); return -EINVAL; } static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, NULL, dev, vrf_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } /* set dst on skb to send packet to us via dev_xmit path. Allows * packet to go through device based features such as qdisc, netfilter * hooks and packet sockets with skb->dev set to vrf device. */ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev, struct sk_buff *skb) { struct net_vrf *vrf = netdev_priv(vrf_dev); struct dst_entry *dst = NULL; struct rtable *rth; rcu_read_lock(); rth = rcu_dereference(vrf->rth); if (likely(rth)) { dst = &rth->dst; dst_hold(dst); } rcu_read_unlock(); if (unlikely(!dst)) { vrf_tx_error(vrf_dev, skb); return NULL; } skb_dst_drop(skb); skb_dst_set(skb, dst); return skb; } static int vrf_output_direct_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { vrf_finish_direct(skb); return vrf_ip_local_out(net, sk, skb); } static int vrf_output_direct(struct net *net, struct sock *sk, struct sk_buff *skb) { int err = 1; skb->protocol = htons(ETH_P_IP); if (!(IPCB(skb)->flags & IPSKB_REROUTED)) err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, NULL, skb->dev, vrf_output_direct_finish); if (likely(err == 1)) vrf_finish_direct(skb); return err; } static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; err = vrf_output_direct(net, sk, skb); if (likely(err == 1)) err = vrf_ip_local_out(net, sk, skb); return err; } static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(vrf_dev); int err; skb->dev = vrf_dev; err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, vrf_dev, vrf_ip_out_direct_finish); if (likely(err == 1)) err = vrf_output_direct(net, sk, skb); if (likely(err == 1)) return skb; return NULL; } static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { /* don't divert multicast or local broadcast */ if (ipv4_is_multicast(ip_hdr(skb)->daddr) || ipv4_is_lbcast(ip_hdr(skb)->daddr)) return skb; vrf_nf_set_untracked(skb); if (qdisc_tx_is_default(vrf_dev) || IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return vrf_ip_out_direct(vrf_dev, sk, skb); return vrf_ip_out_redirect(vrf_dev, skb); } /* called with rcu lock held */ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb, u16 proto) { switch (proto) { case AF_INET: return vrf_ip_out(vrf_dev, sk, skb); case AF_INET6: return vrf_ip6_out(vrf_dev, sk, skb); } return skb; } /* holding rtnl */ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) { struct rtable *rth = rtnl_dereference(vrf->rth); struct net *net = dev_net(dev); struct dst_entry *dst; RCU_INIT_POINTER(vrf->rth, NULL); synchronize_rcu(); /* move dev in dst's to loopback so this VRF device can be deleted * - based on dst_ifdown */ if (rth) { dst = &rth->dst; netdev_ref_replace(dst->dev, net->loopback_dev, &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } } static int vrf_rtable_create(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); struct rtable *rth; if (!fib_new_table(dev_net(dev), vrf->tb_id)) return -ENOMEM; /* create a dst for routing packets out through a VRF device */ rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1); if (!rth) return -ENOMEM; rth->dst.output = vrf_output; rcu_assign_pointer(vrf->rth, rth); return 0; } /**************************** device handling ********************/ /* cycle interface to flush neighbor cache and move routes across tables */ static void cycle_netdev(struct net_device *dev, struct netlink_ext_ack *extack) { unsigned int flags = dev->flags; int ret; if (!netif_running(dev)) return; ret = dev_change_flags(dev, flags & ~IFF_UP, extack); if (ret >= 0) ret = dev_change_flags(dev, flags, extack); if (ret < 0) { netdev_err(dev, "Failed to cycle device %s; route tables might be wrong!\n", dev->name); } } static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev, struct netlink_ext_ack *extack) { int ret; /* do not allow loopback device to be enslaved to a VRF. * The vrf device acts as the loopback for the vrf. */ if (port_dev == dev_net(dev)->loopback_dev) { NL_SET_ERR_MSG(extack, "Can not enslave loopback device to a VRF"); return -EOPNOTSUPP; } port_dev->priv_flags |= IFF_L3MDEV_SLAVE; ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL, extack); if (ret < 0) goto err; cycle_netdev(port_dev, extack); return 0; err: port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; return ret; } static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev, struct netlink_ext_ack *extack) { if (netif_is_l3_master(port_dev)) { NL_SET_ERR_MSG(extack, "Can not enslave an L3 master device to a VRF"); return -EINVAL; } if (netif_is_l3_slave(port_dev)) return -EINVAL; return do_vrf_add_slave(dev, port_dev, extack); } /* inverse of do_vrf_add_slave */ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) { netdev_upper_dev_unlink(port_dev, dev); port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; cycle_netdev(port_dev, NULL); return 0; } static int vrf_del_slave(struct net_device *dev, struct net_device *port_dev) { return do_vrf_del_slave(dev, port_dev); } static void vrf_dev_uninit(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); vrf_rtable_release(dev, vrf); vrf_rt6_release(dev, vrf); } static int vrf_dev_init(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); /* create the default dst which points back to us */ if (vrf_rtable_create(dev) != 0) goto out_nomem; if (vrf_rt6_create(dev) != 0) goto out_rth; dev->flags = IFF_MASTER | IFF_NOARP; /* similarly, oper state is irrelevant; set to up to avoid confusion */ dev->operstate = IF_OPER_UP; netdev_lockdep_set_classes(dev); return 0; out_rth: vrf_rtable_release(dev, vrf); out_nomem: return -ENOMEM; } static const struct net_device_ops vrf_netdev_ops = { .ndo_init = vrf_dev_init, .ndo_uninit = vrf_dev_uninit, .ndo_start_xmit = vrf_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_add_slave = vrf_add_slave, .ndo_del_slave = vrf_del_slave, }; static u32 vrf_fib_table(const struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); return vrf->tb_id; } static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); return 0; } static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook, struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) != 1) skb = NULL; /* kfree_skb(skb) handled by nf code */ return skb; } static int vrf_prepare_mac_header(struct sk_buff *skb, struct net_device *vrf_dev, u16 proto) { struct ethhdr *eth; int err; /* in general, we do not know if there is enough space in the head of * the packet for hosting the mac header. */ err = skb_cow_head(skb, LL_RESERVED_SPACE(vrf_dev)); if (unlikely(err)) /* no space in the skb head */ return -ENOBUFS; __skb_push(skb, ETH_HLEN); eth = (struct ethhdr *)skb->data; skb_reset_mac_header(skb); skb_reset_mac_len(skb); /* we set the ethernet destination and the source addresses to the * address of the VRF device. */ ether_addr_copy(eth->h_dest, vrf_dev->dev_addr); ether_addr_copy(eth->h_source, vrf_dev->dev_addr); eth->h_proto = htons(proto); /* the destination address of the Ethernet frame corresponds to the * address set on the VRF interface; therefore, the packet is intended * to be processed locally. */ skb->protocol = eth->h_proto; skb->pkt_type = PACKET_HOST; skb_postpush_rcsum(skb, skb->data, ETH_HLEN); skb_pull_inline(skb, ETH_HLEN); return 0; } /* prepare and add the mac header to the packet if it was not set previously. * In this way, packet sniffers such as tcpdump can parse the packet correctly. * If the mac header was already set, the original mac header is left * untouched and the function returns immediately. */ static int vrf_add_mac_header_if_unset(struct sk_buff *skb, struct net_device *vrf_dev, u16 proto, struct net_device *orig_dev) { if (skb_mac_header_was_set(skb) && dev_has_header(orig_dev)) return 0; return vrf_prepare_mac_header(skb, vrf_dev, proto); } #if IS_ENABLED(CONFIG_IPV6) /* neighbor handling is done with actual device; do not want * to flip skb->dev for those ndisc packets. This really fails * for multiple next protocols (e.g., NEXTHDR_HOP). But it is * a start. */ static bool ipv6_ndisc_frame(const struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); bool rc = false; if (iph->nexthdr == NEXTHDR_ICMP) { const struct icmp6hdr *icmph; struct icmp6hdr _icmph; icmph = skb_header_pointer(skb, sizeof(*iph), sizeof(_icmph), &_icmph); if (!icmph) goto out; switch (icmph->icmp6_type) { case NDISC_ROUTER_SOLICITATION: case NDISC_ROUTER_ADVERTISEMENT: case NDISC_NEIGHBOUR_SOLICITATION: case NDISC_NEIGHBOUR_ADVERTISEMENT: case NDISC_REDIRECT: rc = true; break; } } out: return rc; } static struct rt6_info *vrf_ip6_route_lookup(struct net *net, const struct net_device *dev, struct flowi6 *fl6, int ifindex, const struct sk_buff *skb, int flags) { struct net_vrf *vrf = netdev_priv(dev); return ip6_pol_route(net, vrf->fib6_table, ifindex, fl6, skb, flags); } static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, int ifindex) { const struct ipv6hdr *iph = ipv6_hdr(skb); struct flowi6 fl6 = { .flowi6_iif = ifindex, .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), }; struct net *net = dev_net(vrf_dev); struct rt6_info *rt6; rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb, RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE); if (unlikely(!rt6)) return; if (unlikely(&rt6->dst == &net->ipv6.ip6_null_entry->dst)) return; skb_dst_set(skb, &rt6->dst); } static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { int orig_iif = skb->skb_iif; bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); bool is_ndisc = ipv6_ndisc_frame(skb); /* loopback, multicast & non-ND link-local traffic; do not push through * packet taps again. Reset pkt_type for upper layers to process skb. * For non-loopback strict packets, determine the dst using the original * ifindex. */ if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IP6CB(skb)->flags |= IP6SKB_L3SLAVE; if (skb->pkt_type == PACKET_LOOPBACK) skb->pkt_type = PACKET_HOST; else vrf_ip6_input_dst(skb, vrf_dev, orig_iif); goto out; } /* if packet is NDISC then keep the ingress interface */ if (!is_ndisc) { struct net_device *orig_dev = skb->dev; vrf_rx_stats(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; if (!list_empty(&vrf_dev->ptype_all)) { int err; err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IPV6, orig_dev); if (likely(!err)) { skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); skb_pull(skb, skb->mac_len); } } IP6CB(skb)->flags |= IP6SKB_L3SLAVE; } if (need_strict) vrf_ip6_input_dst(skb, vrf_dev, orig_iif); skb = vrf_rcv_nfhook(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, vrf_dev); out: return skb; } #else static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { return skb; } #endif static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { struct net_device *orig_dev = skb->dev; skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IPCB(skb)->flags |= IPSKB_L3SLAVE; if (ipv4_is_multicast(ip_hdr(skb)->daddr)) goto out; /* loopback traffic; do not push through packet taps again. * Reset pkt_type for upper layers to process skb */ if (skb->pkt_type == PACKET_LOOPBACK) { skb->pkt_type = PACKET_HOST; goto out; } vrf_rx_stats(vrf_dev, skb->len); if (!list_empty(&vrf_dev->ptype_all)) { int err; err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP, orig_dev); if (likely(!err)) { skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); skb_pull(skb, skb->mac_len); } } skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev); out: return skb; } /* called with rcu lock held */ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev, struct sk_buff *skb, u16 proto) { switch (proto) { case AF_INET: return vrf_ip_rcv(vrf_dev, skb); case AF_INET6: return vrf_ip6_rcv(vrf_dev, skb); } return skb; } #if IS_ENABLED(CONFIG_IPV6) /* send to link-local or multicast address via interface enslaved to * VRF device. Force lookup to VRF table without changing flow struct * Note: Caller to this function must hold rcu_read_lock() and no refcnt * is taken on the dst by this function. */ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, struct flowi6 *fl6) { struct net *net = dev_net(dev); int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_DST_NOREF; struct dst_entry *dst = NULL; struct rt6_info *rt; /* VRF device does not have a link-local address and * sending packets to link-local or mcast addresses over * a VRF device does not make sense */ if (fl6->flowi6_oif == dev->ifindex) { dst = &net->ipv6.ip6_null_entry->dst; return dst; } if (!ipv6_addr_any(&fl6->saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags); if (rt) dst = &rt->dst; return dst; } #endif static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) .l3mdev_link_scope_lookup = vrf_link_scope_lookup, #endif }; static void vrf_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { strscpy(info->driver, DRV_NAME, sizeof(info->driver)); strscpy(info->version, DRV_VERSION, sizeof(info->version)); } static const struct ethtool_ops vrf_ethtool_ops = { .get_drvinfo = vrf_get_drvinfo, }; static inline size_t vrf_fib_rule_nl_size(void) { size_t sz; sz = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)); sz += nla_total_size(sizeof(u8)); /* FRA_L3MDEV */ sz += nla_total_size(sizeof(u32)); /* FRA_PRIORITY */ sz += nla_total_size(sizeof(u8)); /* FRA_PROTOCOL */ return sz; } static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) { struct fib_rule_hdr *frh; struct nlmsghdr *nlh; struct sk_buff *skb; int err; if ((family == AF_INET6 || family == RTNL_FAMILY_IP6MR) && !ipv6_mod_enabled()) return 0; skb = nlmsg_new(vrf_fib_rule_nl_size(), GFP_KERNEL); if (!skb) return -ENOMEM; nlh = nlmsg_put(skb, 0, 0, 0, sizeof(*frh), 0); if (!nlh) goto nla_put_failure; /* rule only needs to appear once */ nlh->nlmsg_flags |= NLM_F_EXCL; frh = nlmsg_data(nlh); memset(frh, 0, sizeof(*frh)); frh->family = family; frh->action = FR_ACT_TO_TBL; if (nla_put_u8(skb, FRA_PROTOCOL, RTPROT_KERNEL)) goto nla_put_failure; if (nla_put_u8(skb, FRA_L3MDEV, 1)) goto nla_put_failure; if (nla_put_u32(skb, FRA_PRIORITY, FIB_RULE_PREF)) goto nla_put_failure; nlmsg_end(skb, nlh); /* fib_nl_{new,del}rule handling looks for net from skb->sk */ skb->sk = dev_net(dev)->rtnl; if (add_it) { err = fib_nl_newrule(skb, nlh, NULL); if (err == -EEXIST) err = 0; } else { err = fib_nl_delrule(skb, nlh, NULL); if (err == -ENOENT) err = 0; } nlmsg_free(skb); return err; nla_put_failure: nlmsg_free(skb); return -EMSGSIZE; } static int vrf_add_fib_rules(const struct net_device *dev) { int err; err = vrf_fib_rule(dev, AF_INET, true); if (err < 0) goto out_err; err = vrf_fib_rule(dev, AF_INET6, true); if (err < 0) goto ipv6_err; #if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES) err = vrf_fib_rule(dev, RTNL_FAMILY_IPMR, true); if (err < 0) goto ipmr_err; #endif #if IS_ENABLED(CONFIG_IPV6_MROUTE_MULTIPLE_TABLES) err = vrf_fib_rule(dev, RTNL_FAMILY_IP6MR, true); if (err < 0) goto ip6mr_err; #endif return 0; #if IS_ENABLED(CONFIG_IPV6_MROUTE_MULTIPLE_TABLES) ip6mr_err: vrf_fib_rule(dev, RTNL_FAMILY_IPMR, false); #endif #if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES) ipmr_err: vrf_fib_rule(dev, AF_INET6, false); #endif ipv6_err: vrf_fib_rule(dev, AF_INET, false); out_err: netdev_err(dev, "Failed to add FIB rules.\n"); return err; } static void vrf_setup(struct net_device *dev) { ether_setup(dev); /* Initialize the device structure. */ dev->netdev_ops = &vrf_netdev_ops; dev->l3mdev_ops = &vrf_l3mdev_ops; dev->ethtool_ops = &vrf_ethtool_ops; dev->needs_free_netdev = true; /* Fill in device structure with ethernet-generic values. */ eth_hw_addr_random(dev); /* don't acquire vrf device's netif_tx_lock when transmitting */ dev->lltx = true; /* don't allow vrf devices to change network namespaces. */ dev->netns_local = true; /* does not make sense for a VLAN to be added to a vrf device */ dev->features |= NETIF_F_VLAN_CHALLENGED; /* enable offload features */ dev->features |= NETIF_F_GSO_SOFTWARE; dev->features |= NETIF_F_RXCSUM | NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC; dev->features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA; dev->hw_features = dev->features; dev->hw_enc_features = dev->features; /* default to no qdisc; user can add if desired */ dev->priv_flags |= IFF_NO_QUEUE; dev->priv_flags |= IFF_NO_RX_HANDLER; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; /* VRF devices do not care about MTU, but if the MTU is set * too low then the ipv4 and ipv6 protocols are disabled * which breaks networking. */ dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU; dev->mtu = dev->max_mtu; dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; } static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { NL_SET_ERR_MSG(extack, "Invalid hardware address"); return -EINVAL; } if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { NL_SET_ERR_MSG(extack, "Invalid hardware address"); return -EADDRNOTAVAIL; } } return 0; } static void vrf_dellink(struct net_device *dev, struct list_head *head) { struct net_device *port_dev; struct list_head *iter; netdev_for_each_lower_dev(dev, port_dev, iter) vrf_del_slave(dev, port_dev); vrf_map_unregister_dev(dev); unregister_netdevice_queue(dev, head); } static int vrf_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct net_vrf *vrf = netdev_priv(dev); struct netns_vrf *nn_vrf; bool *add_fib_rules; struct net *net; int err; if (!data || !data[IFLA_VRF_TABLE]) { NL_SET_ERR_MSG(extack, "VRF table id is missing"); return -EINVAL; } vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]); if (vrf->tb_id == RT_TABLE_UNSPEC) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VRF_TABLE], "Invalid VRF table id"); return -EINVAL; } dev->priv_flags |= IFF_L3MDEV_MASTER; err = register_netdevice(dev); if (err) goto out; /* mapping between table_id and vrf; * note: such binding could not be done in the dev init function * because dev->ifindex id is not available yet. */ vrf->ifindex = dev->ifindex; err = vrf_map_register_dev(dev, extack); if (err) { unregister_netdevice(dev); goto out; } net = dev_net(dev); nn_vrf = net_generic(net, vrf_net_id); add_fib_rules = &nn_vrf->add_fib_rules; if (*add_fib_rules) { err = vrf_add_fib_rules(dev); if (err) { vrf_map_unregister_dev(dev); unregister_netdevice(dev); goto out; } *add_fib_rules = false; } out: return err; } static size_t vrf_nl_getsize(const struct net_device *dev) { return nla_total_size(sizeof(u32)); /* IFLA_VRF_TABLE */ } static int vrf_fillinfo(struct sk_buff *skb, const struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); return nla_put_u32(skb, IFLA_VRF_TABLE, vrf->tb_id); } static size_t vrf_get_slave_size(const struct net_device *bond_dev, const struct net_device *slave_dev) { return nla_total_size(sizeof(u32)); /* IFLA_VRF_PORT_TABLE */ } static int vrf_fill_slave_info(struct sk_buff *skb, const struct net_device *vrf_dev, const struct net_device *slave_dev) { struct net_vrf *vrf = netdev_priv(vrf_dev); if (nla_put_u32(skb, IFLA_VRF_PORT_TABLE, vrf->tb_id)) return -EMSGSIZE; return 0; } static const struct nla_policy vrf_nl_policy[IFLA_VRF_MAX + 1] = { [IFLA_VRF_TABLE] = { .type = NLA_U32 }, }; static struct rtnl_link_ops vrf_link_ops __read_mostly = { .kind = DRV_NAME, .priv_size = sizeof(struct net_vrf), .get_size = vrf_nl_getsize, .policy = vrf_nl_policy, .validate = vrf_validate, .fill_info = vrf_fillinfo, .get_slave_size = vrf_get_slave_size, .fill_slave_info = vrf_fill_slave_info, .newlink = vrf_newlink, .dellink = vrf_dellink, .setup = vrf_setup, .maxtype = IFLA_VRF_MAX, }; static int vrf_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); /* only care about unregister events to drop slave references */ if (event == NETDEV_UNREGISTER) { struct net_device *vrf_dev; if (!netif_is_l3_slave(dev)) goto out; vrf_dev = netdev_master_upper_dev_get(dev); vrf_del_slave(vrf_dev, dev); } out: return NOTIFY_DONE; } static struct notifier_block vrf_notifier_block __read_mostly = { .notifier_call = vrf_device_event, }; static int vrf_map_init(struct vrf_map *vmap) { spin_lock_init(&vmap->vmap_lock); hash_init(vmap->ht); vmap->strict_mode = false; return 0; } #ifdef CONFIG_SYSCTL static bool vrf_strict_mode(struct vrf_map *vmap) { bool strict_mode; vrf_map_lock(vmap); strict_mode = vmap->strict_mode; vrf_map_unlock(vmap); return strict_mode; } static int vrf_strict_mode_change(struct vrf_map *vmap, bool new_mode) { bool *cur_mode; int res = 0; vrf_map_lock(vmap); cur_mode = &vmap->strict_mode; if (*cur_mode == new_mode) goto unlock; if (*cur_mode) { /* disable strict mode */ *cur_mode = false; } else { if (vmap->shared_tables) { /* we cannot allow strict_mode because there are some * vrfs that share one or more tables. */ res = -EBUSY; goto unlock; } /* no tables are shared among vrfs, so we can go back * to 1:1 association between a vrf with its table. */ *cur_mode = true; } unlock: vrf_map_unlock(vmap); return res; } static int vrf_shared_table_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = (struct net *)table->extra1; struct vrf_map *vmap = netns_vrf_map(net); int proc_strict_mode = 0; struct ctl_table tmp = { .procname = table->procname, .data = &proc_strict_mode, .maxlen = sizeof(int), .mode = table->mode, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }; int ret; if (!write) proc_strict_mode = vrf_strict_mode(vmap); ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) ret = vrf_strict_mode_change(vmap, (bool)proc_strict_mode); return ret; } static const struct ctl_table vrf_table[] = { { .procname = "strict_mode", .data = NULL, .maxlen = sizeof(int), .mode = 0644, .proc_handler = vrf_shared_table_handler, /* set by the vrf_netns_init */ .extra1 = NULL, }, }; static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf) { struct ctl_table *table; table = kmemdup(vrf_table, sizeof(vrf_table), GFP_KERNEL); if (!table) return -ENOMEM; /* init the extra1 parameter with the reference to current netns */ table[0].extra1 = net; nn_vrf->ctl_hdr = register_net_sysctl_sz(net, "net/vrf", table, ARRAY_SIZE(vrf_table)); if (!nn_vrf->ctl_hdr) { kfree(table); return -ENOMEM; } return 0; } static void vrf_netns_exit_sysctl(struct net *net) { struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id); const struct ctl_table *table; table = nn_vrf->ctl_hdr->ctl_table_arg; unregister_net_sysctl_table(nn_vrf->ctl_hdr); kfree(table); } #else static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf) { return 0; } static void vrf_netns_exit_sysctl(struct net *net) { } #endif /* Initialize per network namespace state */ static int __net_init vrf_netns_init(struct net *net) { struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id); nn_vrf->add_fib_rules = true; vrf_map_init(&nn_vrf->vmap); return vrf_netns_init_sysctl(net, nn_vrf); } static void __net_exit vrf_netns_exit(struct net *net) { vrf_netns_exit_sysctl(net); } static struct pernet_operations vrf_net_ops __net_initdata = { .init = vrf_netns_init, .exit = vrf_netns_exit, .id = &vrf_net_id, .size = sizeof(struct netns_vrf), }; static int __init vrf_init_module(void) { int rc; register_netdevice_notifier(&vrf_notifier_block); rc = register_pernet_subsys(&vrf_net_ops); if (rc < 0) goto error; rc = l3mdev_table_lookup_register(L3MDEV_TYPE_VRF, vrf_ifindex_lookup_by_table_id); if (rc < 0) goto unreg_pernet; rc = rtnl_link_register(&vrf_link_ops); if (rc < 0) goto table_lookup_unreg; return 0; table_lookup_unreg: l3mdev_table_lookup_unregister(L3MDEV_TYPE_VRF, vrf_ifindex_lookup_by_table_id); unreg_pernet: unregister_pernet_subsys(&vrf_net_ops); error: unregister_netdevice_notifier(&vrf_notifier_block); return rc; } module_init(vrf_init_module); MODULE_AUTHOR("Shrijeet Mukherjee, David Ahern"); MODULE_DESCRIPTION("Device driver to instantiate VRF domains"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK(DRV_NAME); MODULE_VERSION(DRV_VERSION); |
2 189 2 2 2 2 2 7 1 1 1 3 3 3 2 1 1 1 1 1 2 2 2 3 3 6 6 4 1 2 5 11 11 1 1 1 1 6 7 2 1 1 3 3 3 3 3 3 1 1 2 2 3 3 3 1 1 1 1 10 1 3 8 1 7 5 2 1 1 4 4 5 1 4 5 5 3 3 1 1 1 1 4 1 1 1 2 2 2 3 2 1 1 3 175 174 175 173 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 | /* * Copyright (C) 2017-2018 Netronome Systems, Inc. * * This software is licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this * source tree. * * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. */ #include <linux/bpf.h> #include <linux/bpf_verifier.h> #include <linux/bug.h> #include <linux/kdev_t.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/printk.h> #include <linux/proc_ns.h> #include <linux/rhashtable.h> #include <linux/rtnetlink.h> #include <linux/rwsem.h> #include <net/xdp.h> /* Protects offdevs, members of bpf_offload_netdev and offload members * of all progs. * RTNL lock cannot be taken when holding this lock. */ static DECLARE_RWSEM(bpf_devs_lock); struct bpf_offload_dev { const struct bpf_prog_offload_ops *ops; struct list_head netdevs; void *priv; }; struct bpf_offload_netdev { struct rhash_head l; struct net_device *netdev; struct bpf_offload_dev *offdev; /* NULL when bound-only */ struct list_head progs; struct list_head maps; struct list_head offdev_netdevs; }; static const struct rhashtable_params offdevs_params = { .nelem_hint = 4, .key_len = sizeof(struct net_device *), .key_offset = offsetof(struct bpf_offload_netdev, netdev), .head_offset = offsetof(struct bpf_offload_netdev, l), .automatic_shrinking = true, }; static struct rhashtable offdevs; static int bpf_dev_offload_check(struct net_device *netdev) { if (!netdev) return -EINVAL; if (!netdev->netdev_ops->ndo_bpf) return -EOPNOTSUPP; return 0; } static struct bpf_offload_netdev * bpf_offload_find_netdev(struct net_device *netdev) { lockdep_assert_held(&bpf_devs_lock); return rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params); } static int __bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, struct net_device *netdev) { struct bpf_offload_netdev *ondev; int err; ondev = kzalloc(sizeof(*ondev), GFP_KERNEL); if (!ondev) return -ENOMEM; ondev->netdev = netdev; ondev->offdev = offdev; INIT_LIST_HEAD(&ondev->progs); INIT_LIST_HEAD(&ondev->maps); err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params); if (err) { netdev_warn(netdev, "failed to register for BPF offload\n"); goto err_free; } if (offdev) list_add(&ondev->offdev_netdevs, &offdev->netdevs); return 0; err_free: kfree(ondev); return err; } static void __bpf_prog_offload_destroy(struct bpf_prog *prog) { struct bpf_prog_offload *offload = prog->aux->offload; if (offload->dev_state) offload->offdev->ops->destroy(prog); list_del_init(&offload->offloads); kfree(offload); prog->aux->offload = NULL; } static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap, enum bpf_netdev_command cmd) { struct netdev_bpf data = {}; struct net_device *netdev; ASSERT_RTNL(); data.command = cmd; data.offmap = offmap; /* Caller must make sure netdev is valid */ netdev = offmap->netdev; return netdev->netdev_ops->ndo_bpf(netdev, &data); } static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap) { WARN_ON(bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_FREE)); /* Make sure BPF_MAP_GET_NEXT_ID can't find this dead map */ bpf_map_free_id(&offmap->map); list_del_init(&offmap->offloads); offmap->netdev = NULL; } static void __bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, struct net_device *netdev) { struct bpf_offload_netdev *ondev, *altdev = NULL; struct bpf_offloaded_map *offmap, *mtmp; struct bpf_prog_offload *offload, *ptmp; ASSERT_RTNL(); ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params); if (WARN_ON(!ondev)) return; WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params)); /* Try to move the objects to another netdev of the device */ if (offdev) { list_del(&ondev->offdev_netdevs); altdev = list_first_entry_or_null(&offdev->netdevs, struct bpf_offload_netdev, offdev_netdevs); } if (altdev) { list_for_each_entry(offload, &ondev->progs, offloads) offload->netdev = altdev->netdev; list_splice_init(&ondev->progs, &altdev->progs); list_for_each_entry(offmap, &ondev->maps, offloads) offmap->netdev = altdev->netdev; list_splice_init(&ondev->maps, &altdev->maps); } else { list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads) __bpf_prog_offload_destroy(offload->prog); list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads) __bpf_map_offload_destroy(offmap); } WARN_ON(!list_empty(&ondev->progs)); WARN_ON(!list_empty(&ondev->maps)); kfree(ondev); } static int __bpf_prog_dev_bound_init(struct bpf_prog *prog, struct net_device *netdev) { struct bpf_offload_netdev *ondev; struct bpf_prog_offload *offload; int err; offload = kzalloc(sizeof(*offload), GFP_USER); if (!offload) return -ENOMEM; offload->prog = prog; offload->netdev = netdev; ondev = bpf_offload_find_netdev(offload->netdev); /* When program is offloaded require presence of "true" * bpf_offload_netdev, avoid the one created for !ondev case below. */ if (bpf_prog_is_offloaded(prog->aux) && (!ondev || !ondev->offdev)) { err = -EINVAL; goto err_free; } if (!ondev) { /* When only binding to the device, explicitly * create an entry in the hashtable. */ err = __bpf_offload_dev_netdev_register(NULL, offload->netdev); if (err) goto err_free; ondev = bpf_offload_find_netdev(offload->netdev); } offload->offdev = ondev->offdev; prog->aux->offload = offload; list_add_tail(&offload->offloads, &ondev->progs); return 0; err_free: kfree(offload); return err; } int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr) { struct net_device *netdev; int err; if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS && attr->prog_type != BPF_PROG_TYPE_XDP) return -EINVAL; if (attr->prog_flags & ~(BPF_F_XDP_DEV_BOUND_ONLY | BPF_F_XDP_HAS_FRAGS)) return -EINVAL; /* Frags are allowed only if program is dev-bound-only, but not * if it is requesting bpf offload. */ if (attr->prog_flags & BPF_F_XDP_HAS_FRAGS && !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY)) return -EINVAL; if (attr->prog_type == BPF_PROG_TYPE_SCHED_CLS && attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY) return -EINVAL; netdev = dev_get_by_index(current->nsproxy->net_ns, attr->prog_ifindex); if (!netdev) return -EINVAL; err = bpf_dev_offload_check(netdev); if (err) goto out; prog->aux->offload_requested = !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY); down_write(&bpf_devs_lock); err = __bpf_prog_dev_bound_init(prog, netdev); up_write(&bpf_devs_lock); out: dev_put(netdev); return err; } int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog, struct bpf_prog *old_prog) { int err; if (!bpf_prog_is_dev_bound(old_prog->aux)) return 0; if (bpf_prog_is_offloaded(old_prog->aux)) return -EINVAL; new_prog->aux->dev_bound = old_prog->aux->dev_bound; new_prog->aux->offload_requested = old_prog->aux->offload_requested; down_write(&bpf_devs_lock); if (!old_prog->aux->offload) { err = -EINVAL; goto out; } err = __bpf_prog_dev_bound_init(new_prog, old_prog->aux->offload->netdev); out: up_write(&bpf_devs_lock); return err; } int bpf_prog_offload_verifier_prep(struct bpf_prog *prog) { struct bpf_prog_offload *offload; int ret = -ENODEV; down_read(&bpf_devs_lock); offload = prog->aux->offload; if (offload) { ret = offload->offdev->ops->prepare(prog); offload->dev_state = !ret; } up_read(&bpf_devs_lock); return ret; } int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) { struct bpf_prog_offload *offload; int ret = -ENODEV; down_read(&bpf_devs_lock); offload = env->prog->aux->offload; if (offload) ret = offload->offdev->ops->insn_hook(env, insn_idx, prev_insn_idx); up_read(&bpf_devs_lock); return ret; } int bpf_prog_offload_finalize(struct bpf_verifier_env *env) { struct bpf_prog_offload *offload; int ret = -ENODEV; down_read(&bpf_devs_lock); offload = env->prog->aux->offload; if (offload) { if (offload->offdev->ops->finalize) ret = offload->offdev->ops->finalize(env); else ret = 0; } up_read(&bpf_devs_lock); return ret; } void bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, struct bpf_insn *insn) { const struct bpf_prog_offload_ops *ops; struct bpf_prog_offload *offload; int ret = -EOPNOTSUPP; down_read(&bpf_devs_lock); offload = env->prog->aux->offload; if (offload) { ops = offload->offdev->ops; if (!offload->opt_failed && ops->replace_insn) ret = ops->replace_insn(env, off, insn); offload->opt_failed |= ret; } up_read(&bpf_devs_lock); } void bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) { struct bpf_prog_offload *offload; int ret = -EOPNOTSUPP; down_read(&bpf_devs_lock); offload = env->prog->aux->offload; if (offload) { if (!offload->opt_failed && offload->offdev->ops->remove_insns) ret = offload->offdev->ops->remove_insns(env, off, cnt); offload->opt_failed |= ret; } up_read(&bpf_devs_lock); } void bpf_prog_dev_bound_destroy(struct bpf_prog *prog) { struct bpf_offload_netdev *ondev; struct net_device *netdev; rtnl_lock(); down_write(&bpf_devs_lock); if (prog->aux->offload) { list_del_init(&prog->aux->offload->offloads); netdev = prog->aux->offload->netdev; __bpf_prog_offload_destroy(prog); ondev = bpf_offload_find_netdev(netdev); if (!ondev->offdev && list_empty(&ondev->progs)) __bpf_offload_dev_netdev_unregister(NULL, netdev); } up_write(&bpf_devs_lock); rtnl_unlock(); } static int bpf_prog_offload_translate(struct bpf_prog *prog) { struct bpf_prog_offload *offload; int ret = -ENODEV; down_read(&bpf_devs_lock); offload = prog->aux->offload; if (offload) ret = offload->offdev->ops->translate(prog); up_read(&bpf_devs_lock); return ret; } static unsigned int bpf_prog_warn_on_exec(const void *ctx, const struct bpf_insn *insn) { WARN(1, "attempt to execute device eBPF program on the host!"); return 0; } int bpf_prog_offload_compile(struct bpf_prog *prog) { prog->bpf_func = bpf_prog_warn_on_exec; return bpf_prog_offload_translate(prog); } struct ns_get_path_bpf_prog_args { struct bpf_prog *prog; struct bpf_prog_info *info; }; static struct ns_common *bpf_prog_offload_info_fill_ns(void *private_data) { struct ns_get_path_bpf_prog_args *args = private_data; struct bpf_prog_aux *aux = args->prog->aux; struct ns_common *ns; struct net *net; rtnl_lock(); down_read(&bpf_devs_lock); if (aux->offload) { args->info->ifindex = aux->offload->netdev->ifindex; net = dev_net(aux->offload->netdev); get_net(net); ns = &net->ns; } else { args->info->ifindex = 0; ns = NULL; } up_read(&bpf_devs_lock); rtnl_unlock(); return ns; } int bpf_prog_offload_info_fill(struct bpf_prog_info *info, struct bpf_prog *prog) { struct ns_get_path_bpf_prog_args args = { .prog = prog, .info = info, }; struct bpf_prog_aux *aux = prog->aux; struct inode *ns_inode; struct path ns_path; char __user *uinsns; int res; u32 ulen; res = ns_get_path_cb(&ns_path, bpf_prog_offload_info_fill_ns, &args); if (res) { if (!info->ifindex) return -ENODEV; return res; } down_read(&bpf_devs_lock); if (!aux->offload) { up_read(&bpf_devs_lock); return -ENODEV; } ulen = info->jited_prog_len; info->jited_prog_len = aux->offload->jited_len; if (info->jited_prog_len && ulen) { uinsns = u64_to_user_ptr(info->jited_prog_insns); ulen = min_t(u32, info->jited_prog_len, ulen); if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) { up_read(&bpf_devs_lock); return -EFAULT; } } up_read(&bpf_devs_lock); ns_inode = ns_path.dentry->d_inode; info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); info->netns_ino = ns_inode->i_ino; path_put(&ns_path); return 0; } const struct bpf_prog_ops bpf_offload_prog_ops = { }; struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) { struct net *net = current->nsproxy->net_ns; struct bpf_offload_netdev *ondev; struct bpf_offloaded_map *offmap; int err; if (!capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); if (attr->map_type != BPF_MAP_TYPE_ARRAY && attr->map_type != BPF_MAP_TYPE_HASH) return ERR_PTR(-EINVAL); offmap = bpf_map_area_alloc(sizeof(*offmap), NUMA_NO_NODE); if (!offmap) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&offmap->map, attr); rtnl_lock(); down_write(&bpf_devs_lock); offmap->netdev = __dev_get_by_index(net, attr->map_ifindex); err = bpf_dev_offload_check(offmap->netdev); if (err) goto err_unlock; ondev = bpf_offload_find_netdev(offmap->netdev); if (!ondev) { err = -EINVAL; goto err_unlock; } err = bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_ALLOC); if (err) goto err_unlock; list_add_tail(&offmap->offloads, &ondev->maps); up_write(&bpf_devs_lock); rtnl_unlock(); return &offmap->map; err_unlock: up_write(&bpf_devs_lock); rtnl_unlock(); bpf_map_area_free(offmap); return ERR_PTR(err); } void bpf_map_offload_map_free(struct bpf_map *map) { struct bpf_offloaded_map *offmap = map_to_offmap(map); rtnl_lock(); down_write(&bpf_devs_lock); if (offmap->netdev) __bpf_map_offload_destroy(offmap); up_write(&bpf_devs_lock); rtnl_unlock(); bpf_map_area_free(offmap); } u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map) { /* The memory dynamically allocated in netdev dev_ops is not counted */ return sizeof(struct bpf_offloaded_map); } int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value) { struct bpf_offloaded_map *offmap = map_to_offmap(map); int ret = -ENODEV; down_read(&bpf_devs_lock); if (offmap->netdev) ret = offmap->dev_ops->map_lookup_elem(offmap, key, value); up_read(&bpf_devs_lock); return ret; } int bpf_map_offload_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { struct bpf_offloaded_map *offmap = map_to_offmap(map); int ret = -ENODEV; if (unlikely(flags > BPF_EXIST)) return -EINVAL; down_read(&bpf_devs_lock); if (offmap->netdev) ret = offmap->dev_ops->map_update_elem(offmap, key, value, flags); up_read(&bpf_devs_lock); return ret; } int bpf_map_offload_delete_elem(struct bpf_map *map, void *key) { struct bpf_offloaded_map *offmap = map_to_offmap(map); int ret = -ENODEV; down_read(&bpf_devs_lock); if (offmap->netdev) ret = offmap->dev_ops->map_delete_elem(offmap, key); up_read(&bpf_devs_lock); return ret; } int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_offloaded_map *offmap = map_to_offmap(map); int ret = -ENODEV; down_read(&bpf_devs_lock); if (offmap->netdev) ret = offmap->dev_ops->map_get_next_key(offmap, key, next_key); up_read(&bpf_devs_lock); return ret; } struct ns_get_path_bpf_map_args { struct bpf_offloaded_map *offmap; struct bpf_map_info *info; }; static struct ns_common *bpf_map_offload_info_fill_ns(void *private_data) { struct ns_get_path_bpf_map_args *args = private_data; struct ns_common *ns; struct net *net; rtnl_lock(); down_read(&bpf_devs_lock); if (args->offmap->netdev) { args->info->ifindex = args->offmap->netdev->ifindex; net = dev_net(args->offmap->netdev); get_net(net); ns = &net->ns; } else { args->info->ifindex = 0; ns = NULL; } up_read(&bpf_devs_lock); rtnl_unlock(); return ns; } int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map) { struct ns_get_path_bpf_map_args args = { .offmap = map_to_offmap(map), .info = info, }; struct inode *ns_inode; struct path ns_path; int res; res = ns_get_path_cb(&ns_path, bpf_map_offload_info_fill_ns, &args); if (res) { if (!info->ifindex) return -ENODEV; return res; } ns_inode = ns_path.dentry->d_inode; info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); info->netns_ino = ns_inode->i_ino; path_put(&ns_path); return 0; } static bool __bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev) { struct bpf_offload_netdev *ondev1, *ondev2; struct bpf_prog_offload *offload; if (!bpf_prog_is_dev_bound(prog->aux)) return false; offload = prog->aux->offload; if (!offload) return false; if (offload->netdev == netdev) return true; ondev1 = bpf_offload_find_netdev(offload->netdev); ondev2 = bpf_offload_find_netdev(netdev); return ondev1 && ondev2 && ondev1->offdev == ondev2->offdev; } bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev) { bool ret; down_read(&bpf_devs_lock); ret = __bpf_offload_dev_match(prog, netdev); up_read(&bpf_devs_lock); return ret; } EXPORT_SYMBOL_GPL(bpf_offload_dev_match); bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs) { bool ret; if (bpf_prog_is_offloaded(lhs->aux) != bpf_prog_is_offloaded(rhs->aux)) return false; down_read(&bpf_devs_lock); ret = lhs->aux->offload && rhs->aux->offload && lhs->aux->offload->netdev && lhs->aux->offload->netdev == rhs->aux->offload->netdev; up_read(&bpf_devs_lock); return ret; } bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map) { struct bpf_offloaded_map *offmap; bool ret; if (!bpf_map_is_offloaded(map)) return bpf_map_offload_neutral(map); offmap = map_to_offmap(map); down_read(&bpf_devs_lock); ret = __bpf_offload_dev_match(prog, offmap->netdev); up_read(&bpf_devs_lock); return ret; } int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, struct net_device *netdev) { int err; down_write(&bpf_devs_lock); err = __bpf_offload_dev_netdev_register(offdev, netdev); up_write(&bpf_devs_lock); return err; } EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register); void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, struct net_device *netdev) { down_write(&bpf_devs_lock); __bpf_offload_dev_netdev_unregister(offdev, netdev); up_write(&bpf_devs_lock); } EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister); struct bpf_offload_dev * bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv) { struct bpf_offload_dev *offdev; offdev = kzalloc(sizeof(*offdev), GFP_KERNEL); if (!offdev) return ERR_PTR(-ENOMEM); offdev->ops = ops; offdev->priv = priv; INIT_LIST_HEAD(&offdev->netdevs); return offdev; } EXPORT_SYMBOL_GPL(bpf_offload_dev_create); void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev) { WARN_ON(!list_empty(&offdev->netdevs)); kfree(offdev); } EXPORT_SYMBOL_GPL(bpf_offload_dev_destroy); void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev) { return offdev->priv; } EXPORT_SYMBOL_GPL(bpf_offload_dev_priv); void bpf_dev_bound_netdev_unregister(struct net_device *dev) { struct bpf_offload_netdev *ondev; ASSERT_RTNL(); down_write(&bpf_devs_lock); ondev = bpf_offload_find_netdev(dev); if (ondev && !ondev->offdev) __bpf_offload_dev_netdev_unregister(NULL, ondev->netdev); up_write(&bpf_devs_lock); } int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log, struct bpf_prog_aux *prog_aux) { if (!bpf_prog_is_dev_bound(prog_aux)) { bpf_log(log, "metadata kfuncs require device-bound program\n"); return -EINVAL; } if (bpf_prog_is_offloaded(prog_aux)) { bpf_log(log, "metadata kfuncs can't be offloaded\n"); return -EINVAL; } return 0; } void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id) { const struct xdp_metadata_ops *ops; void *p = NULL; /* We don't hold bpf_devs_lock while resolving several * kfuncs and can race with the unregister_netdevice(). * We rely on bpf_dev_bound_match() check at attach * to render this program unusable. */ down_read(&bpf_devs_lock); if (!prog->aux->offload) goto out; ops = prog->aux->offload->netdev->xdp_metadata_ops; if (!ops) goto out; #define XDP_METADATA_KFUNC(name, _, __, xmo) \ if (func_id == bpf_xdp_metadata_kfunc_id(name)) p = ops->xmo; XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC out: up_read(&bpf_devs_lock); return p; } static int __init bpf_offload_init(void) { return rhashtable_init(&offdevs, &offdevs_params); } core_initcall(bpf_offload_init); |
169 139 89 42 69 29 66 18 107 83 57 71 91 96 69 105 4 4 11 101 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 | // SPDX-License-Identifier: GPL-2.0-only #include <net/tcp.h> /* The bandwidth estimator estimates the rate at which the network * can currently deliver outbound data packets for this flow. At a high * level, it operates by taking a delivery rate sample for each ACK. * * A rate sample records the rate at which the network delivered packets * for this flow, calculated over the time interval between the transmission * of a data packet and the acknowledgment of that packet. * * Specifically, over the interval between each transmit and corresponding ACK, * the estimator generates a delivery rate sample. Typically it uses the rate * at which packets were acknowledged. However, the approach of using only the * acknowledgment rate faces a challenge under the prevalent ACK decimation or * compression: packets can temporarily appear to be delivered much quicker * than the bottleneck rate. Since it is physically impossible to do that in a * sustained fashion, when the estimator notices that the ACK rate is faster * than the transmit rate, it uses the latter: * * send_rate = #pkts_delivered/(last_snd_time - first_snd_time) * ack_rate = #pkts_delivered/(last_ack_time - first_ack_time) * bw = min(send_rate, ack_rate) * * Notice the estimator essentially estimates the goodput, not always the * network bottleneck link rate when the sending or receiving is limited by * other factors like applications or receiver window limits. The estimator * deliberately avoids using the inter-packet spacing approach because that * approach requires a large number of samples and sophisticated filtering. * * TCP flows can often be application-limited in request/response workloads. * The estimator marks a bandwidth sample as application-limited if there * was some moment during the sampled window of packets when there was no data * ready to send in the write queue. */ /* Snapshot the current delivery information in the skb, to generate * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). */ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); /* In general we need to start delivery rate samples from the * time we received the most recent ACK, to ensure we include * the full time the network needs to deliver all in-flight * packets. If there are no packets in flight yet, then we * know that any ACKs after now indicate that the network was * able to deliver those packets completely in the sampling * interval between now and the next ACK. * * Note that we use packets_out instead of tcp_packets_in_flight(tp) * because the latter is a guess based on RTO and loss-marking * heuristics. We don't want spurious RTOs or loss markings to cause * a spuriously small time interval, causing a spuriously high * bandwidth estimate. */ if (!tp->packets_out) { u64 tstamp_us = tcp_skb_timestamp_us(skb); tp->first_tx_mstamp = tstamp_us; tp->delivered_mstamp = tstamp_us; } TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp; TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; TCP_SKB_CB(skb)->tx.delivered = tp->delivered; TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0; } /* When an skb is sacked or acked, we fill in the rate sample with the (prior) * delivery information when the skb was last transmitted. * * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is * called multiple times. We favor the information from the most recently * sent skb, i.e., the skb with the most recently sent time and the highest * sequence. */ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *scb = TCP_SKB_CB(skb); u64 tx_tstamp; if (!scb->tx.delivered_mstamp) return; tx_tstamp = tcp_skb_timestamp_us(skb); if (!rs->prior_delivered || tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp, scb->end_seq, rs->last_end_seq)) { rs->prior_delivered_ce = scb->tx.delivered_ce; rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; rs->last_end_seq = scb->end_seq; /* Record send time of most recently ACKed packet: */ tp->first_tx_mstamp = tx_tstamp; /* Find the duration of the "send phase" of this window: */ rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, scb->tx.first_tx_mstamp); } /* Mark off the skb delivered once it's sacked to avoid being * used again when it's cumulatively acked. For acked packets * we don't need to reset since it'll be freed soon. */ if (scb->sacked & TCPCB_SACKED_ACKED) scb->tx.delivered_mstamp = 0; } /* Update the connection delivery information and generate a rate sample. */ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, bool is_sack_reneg, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; /* Clear app limited if bubble is acked and gone. */ if (tp->app_limited && after(tp->delivered, tp->app_limited)) tp->app_limited = 0; /* TODO: there are multiple places throughout tcp_ack() to get * current time. Refactor the code using a new "tcp_acktag_state" * to carry current time, flags, stats like "tcp_sacktag_state". */ if (delivered) tp->delivered_mstamp = tp->tcp_mstamp; rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ rs->losses = lost; /* freshly marked lost */ /* Return an invalid sample if no timing information is available or * in recovery from loss with SACK reneging. Rate samples taken during * a SACK reneging event may overestimate bw by including packets that * were SACKed before the reneg. */ if (!rs->prior_mstamp || is_sack_reneg) { rs->delivered = -1; rs->interval_us = -1; return; } rs->delivered = tp->delivered - rs->prior_delivered; rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce; /* delivered_ce occupies less than 32 bits in the skb control block */ rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK; /* Model sending data and receiving ACKs as separate pipeline phases * for a window. Usually the ACK phase is longer, but with ACK * compression the send phase can be longer. To be safe we use the * longer phase. */ snd_us = rs->interval_us; /* send phase */ ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, rs->prior_mstamp); /* ack phase */ rs->interval_us = max(snd_us, ack_us); /* Record both segment send and ack receive intervals */ rs->snd_interval_us = snd_us; rs->rcv_interval_us = ack_us; /* Normally we expect interval_us >= min-rtt. * Note that rate may still be over-estimated when a spuriously * retransmistted skb was first (s)acked because "interval_us" * is under-estimated (up to an RTT). However continuously * measuring the delivery rate during loss recovery is crucial * for connections suffer heavy or prolonged losses. */ if (unlikely(rs->interval_us < tcp_min_rtt(tp))) { if (!rs->is_retrans) pr_debug("tcp rate: %ld %d %u %u %u\n", rs->interval_us, rs->delivered, inet_csk(sk)->icsk_ca_state, tp->rx_opt.sack_ok, tcp_min_rtt(tp)); rs->interval_us = -1; return; } /* Record the last non-app-limited or the highest app-limited bw */ if (!rs->is_app_limited || ((u64)rs->delivered * tp->rate_interval_us >= (u64)tp->rate_delivered * rs->interval_us)) { tp->rate_delivered = rs->delivered; tp->rate_interval_us = rs->interval_us; tp->rate_app_limited = rs->is_app_limited; } } /* If a gap is detected between sends, mark the socket application-limited. */ void tcp_rate_check_app_limited(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); if (/* We have less than one packet to send. */ tp->write_seq - tp->snd_nxt < tp->mss_cache && /* Nothing in sending host's qdisc queues or NIC tx queue. */ sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) && /* We are not limited by CWND. */ tcp_packets_in_flight(tp) < tcp_snd_cwnd(tp) && /* All lost packets have been retransmitted. */ tp->lost_out <= tp->retrans_out) tp->app_limited = (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; } EXPORT_SYMBOL_GPL(tcp_rate_check_app_limited); |
3 3 3 1 1 3 2 1 3 6 6 2 1 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2008-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ /* Kernel module implementing an IP set type: the list:set type */ #include <linux/module.h> #include <linux/ip.h> #include <linux/rculist.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_list.h> #define IPSET_TYPE_REV_MIN 0 /* 1 Counters support added */ /* 2 Comments support added */ #define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_list:set"); /* Member elements */ struct set_elem { struct rcu_head rcu; struct list_head list; struct ip_set *set; /* Sigh, in order to cleanup reference */ ip_set_id_t id; } __aligned(__alignof__(u64)); struct set_adt_elem { ip_set_id_t id; ip_set_id_t refid; int before; }; /* Type structure */ struct list_set { u32 size; /* size of set list array */ struct timer_list gc; /* garbage collection */ struct ip_set *set; /* attached to this ip_set */ struct net *net; /* namespace */ struct list_head members; /* the set members */ }; static int list_set_ktest(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt, const struct ip_set_ext *ext) { struct list_set *map = set->data; struct ip_set_ext *mext = &opt->ext; struct set_elem *e; u32 flags = opt->cmdflags; int ret; /* Don't lookup sub-counters at all */ opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS; if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) opt->cmdflags |= IPSET_FLAG_SKIP_COUNTER_UPDATE; list_for_each_entry_rcu(e, &map->members, list) { ret = ip_set_test(e->id, skb, par, opt); if (ret <= 0) continue; if (ip_set_match_extensions(set, ext, mext, flags, e)) return 1; } return 0; } static int list_set_kadd(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt, const struct ip_set_ext *ext) { struct list_set *map = set->data; struct set_elem *e; int ret; list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_add(e->id, skb, par, opt); if (ret == 0) return ret; } return 0; } static int list_set_kdel(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt, const struct ip_set_ext *ext) { struct list_set *map = set->data; struct set_elem *e; int ret; list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_del(e->id, skb, par, opt); if (ret == 0) return ret; } return 0; } static int list_set_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); int ret = -EINVAL; rcu_read_lock(); switch (adt) { case IPSET_TEST: ret = list_set_ktest(set, skb, par, opt, &ext); break; case IPSET_ADD: ret = list_set_kadd(set, skb, par, opt, &ext); break; case IPSET_DEL: ret = list_set_kdel(set, skb, par, opt, &ext); break; default: break; } rcu_read_unlock(); return ret; } /* Userspace interfaces: we are protected by the nfnl mutex */ static void __list_set_del_rcu(struct rcu_head * rcu) { struct set_elem *e = container_of(rcu, struct set_elem, rcu); struct ip_set *set = e->set; ip_set_ext_destroy(set, e); kfree(e); } static void list_set_del(struct ip_set *set, struct set_elem *e) { struct list_set *map = set->data; set->elements--; list_del_rcu(&e->list); ip_set_put_byindex(map->net, e->id); call_rcu(&e->rcu, __list_set_del_rcu); } static void list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old) { struct list_set *map = set->data; list_replace_rcu(&old->list, &e->list); ip_set_put_byindex(map->net, old->id); call_rcu(&old->rcu, __list_set_del_rcu); } static void set_cleanup_entries(struct ip_set *set) { struct list_set *map = set->data; struct set_elem *e, *n; list_for_each_entry_safe(e, n, &map->members, list) if (ip_set_timeout_expired(ext_timeout(e, set))) list_set_del(set, e); } static int list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct list_set *map = set->data; struct set_adt_elem *d = value; struct set_elem *e, *next, *prev = NULL; int ret = 0; rcu_read_lock(); list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) { prev = e; continue; } if (d->before == 0) { ret = 1; goto out; } else if (d->before > 0) { next = list_next_entry(e, list); ret = !list_is_last(&e->list, &map->members) && next->id == d->refid; } else { ret = prev && prev->id == d->refid; } goto out; } out: rcu_read_unlock(); return ret; } static void list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext, struct set_elem *e) { if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(e, set), ext); if (SET_WITH_COMMENT(set)) ip_set_init_comment(set, ext_comment(e, set), ext); if (SET_WITH_SKBINFO(set)) ip_set_init_skbinfo(ext_skbinfo(e, set), ext); /* Update timeout last */ if (SET_WITH_TIMEOUT(set)) ip_set_timeout_set(ext_timeout(e, set), ext->timeout); } static int list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct list_set *map = set->data; struct set_adt_elem *d = value; struct set_elem *e, *n, *prev, *next; bool flag_exist = flags & IPSET_FLAG_EXIST; /* Find where to add the new entry */ n = prev = next = NULL; list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (d->id == e->id) n = e; else if (d->before == 0 || e->id != d->refid) continue; else if (d->before > 0) next = e; else prev = e; } /* If before/after is used on an empty set */ if ((d->before > 0 && !next) || (d->before < 0 && !prev)) return -IPSET_ERR_REF_EXIST; /* Re-add already existing element */ if (n) { if (!flag_exist) return -IPSET_ERR_EXIST; /* Update extensions */ ip_set_ext_destroy(set, n); list_set_init_extensions(set, ext, n); /* Set is already added to the list */ ip_set_put_byindex(map->net, d->id); return 0; } /* Add new entry */ if (d->before == 0) { /* Append */ n = list_empty(&map->members) ? NULL : list_last_entry(&map->members, struct set_elem, list); } else if (d->before > 0) { /* Insert after next element */ if (!list_is_last(&next->list, &map->members)) n = list_next_entry(next, list); } else { /* Insert before prev element */ if (prev->list.prev != &map->members) n = list_prev_entry(prev, list); } /* Can we replace a timed out entry? */ if (n && !(SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(n, set)))) n = NULL; e = kzalloc(set->dsize, GFP_ATOMIC); if (!e) return -ENOMEM; e->id = d->id; e->set = set; INIT_LIST_HEAD(&e->list); list_set_init_extensions(set, ext, e); if (n) list_set_replace(set, e, n); else if (next) list_add_tail_rcu(&e->list, &next->list); else if (prev) list_add_rcu(&e->list, &prev->list); else list_add_tail_rcu(&e->list, &map->members); set->elements++; return 0; } static int list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct list_set *map = set->data; struct set_adt_elem *d = value; struct set_elem *e, *n, *next, *prev = NULL; list_for_each_entry_safe(e, n, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) { prev = e; continue; } if (d->before > 0) { next = list_next_entry(e, list); if (list_is_last(&e->list, &map->members) || next->id != d->refid) return -IPSET_ERR_REF_EXIST; } else if (d->before < 0) { if (!prev || prev->id != d->refid) return -IPSET_ERR_REF_EXIST; } list_set_del(set, e); return 0; } return d->before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST; } static int list_set_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct list_set *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct set_adt_elem e = { .refid = IPSET_INVALID_ID }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); struct ip_set *s; int ret = 0; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_NAME] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s); if (e.id == IPSET_INVALID_ID) return -IPSET_ERR_NAME; /* "Loop detection" */ if (s->type->features & IPSET_TYPE_NAME) { ret = -IPSET_ERR_LOOP; goto finish; } if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); e.before = f & IPSET_FLAG_BEFORE; } if (e.before && !tb[IPSET_ATTR_NAMEREF]) { ret = -IPSET_ERR_BEFORE; goto finish; } if (tb[IPSET_ATTR_NAMEREF]) { e.refid = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAMEREF]), &s); if (e.refid == IPSET_INVALID_ID) { ret = -IPSET_ERR_NAMEREF; goto finish; } if (!e.before) e.before = -1; } if (adt != IPSET_TEST && SET_WITH_TIMEOUT(set)) set_cleanup_entries(set); ret = adtfn(set, &e, &ext, &ext, flags); finish: if (e.refid != IPSET_INVALID_ID) ip_set_put_byindex(map->net, e.refid); if (adt != IPSET_ADD || ret) ip_set_put_byindex(map->net, e.id); return ip_set_eexist(ret, flags) ? 0 : ret; } static void list_set_flush(struct ip_set *set) { struct list_set *map = set->data; struct set_elem *e, *n; list_for_each_entry_safe(e, n, &map->members, list) list_set_del(set, e); set->elements = 0; set->ext_size = 0; } static void list_set_destroy(struct ip_set *set) { struct list_set *map = set->data; WARN_ON_ONCE(!list_empty(&map->members)); kfree(map); set->data = NULL; } /* Calculate the actual memory size of the set data */ static size_t list_set_memsize(const struct list_set *map, size_t dsize) { struct set_elem *e; u32 n = 0; rcu_read_lock(); list_for_each_entry_rcu(e, &map->members, list) n++; rcu_read_unlock(); return (sizeof(*map) + n * dsize); } static int list_set_head(struct ip_set *set, struct sk_buff *skb) { const struct list_set *map = set->data; struct nlattr *nested; size_t memsize = list_set_memsize(map, set->dsize) + set->ext_size; nested = nla_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; nla_nest_end(skb, nested); return 0; nla_put_failure: return -EMSGSIZE; } static int list_set_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) { const struct list_set *map = set->data; struct nlattr *atd, *nested; u32 i = 0, first = cb->args[IPSET_CB_ARG0]; char name[IPSET_MAXNAMELEN]; struct set_elem *e; int ret = 0; atd = nla_nest_start(skb, IPSET_ATTR_ADT); if (!atd) return -EMSGSIZE; rcu_read_lock(); list_for_each_entry_rcu(e, &map->members, list) { if (i < first || (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set)))) { i++; continue; } nested = nla_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; ip_set_name_byindex(map->net, e->id, name); if (nla_put_string(skb, IPSET_ATTR_NAME, name)) goto nla_put_failure; if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; nla_nest_end(skb, nested); i++; } nla_nest_end(skb, atd); /* Set listing finished */ cb->args[IPSET_CB_ARG0] = 0; goto out; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(i == first)) { nla_nest_cancel(skb, atd); cb->args[IPSET_CB_ARG0] = 0; ret = -EMSGSIZE; } else { cb->args[IPSET_CB_ARG0] = i; nla_nest_end(skb, atd); } out: rcu_read_unlock(); return ret; } static bool list_set_same_set(const struct ip_set *a, const struct ip_set *b) { const struct list_set *x = a->data; const struct list_set *y = b->data; return x->size == y->size && a->timeout == b->timeout && a->extensions == b->extensions; } static void list_set_cancel_gc(struct ip_set *set) { struct list_set *map = set->data; if (SET_WITH_TIMEOUT(set)) timer_shutdown_sync(&map->gc); /* Flush list to drop references to other ipsets */ list_set_flush(set); } static const struct ip_set_type_variant set_variant = { .kadt = list_set_kadt, .uadt = list_set_uadt, .adt = { [IPSET_ADD] = list_set_uadd, [IPSET_DEL] = list_set_udel, [IPSET_TEST] = list_set_utest, }, .destroy = list_set_destroy, .flush = list_set_flush, .head = list_set_head, .list = list_set_list, .same_set = list_set_same_set, .cancel_gc = list_set_cancel_gc, }; static void list_set_gc(struct timer_list *t) { struct list_set *map = from_timer(map, t, gc); struct ip_set *set = map->set; spin_lock_bh(&set->lock); set_cleanup_entries(set); spin_unlock_bh(&set->lock); map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } static void list_set_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t)) { struct list_set *map = set->data; timer_setup(&map->gc, gc, 0); mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); } /* Create list:set type of sets */ static bool init_list_set(struct net *net, struct ip_set *set, u32 size) { struct list_set *map; map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) return false; map->size = size; map->net = net; map->set = set; INIT_LIST_HEAD(&map->members); set->data = map; return true; } static int list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 size = IP_SET_LIST_DEFAULT_SIZE; if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_SIZE]) size = ip_set_get_h32(tb[IPSET_ATTR_SIZE]); if (size < IP_SET_LIST_MIN_SIZE) size = IP_SET_LIST_MIN_SIZE; set->variant = &set_variant; set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem), __alignof__(struct set_elem)); if (!init_list_set(net, set, size)) return -ENOMEM; if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); list_set_gc_init(set, list_set_gc); } return 0; } static struct ip_set_type list_set_type __read_mostly = { .name = "list:set", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create = list_set_create, .create_policy = { [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_NAME] = { .type = NLA_STRING, .len = IPSET_MAXNAMELEN }, [IPSET_ATTR_NAMEREF] = { .type = NLA_STRING, .len = IPSET_MAXNAMELEN }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init list_set_init(void) { return ip_set_type_register(&list_set_type); } static void __exit list_set_fini(void) { rcu_barrier(); ip_set_type_unregister(&list_set_type); } module_init(list_set_init); module_exit(list_set_fini); |
4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_GENERIC_SECTIONS_H_ #define _ASM_GENERIC_SECTIONS_H_ /* References to section boundaries */ #include <linux/compiler.h> #include <linux/types.h> /* * Usage guidelines: * _text, _data: architecture specific, don't use them in arch-independent code * [_stext, _etext]: contains .text.* sections, may also contain .rodata.* * and/or .init.* sections * [_sdata, _edata]: contains .data.* sections, may also contain .rodata.* * and/or .init.* sections. * [__start_rodata, __end_rodata]: contains .rodata.* sections * [__start_ro_after_init, __end_ro_after_init]: * contains .data..ro_after_init section * [__init_begin, __init_end]: contains .init.* sections, but .init.text.* * may be out of this range on some architectures. * [_sinittext, _einittext]: contains .init.text.* sections * [__bss_start, __bss_stop]: contains BSS sections * * Following global variables are optional and may be unavailable on some * architectures and/or kernel configurations. * _text, _data * __kprobes_text_start, __kprobes_text_end * __entry_text_start, __entry_text_end * __ctors_start, __ctors_end * __irqentry_text_start, __irqentry_text_end * __softirqentry_text_start, __softirqentry_text_end * __start_opd, __end_opd */ extern char _text[], _stext[], _etext[]; extern char _data[], _sdata[], _edata[]; extern char __bss_start[], __bss_stop[]; extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; extern char __start_ro_after_init[], __end_ro_after_init[]; extern char _end[]; extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __entry_text_start[], __entry_text_end[]; extern char __start_rodata[], __end_rodata[]; extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __softirqentry_text_start[], __softirqentry_text_end[]; extern char __start_once[], __end_once[]; /* Start and end of .ctors section - used for constructor calls. */ extern char __ctors_start[], __ctors_end[]; /* Start and end of .opd section - used for function descriptors. */ extern char __start_opd[], __end_opd[]; /* Start and end of instrumentation protected text section */ extern char __noinstr_text_start[], __noinstr_text_end[]; extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ #ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS void *dereference_function_descriptor(void *ptr); void *dereference_kernel_function_descriptor(void *ptr); #else #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) /* An address is simply the address of the function. */ typedef struct { unsigned long addr; } func_desc_t; #endif static inline bool have_function_descriptors(void) { return IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS); } /** * memory_contains - checks if an object is contained within a memory region * @begin: virtual address of the beginning of the memory region * @end: virtual address of the end of the memory region * @virt: virtual address of the memory object * @size: size of the memory object * * Returns: true if the object specified by @virt and @size is entirely * contained within the memory region defined by @begin and @end, false * otherwise. */ static inline bool memory_contains(void *begin, void *end, void *virt, size_t size) { return virt >= begin && virt + size <= end; } /** * memory_intersects - checks if the region occupied by an object intersects * with another memory region * @begin: virtual address of the beginning of the memory region * @end: virtual address of the end of the memory region * @virt: virtual address of the memory object * @size: size of the memory object * * Returns: true if an object's memory region, specified by @virt and @size, * intersects with the region specified by @begin and @end, false otherwise. */ static inline bool memory_intersects(void *begin, void *end, void *virt, size_t size) { void *vend = virt + size; if (virt < end && vend > begin) return true; return false; } /** * init_section_contains - checks if an object is contained within the init * section * @virt: virtual address of the memory object * @size: size of the memory object * * Returns: true if the object specified by @virt and @size is entirely * contained within the init section, false otherwise. */ static inline bool init_section_contains(void *virt, size_t size) { return memory_contains(__init_begin, __init_end, virt, size); } /** * init_section_intersects - checks if the region occupied by an object * intersects with the init section * @virt: virtual address of the memory object * @size: size of the memory object * * Returns: true if an object's memory region, specified by @virt and @size, * intersects with the init section, false otherwise. */ static inline bool init_section_intersects(void *virt, size_t size) { return memory_intersects(__init_begin, __init_end, virt, size); } /** * is_kernel_core_data - checks if the pointer address is located in the * .data or .bss section * * @addr: address to check * * Returns: true if the address is located in .data or .bss, false otherwise. * Note: On some archs it may return true for core RODATA, and false * for others. But will always be true for core RW data. */ static inline bool is_kernel_core_data(unsigned long addr) { if (addr >= (unsigned long)_sdata && addr < (unsigned long)_edata) return true; if (addr >= (unsigned long)__bss_start && addr < (unsigned long)__bss_stop) return true; return false; } /** * is_kernel_rodata - checks if the pointer address is located in the * .rodata section * * @addr: address to check * * Returns: true if the address is located in .rodata, false otherwise. */ static inline bool is_kernel_rodata(unsigned long addr) { return addr >= (unsigned long)__start_rodata && addr < (unsigned long)__end_rodata; } static inline bool is_kernel_ro_after_init(unsigned long addr) { return addr >= (unsigned long)__start_ro_after_init && addr < (unsigned long)__end_ro_after_init; } /** * is_kernel_inittext - checks if the pointer address is located in the * .init.text section * * @addr: address to check * * Returns: true if the address is located in .init.text, false otherwise. */ static inline bool is_kernel_inittext(unsigned long addr) { return addr >= (unsigned long)_sinittext && addr < (unsigned long)_einittext; } /** * __is_kernel_text - checks if the pointer address is located in the * .text section * * @addr: address to check * * Returns: true if the address is located in .text, false otherwise. * Note: an internal helper, only check the range of _stext to _etext. */ static inline bool __is_kernel_text(unsigned long addr) { return addr >= (unsigned long)_stext && addr < (unsigned long)_etext; } /** * __is_kernel - checks if the pointer address is located in the kernel range * * @addr: address to check * * Returns: true if the address is located in the kernel range, false otherwise. * Note: an internal helper, check the range of _stext to _end, * and range from __init_begin to __init_end, which can be outside * of the _stext to _end range. */ static inline bool __is_kernel(unsigned long addr) { return ((addr >= (unsigned long)_stext && addr < (unsigned long)_end) || (addr >= (unsigned long)__init_begin && addr < (unsigned long)__init_end)); } #endif /* _ASM_GENERIC_SECTIONS_H_ */ |
1 1 1 1 1 4 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 | // SPDX-License-Identifier: GPL-2.0 /* * Tty buffer allocation management */ #include <linux/types.h> #include <linux/errno.h> #include <linux/minmax.h> #include <linux/tty.h> #include <linux/tty_buffer.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/bitops.h> #include <linux/delay.h> #include <linux/module.h> #include <linux/ratelimit.h> #include "tty.h" #define MIN_TTYB_SIZE 256 #define TTYB_ALIGN_MASK 0xff /* * Byte threshold to limit memory consumption for flip buffers. * The actual memory limit is > 2x this amount. */ #define TTYB_DEFAULT_MEM_LIMIT (640 * 1024UL) /* * We default to dicing tty buffer allocations to this many characters * in order to avoid multiple page allocations. We know the size of * tty_buffer itself but it must also be taken into account that the * buffer is 256 byte aligned. See tty_buffer_find for the allocation * logic this must match. */ #define TTY_BUFFER_PAGE (((PAGE_SIZE - sizeof(struct tty_buffer)) / 2) & ~TTYB_ALIGN_MASK) /** * tty_buffer_lock_exclusive - gain exclusive access to buffer * @port: tty port owning the flip buffer * * Guarantees safe use of the &tty_ldisc_ops.receive_buf() method by excluding * the buffer work and any pending flush from using the flip buffer. Data can * continue to be added concurrently to the flip buffer from the driver side. * * See also tty_buffer_unlock_exclusive(). */ void tty_buffer_lock_exclusive(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; atomic_inc(&buf->priority); mutex_lock(&buf->lock); } EXPORT_SYMBOL_GPL(tty_buffer_lock_exclusive); /** * tty_buffer_unlock_exclusive - release exclusive access * @port: tty port owning the flip buffer * * The buffer work is restarted if there is data in the flip buffer. * * See also tty_buffer_lock_exclusive(). */ void tty_buffer_unlock_exclusive(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; bool restart = buf->head->commit != buf->head->read; atomic_dec(&buf->priority); mutex_unlock(&buf->lock); if (restart) queue_work(system_unbound_wq, &buf->work); } EXPORT_SYMBOL_GPL(tty_buffer_unlock_exclusive); /** * tty_buffer_space_avail - return unused buffer space * @port: tty port owning the flip buffer * * Returns: the # of bytes which can be written by the driver without reaching * the buffer limit. * * Note: this does not guarantee that memory is available to write the returned * # of bytes (use tty_prepare_flip_string() to pre-allocate if memory * guarantee is required). */ unsigned int tty_buffer_space_avail(struct tty_port *port) { int space = port->buf.mem_limit - atomic_read(&port->buf.mem_used); return max(space, 0); } EXPORT_SYMBOL_GPL(tty_buffer_space_avail); static void tty_buffer_reset(struct tty_buffer *p, size_t size) { p->used = 0; p->size = size; p->next = NULL; p->commit = 0; p->lookahead = 0; p->read = 0; p->flags = true; } /** * tty_buffer_free_all - free buffers used by a tty * @port: tty port to free from * * Remove all the buffers pending on a tty whether queued with data or in the * free ring. Must be called when the tty is no longer in use. */ void tty_buffer_free_all(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; struct tty_buffer *p, *next; struct llist_node *llist; unsigned int freed = 0; int still_used; while ((p = buf->head) != NULL) { buf->head = p->next; freed += p->size; if (p->size > 0) kfree(p); } llist = llist_del_all(&buf->free); llist_for_each_entry_safe(p, next, llist, free) kfree(p); tty_buffer_reset(&buf->sentinel, 0); buf->head = &buf->sentinel; buf->tail = &buf->sentinel; still_used = atomic_xchg(&buf->mem_used, 0); WARN(still_used != freed, "we still have not freed %d bytes!", still_used - freed); } /** * tty_buffer_alloc - allocate a tty buffer * @port: tty port * @size: desired size (characters) * * Allocate a new tty buffer to hold the desired number of characters. We * round our buffers off in 256 character chunks to get better allocation * behaviour. * * Returns: %NULL if out of memory or the allocation would exceed the per * device queue. */ static struct tty_buffer *tty_buffer_alloc(struct tty_port *port, size_t size) { struct llist_node *free; struct tty_buffer *p; /* Round the buffer size out */ size = __ALIGN_MASK(size, TTYB_ALIGN_MASK); if (size <= MIN_TTYB_SIZE) { free = llist_del_first(&port->buf.free); if (free) { p = llist_entry(free, struct tty_buffer, free); goto found; } } /* Should possibly check if this fails for the largest buffer we * have queued and recycle that ? */ if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit) return NULL; p = kmalloc(struct_size(p, data, 2 * size), GFP_ATOMIC | __GFP_NOWARN); if (p == NULL) return NULL; found: tty_buffer_reset(p, size); atomic_add(size, &port->buf.mem_used); return p; } /** * tty_buffer_free - free a tty buffer * @port: tty port owning the buffer * @b: the buffer to free * * Free a tty buffer, or add it to the free list according to our internal * strategy. */ static void tty_buffer_free(struct tty_port *port, struct tty_buffer *b) { struct tty_bufhead *buf = &port->buf; /* Dumb strategy for now - should keep some stats */ WARN_ON(atomic_sub_return(b->size, &buf->mem_used) < 0); if (b->size > MIN_TTYB_SIZE) kfree(b); else if (b->size > 0) llist_add(&b->free, &buf->free); } /** * tty_buffer_flush - flush full tty buffers * @tty: tty to flush * @ld: optional ldisc ptr (must be referenced) * * Flush all the buffers containing receive data. If @ld != %NULL, flush the * ldisc input buffer. * * Locking: takes buffer lock to ensure single-threaded flip buffer 'consumer'. */ void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld) { struct tty_port *port = tty->port; struct tty_bufhead *buf = &port->buf; struct tty_buffer *next; atomic_inc(&buf->priority); mutex_lock(&buf->lock); /* paired w/ release in __tty_buffer_request_room; ensures there are * no pending memory accesses to the freed buffer */ while ((next = smp_load_acquire(&buf->head->next)) != NULL) { tty_buffer_free(port, buf->head); buf->head = next; } buf->head->read = buf->head->commit; buf->head->lookahead = buf->head->read; if (ld && ld->ops->flush_buffer) ld->ops->flush_buffer(tty); atomic_dec(&buf->priority); mutex_unlock(&buf->lock); } /** * __tty_buffer_request_room - grow tty buffer if needed * @port: tty port * @size: size desired * @flags: buffer has to store flags along character data * * Make at least @size bytes of linear space available for the tty buffer. * * Will change over to a new buffer if the current buffer is encoded as * %TTY_NORMAL (so has no flags buffer) and the new buffer requires a flags * buffer. * * Returns: the size we managed to find. */ static int __tty_buffer_request_room(struct tty_port *port, size_t size, bool flags) { struct tty_bufhead *buf = &port->buf; struct tty_buffer *n, *b = buf->tail; size_t left = (b->flags ? 1 : 2) * b->size - b->used; bool change = !b->flags && flags; if (!change && left >= size) return size; /* This is the slow path - looking for new buffers to use */ n = tty_buffer_alloc(port, size); if (n == NULL) return change ? 0 : left; n->flags = flags; buf->tail = n; /* * Paired w/ acquire in flush_to_ldisc() and lookahead_bufs() * ensures they see all buffer data. */ smp_store_release(&b->commit, b->used); /* * Paired w/ acquire in flush_to_ldisc() and lookahead_bufs() * ensures the latest commit value can be read before the head * is advanced to the next buffer. */ smp_store_release(&b->next, n); return size; } int tty_buffer_request_room(struct tty_port *port, size_t size) { return __tty_buffer_request_room(port, size, true); } EXPORT_SYMBOL_GPL(tty_buffer_request_room); size_t __tty_insert_flip_string_flags(struct tty_port *port, const u8 *chars, const u8 *flags, bool mutable_flags, size_t size) { bool need_flags = mutable_flags || flags[0] != TTY_NORMAL; size_t copied = 0; do { size_t goal = min_t(size_t, size - copied, TTY_BUFFER_PAGE); size_t space = __tty_buffer_request_room(port, goal, need_flags); struct tty_buffer *tb = port->buf.tail; if (unlikely(space == 0)) break; memcpy(char_buf_ptr(tb, tb->used), chars, space); if (mutable_flags) { memcpy(flag_buf_ptr(tb, tb->used), flags, space); flags += space; } else if (tb->flags) { memset(flag_buf_ptr(tb, tb->used), flags[0], space); } else { /* tb->flags should be available once requested */ WARN_ON_ONCE(need_flags); } tb->used += space; copied += space; chars += space; /* There is a small chance that we need to split the data over * several buffers. If this is the case we must loop. */ } while (unlikely(size > copied)); return copied; } EXPORT_SYMBOL(__tty_insert_flip_string_flags); /** * tty_prepare_flip_string - make room for characters * @port: tty port * @chars: return pointer for character write area * @size: desired size * * Prepare a block of space in the buffer for data. * * This is used for drivers that need their own block copy routines into the * buffer. There is no guarantee the buffer is a DMA target! * * Returns: the length available and buffer pointer (@chars) to the space which * is now allocated and accounted for as ready for normal characters. */ size_t tty_prepare_flip_string(struct tty_port *port, u8 **chars, size_t size) { size_t space = __tty_buffer_request_room(port, size, false); if (likely(space)) { struct tty_buffer *tb = port->buf.tail; *chars = char_buf_ptr(tb, tb->used); if (tb->flags) memset(flag_buf_ptr(tb, tb->used), TTY_NORMAL, space); tb->used += space; } return space; } EXPORT_SYMBOL_GPL(tty_prepare_flip_string); /** * tty_ldisc_receive_buf - forward data to line discipline * @ld: line discipline to process input * @p: char buffer * @f: %TTY_NORMAL, %TTY_BREAK, etc. flags buffer * @count: number of bytes to process * * Callers other than flush_to_ldisc() need to exclude the kworker from * concurrent use of the line discipline, see paste_selection(). * * Returns: the number of bytes processed. */ size_t tty_ldisc_receive_buf(struct tty_ldisc *ld, const u8 *p, const u8 *f, size_t count) { if (ld->ops->receive_buf2) count = ld->ops->receive_buf2(ld->tty, p, f, count); else { count = min_t(size_t, count, ld->tty->receive_room); if (count && ld->ops->receive_buf) ld->ops->receive_buf(ld->tty, p, f, count); } return count; } EXPORT_SYMBOL_GPL(tty_ldisc_receive_buf); static void lookahead_bufs(struct tty_port *port, struct tty_buffer *head) { head->lookahead = max(head->lookahead, head->read); while (head) { struct tty_buffer *next; unsigned int count; /* * Paired w/ release in __tty_buffer_request_room(); * ensures commit value read is not stale if the head * is advancing to the next buffer. */ next = smp_load_acquire(&head->next); /* * Paired w/ release in __tty_buffer_request_room() or in * tty_buffer_flush(); ensures we see the committed buffer data. */ count = smp_load_acquire(&head->commit) - head->lookahead; if (!count) { head = next; continue; } if (port->client_ops->lookahead_buf) { u8 *p, *f = NULL; p = char_buf_ptr(head, head->lookahead); if (head->flags) f = flag_buf_ptr(head, head->lookahead); port->client_ops->lookahead_buf(port, p, f, count); } head->lookahead += count; } } static size_t receive_buf(struct tty_port *port, struct tty_buffer *head, size_t count) { u8 *p = char_buf_ptr(head, head->read); const u8 *f = NULL; size_t n; if (head->flags) f = flag_buf_ptr(head, head->read); n = port->client_ops->receive_buf(port, p, f, count); if (n > 0) memset(p, 0, n); return n; } /** * flush_to_ldisc - flush data from buffer to ldisc * @work: tty structure passed from work queue. * * This routine is called out of the software interrupt to flush data from the * buffer chain to the line discipline. * * The receive_buf() method is single threaded for each tty instance. * * Locking: takes buffer lock to ensure single-threaded flip buffer 'consumer'. */ static void flush_to_ldisc(struct work_struct *work) { struct tty_port *port = container_of(work, struct tty_port, buf.work); struct tty_bufhead *buf = &port->buf; mutex_lock(&buf->lock); while (1) { struct tty_buffer *head = buf->head; struct tty_buffer *next; size_t count, rcvd; /* Ldisc or user is trying to gain exclusive access */ if (atomic_read(&buf->priority)) break; /* paired w/ release in __tty_buffer_request_room(); * ensures commit value read is not stale if the head * is advancing to the next buffer */ next = smp_load_acquire(&head->next); /* paired w/ release in __tty_buffer_request_room() or in * tty_buffer_flush(); ensures we see the committed buffer data */ count = smp_load_acquire(&head->commit) - head->read; if (!count) { if (next == NULL) break; buf->head = next; tty_buffer_free(port, head); continue; } rcvd = receive_buf(port, head, count); head->read += rcvd; if (rcvd < count) lookahead_bufs(port, head); if (!rcvd) break; if (need_resched()) cond_resched(); } mutex_unlock(&buf->lock); } static inline void tty_flip_buffer_commit(struct tty_buffer *tail) { /* * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees * buffer data. */ smp_store_release(&tail->commit, tail->used); } /** * tty_flip_buffer_push - push terminal buffers * @port: tty port to push * * Queue a push of the terminal flip buffers to the line discipline. Can be * called from IRQ/atomic context. * * In the event of the queue being busy for flipping the work will be held off * and retried later. */ void tty_flip_buffer_push(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; tty_flip_buffer_commit(buf->tail); queue_work(system_unbound_wq, &buf->work); } EXPORT_SYMBOL(tty_flip_buffer_push); /** * tty_insert_flip_string_and_push_buffer - add characters to the tty buffer and * push * @port: tty port * @chars: characters * @size: size * * The function combines tty_insert_flip_string() and tty_flip_buffer_push() * with the exception of properly holding the @port->lock. * * To be used only internally (by pty currently). * * Returns: the number added. */ int tty_insert_flip_string_and_push_buffer(struct tty_port *port, const u8 *chars, size_t size) { struct tty_bufhead *buf = &port->buf; unsigned long flags; spin_lock_irqsave(&port->lock, flags); size = tty_insert_flip_string(port, chars, size); if (size) tty_flip_buffer_commit(buf->tail); spin_unlock_irqrestore(&port->lock, flags); queue_work(system_unbound_wq, &buf->work); return size; } /** * tty_buffer_init - prepare a tty buffer structure * @port: tty port to initialise * * Set up the initial state of the buffer management for a tty device. Must be * called before the other tty buffer functions are used. */ void tty_buffer_init(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; mutex_init(&buf->lock); tty_buffer_reset(&buf->sentinel, 0); buf->head = &buf->sentinel; buf->tail = &buf->sentinel; init_llist_head(&buf->free); atomic_set(&buf->mem_used, 0); atomic_set(&buf->priority, 0); INIT_WORK(&buf->work, flush_to_ldisc); buf->mem_limit = TTYB_DEFAULT_MEM_LIMIT; } /** * tty_buffer_set_limit - change the tty buffer memory limit * @port: tty port to change * @limit: memory limit to set * * Change the tty buffer memory limit. * * Must be called before the other tty buffer functions are used. */ int tty_buffer_set_limit(struct tty_port *port, int limit) { if (limit < MIN_TTYB_SIZE) return -EINVAL; port->buf.mem_limit = limit; return 0; } EXPORT_SYMBOL_GPL(tty_buffer_set_limit); /* slave ptys can claim nested buffer lock when handling BRK and INTR */ void tty_buffer_set_lock_subclass(struct tty_port *port) { lockdep_set_subclass(&port->buf.lock, TTY_LOCK_SLAVE); } bool tty_buffer_restart_work(struct tty_port *port) { return queue_work(system_unbound_wq, &port->buf.work); } bool tty_buffer_cancel_work(struct tty_port *port) { return cancel_work_sync(&port->buf.work); } void tty_buffer_flush_work(struct tty_port *port) { flush_work(&port->buf.work); } |
4 4 4 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 | // SPDX-License-Identifier: GPL-2.0-only /* * Vxlan multicast group handling * */ #include <linux/kernel.h> #include <net/net_namespace.h> #include <net/sock.h> #include <linux/igmp.h> #include <net/vxlan.h> #include "vxlan_private.h" /* Update multicast group membership when first VNI on * multicast address is brought up */ int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr *rip, int rifindex) { union vxlan_addr *ip = (rip ? : &vxlan->default_dst.remote_ip); int ifindex = (rifindex ? : vxlan->default_dst.remote_ifindex); int ret = -EINVAL; struct sock *sk; if (ip->sa.sa_family == AF_INET) { struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); struct ip_mreqn mreq = { .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, .imr_ifindex = ifindex, }; sk = sock4->sock->sk; lock_sock(sk); ret = ip_mc_join_group(sk, &mreq); release_sock(sk); #if IS_ENABLED(CONFIG_IPV6) } else { struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); sk = sock6->sock->sk; lock_sock(sk); ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex, &ip->sin6.sin6_addr); release_sock(sk); #endif } return ret; } int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip, int rifindex) { union vxlan_addr *ip = (rip ? : &vxlan->default_dst.remote_ip); int ifindex = (rifindex ? : vxlan->default_dst.remote_ifindex); int ret = -EINVAL; struct sock *sk; if (ip->sa.sa_family == AF_INET) { struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); struct ip_mreqn mreq = { .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, .imr_ifindex = ifindex, }; sk = sock4->sock->sk; lock_sock(sk); ret = ip_mc_leave_group(sk, &mreq); release_sock(sk); #if IS_ENABLED(CONFIG_IPV6) } else { struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); sk = sock6->sock->sk; lock_sock(sk); ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex, &ip->sin6.sin6_addr); release_sock(sk); #endif } return ret; } static bool vxlan_group_used_match(union vxlan_addr *ip, int ifindex, union vxlan_addr *rip, int rifindex) { if (!vxlan_addr_multicast(rip)) return false; if (!vxlan_addr_equal(rip, ip)) return false; if (rifindex != ifindex) return false; return true; } static bool vxlan_group_used_by_vnifilter(struct vxlan_dev *vxlan, union vxlan_addr *ip, int ifindex) { struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); struct vxlan_vni_node *v, *tmp; if (vxlan_group_used_match(ip, ifindex, &vxlan->default_dst.remote_ip, vxlan->default_dst.remote_ifindex)) return true; list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { if (!vxlan_addr_multicast(&v->remote_ip)) continue; if (vxlan_group_used_match(ip, ifindex, &v->remote_ip, vxlan->default_dst.remote_ifindex)) return true; } return false; } /* See if multicast group is already in use by other ID */ bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev, __be32 vni, union vxlan_addr *rip, int rifindex) { union vxlan_addr *ip = (rip ? : &dev->default_dst.remote_ip); int ifindex = (rifindex ? : dev->default_dst.remote_ifindex); struct vxlan_dev *vxlan; struct vxlan_sock *sock4; #if IS_ENABLED(CONFIG_IPV6) struct vxlan_sock *sock6; #endif unsigned short family = dev->default_dst.remote_ip.sa.sa_family; sock4 = rtnl_dereference(dev->vn4_sock); /* The vxlan_sock is only used by dev, leaving group has * no effect on other vxlan devices. */ if (family == AF_INET && sock4 && refcount_read(&sock4->refcnt) == 1) return false; #if IS_ENABLED(CONFIG_IPV6) sock6 = rtnl_dereference(dev->vn6_sock); if (family == AF_INET6 && sock6 && refcount_read(&sock6->refcnt) == 1) return false; #endif list_for_each_entry(vxlan, &vn->vxlan_list, next) { if (!netif_running(vxlan->dev) || vxlan == dev) continue; if (family == AF_INET && rtnl_dereference(vxlan->vn4_sock) != sock4) continue; #if IS_ENABLED(CONFIG_IPV6) if (family == AF_INET6 && rtnl_dereference(vxlan->vn6_sock) != sock6) continue; #endif if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) { if (!vxlan_group_used_by_vnifilter(vxlan, ip, ifindex)) continue; } else { if (!vxlan_group_used_match(ip, ifindex, &vxlan->default_dst.remote_ip, vxlan->default_dst.remote_ifindex)) continue; } return true; } return false; } static int vxlan_multicast_join_vnigrp(struct vxlan_dev *vxlan) { struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); struct vxlan_vni_node *v, *tmp, *vgood = NULL; int ret = 0; list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { if (!vxlan_addr_multicast(&v->remote_ip)) continue; /* skip if address is same as default address */ if (vxlan_addr_equal(&v->remote_ip, &vxlan->default_dst.remote_ip)) continue; ret = vxlan_igmp_join(vxlan, &v->remote_ip, 0); if (ret == -EADDRINUSE) ret = 0; if (ret) goto out; vgood = v; } out: if (ret) { list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { if (!vxlan_addr_multicast(&v->remote_ip)) continue; if (vxlan_addr_equal(&v->remote_ip, &vxlan->default_dst.remote_ip)) continue; vxlan_igmp_leave(vxlan, &v->remote_ip, 0); if (v == vgood) break; } } return ret; } static int vxlan_multicast_leave_vnigrp(struct vxlan_dev *vxlan) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); struct vxlan_vni_node *v, *tmp; int last_err = 0, ret; list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { if (vxlan_addr_multicast(&v->remote_ip) && !vxlan_group_used(vn, vxlan, v->vni, &v->remote_ip, 0)) { ret = vxlan_igmp_leave(vxlan, &v->remote_ip, 0); if (ret) last_err = ret; } } return last_err; } int vxlan_multicast_join(struct vxlan_dev *vxlan) { int ret = 0; if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) { ret = vxlan_igmp_join(vxlan, &vxlan->default_dst.remote_ip, vxlan->default_dst.remote_ifindex); if (ret == -EADDRINUSE) ret = 0; if (ret) return ret; } if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) return vxlan_multicast_join_vnigrp(vxlan); return 0; } int vxlan_multicast_leave(struct vxlan_dev *vxlan) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); int ret = 0; if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) && !vxlan_group_used(vn, vxlan, 0, NULL, 0)) { ret = vxlan_igmp_leave(vxlan, &vxlan->default_dst.remote_ip, vxlan->default_dst.remote_ifindex); if (ret) return ret; } if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) return vxlan_multicast_leave_vnigrp(vxlan); return 0; } |
1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org> */ #include <linux/netdevice.h> #include <linux/types.h> #include <linux/skbuff.h> #include <linux/debugfs.h> #include <linux/ieee80211.h> #include <linux/export.h> #include <net/mac80211.h> #include "rc80211_minstrel_ht.h" struct minstrel_debugfs_info { size_t len; char buf[]; }; static ssize_t minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { struct minstrel_debugfs_info *ms; ms = file->private_data; return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len); } static int minstrel_stats_release(struct inode *inode, struct file *file) { kfree(file->private_data); return 0; } static bool minstrel_ht_is_sample_rate(struct minstrel_ht_sta *mi, int idx) { int type, i; for (type = 0; type < ARRAY_SIZE(mi->sample); type++) for (i = 0; i < MINSTREL_SAMPLE_RATES; i++) if (mi->sample[type].cur_sample_rates[i] == idx) return true; return false; } static char * minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) { const struct mcs_group *mg; unsigned int j, tp_max, tp_avg, eprob, tx_time; char htmode = '2'; char gimode = 'L'; u32 gflags; if (!mi->supported[i]) return p; mg = &minstrel_mcs_groups[i]; gflags = mg->flags; if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) htmode = '4'; else if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) htmode = '8'; if (gflags & IEEE80211_TX_RC_SHORT_GI) gimode = 'S'; for (j = 0; j < MCS_GROUP_RATES; j++) { struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j]; int idx = MI_RATE(i, j); unsigned int duration; if (!(mi->supported[i] & BIT(j))) continue; if (gflags & IEEE80211_TX_RC_MCS) { p += sprintf(p, "HT%c0 ", htmode); p += sprintf(p, "%cGI ", gimode); p += sprintf(p, "%d ", mg->streams); } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { p += sprintf(p, "VHT%c0 ", htmode); p += sprintf(p, "%cGI ", gimode); p += sprintf(p, "%d ", mg->streams); } else if (i == MINSTREL_OFDM_GROUP) { p += sprintf(p, "OFDM "); p += sprintf(p, "1 "); } else { p += sprintf(p, "CCK "); p += sprintf(p, "%cP ", j < 4 ? 'L' : 'S'); p += sprintf(p, "1 "); } *(p++) = (idx == mi->max_tp_rate[0]) ? 'A' : ' '; *(p++) = (idx == mi->max_tp_rate[1]) ? 'B' : ' '; *(p++) = (idx == mi->max_tp_rate[2]) ? 'C' : ' '; *(p++) = (idx == mi->max_tp_rate[3]) ? 'D' : ' '; *(p++) = (idx == mi->max_prob_rate) ? 'P' : ' '; *(p++) = minstrel_ht_is_sample_rate(mi, idx) ? 'S' : ' '; if (gflags & IEEE80211_TX_RC_MCS) { p += sprintf(p, " MCS%-2u", (mg->streams - 1) * 8 + j); } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { p += sprintf(p, " MCS%-1u/%1u", j, mg->streams); } else { int r; if (i == MINSTREL_OFDM_GROUP) r = minstrel_ofdm_bitrates[j % 8]; else r = minstrel_cck_bitrates[j % 4]; p += sprintf(p, " %2u.%1uM", r / 10, r % 10); } p += sprintf(p, " %3u ", idx); /* tx_time[rate(i)] in usec */ duration = mg->duration[j]; duration <<= mg->shift; tx_time = DIV_ROUND_CLOSEST(duration, 1000); p += sprintf(p, "%6u ", tx_time); tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100)); tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_avg); eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000); p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u" " %3u %3u %-3u " "%9llu %-9llu\n", tp_max / 10, tp_max % 10, tp_avg / 10, tp_avg % 10, eprob / 10, eprob % 10, mrs->retry_count, mrs->last_success, mrs->last_attempts, (unsigned long long)mrs->succ_hist, (unsigned long long)mrs->att_hist); } return p; } static int minstrel_ht_stats_open(struct inode *inode, struct file *file) { struct minstrel_ht_sta *mi = inode->i_private; struct minstrel_debugfs_info *ms; unsigned int i; char *p; ms = kmalloc(32768, GFP_KERNEL); if (!ms) return -ENOMEM; file->private_data = ms; p = ms->buf; p += sprintf(p, "\n"); p += sprintf(p, " best ____________rate__________ ____statistics___ _____last____ ______sum-of________\n"); p += sprintf(p, "mode guard # rate [name idx airtime max_tp] [avg(tp) avg(prob)] [retry|suc|att] [#success | #attempts]\n"); p = minstrel_ht_stats_dump(mi, MINSTREL_CCK_GROUP, p); for (i = 0; i < MINSTREL_CCK_GROUP; i++) p = minstrel_ht_stats_dump(mi, i, p); for (i++; i < ARRAY_SIZE(mi->groups); i++) p = minstrel_ht_stats_dump(mi, i, p); p += sprintf(p, "\nTotal packet count:: ideal %d " "lookaround %d\n", max(0, (int) mi->total_packets - (int) mi->sample_packets), mi->sample_packets); if (mi->avg_ampdu_len) p += sprintf(p, "Average # of aggregated frames per A-MPDU: %d.%d\n", MINSTREL_TRUNC(mi->avg_ampdu_len), MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); ms->len = p - ms->buf; WARN_ON(ms->len + sizeof(*ms) > 32768); return nonseekable_open(inode, file); } static const struct file_operations minstrel_ht_stat_fops = { .owner = THIS_MODULE, .open = minstrel_ht_stats_open, .read = minstrel_stats_read, .release = minstrel_stats_release, .llseek = no_llseek, }; static char * minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p) { const struct mcs_group *mg; unsigned int j, tp_max, tp_avg, eprob, tx_time; char htmode = '2'; char gimode = 'L'; u32 gflags; if (!mi->supported[i]) return p; mg = &minstrel_mcs_groups[i]; gflags = mg->flags; if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) htmode = '4'; else if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) htmode = '8'; if (gflags & IEEE80211_TX_RC_SHORT_GI) gimode = 'S'; for (j = 0; j < MCS_GROUP_RATES; j++) { struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j]; int idx = MI_RATE(i, j); unsigned int duration; if (!(mi->supported[i] & BIT(j))) continue; if (gflags & IEEE80211_TX_RC_MCS) { p += sprintf(p, "HT%c0,", htmode); p += sprintf(p, "%cGI,", gimode); p += sprintf(p, "%d,", mg->streams); } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { p += sprintf(p, "VHT%c0,", htmode); p += sprintf(p, "%cGI,", gimode); p += sprintf(p, "%d,", mg->streams); } else if (i == MINSTREL_OFDM_GROUP) { p += sprintf(p, "OFDM,,1,"); } else { p += sprintf(p, "CCK,"); p += sprintf(p, "%cP,", j < 4 ? 'L' : 'S'); p += sprintf(p, "1,"); } p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[0]) ? "A" : "")); p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[1]) ? "B" : "")); p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[2]) ? "C" : "")); p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[3]) ? "D" : "")); p += sprintf(p, "%s" ,((idx == mi->max_prob_rate) ? "P" : "")); p += sprintf(p, "%s", (minstrel_ht_is_sample_rate(mi, idx) ? "S" : "")); if (gflags & IEEE80211_TX_RC_MCS) { p += sprintf(p, ",MCS%-2u,", (mg->streams - 1) * 8 + j); } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { p += sprintf(p, ",MCS%-1u/%1u,", j, mg->streams); } else { int r; if (i == MINSTREL_OFDM_GROUP) r = minstrel_ofdm_bitrates[j % 8]; else r = minstrel_cck_bitrates[j % 4]; p += sprintf(p, ",%2u.%1uM,", r / 10, r % 10); } p += sprintf(p, "%u,", idx); duration = mg->duration[j]; duration <<= mg->shift; tx_time = DIV_ROUND_CLOSEST(duration, 1000); p += sprintf(p, "%u,", tx_time); tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100)); tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_avg); eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000); p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u," "%u,%llu,%llu,", tp_max / 10, tp_max % 10, tp_avg / 10, tp_avg % 10, eprob / 10, eprob % 10, mrs->retry_count, mrs->last_success, mrs->last_attempts, (unsigned long long)mrs->succ_hist, (unsigned long long)mrs->att_hist); p += sprintf(p, "%d,%d,%d.%d\n", max(0, (int) mi->total_packets - (int) mi->sample_packets), mi->sample_packets, MINSTREL_TRUNC(mi->avg_ampdu_len), MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); } return p; } static int minstrel_ht_stats_csv_open(struct inode *inode, struct file *file) { struct minstrel_ht_sta *mi = inode->i_private; struct minstrel_debugfs_info *ms; unsigned int i; char *p; ms = kmalloc(32768, GFP_KERNEL); if (!ms) return -ENOMEM; file->private_data = ms; p = ms->buf; p = minstrel_ht_stats_csv_dump(mi, MINSTREL_CCK_GROUP, p); for (i = 0; i < MINSTREL_CCK_GROUP; i++) p = minstrel_ht_stats_csv_dump(mi, i, p); for (i++; i < ARRAY_SIZE(mi->groups); i++) p = minstrel_ht_stats_csv_dump(mi, i, p); ms->len = p - ms->buf; WARN_ON(ms->len + sizeof(*ms) > 32768); return nonseekable_open(inode, file); } static const struct file_operations minstrel_ht_stat_csv_fops = { .owner = THIS_MODULE, .open = minstrel_ht_stats_csv_open, .read = minstrel_stats_read, .release = minstrel_stats_release, .llseek = no_llseek, }; void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) { debugfs_create_file("rc_stats", 0444, dir, priv_sta, &minstrel_ht_stat_fops); debugfs_create_file("rc_stats_csv", 0444, dir, priv_sta, &minstrel_ht_stat_csv_fops); } |
2 1 27 27 27 27 27 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | // SPDX-License-Identifier: GPL-2.0 #define CREATE_TRACE_POINTS #include <trace/events/mmap_lock.h> #include <linux/mm.h> #include <linux/cgroup.h> #include <linux/memcontrol.h> #include <linux/mmap_lock.h> #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/smp.h> #include <linux/trace_events.h> #include <linux/local_lock.h> EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking); EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned); EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released); #ifdef CONFIG_MEMCG static atomic_t reg_refcount; /* * Size of the buffer for memcg path names. Ignoring stack trace support, * trace_events_hist.c uses MAX_FILTER_STR_VAL for this, so we also use it. */ #define MEMCG_PATH_BUF_SIZE MAX_FILTER_STR_VAL int trace_mmap_lock_reg(void) { atomic_inc(®_refcount); return 0; } void trace_mmap_lock_unreg(void) { atomic_dec(®_refcount); } #define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \ do { \ char buf[MEMCG_PATH_BUF_SIZE]; \ get_mm_memcg_path(mm, buf, sizeof(buf)); \ trace_mmap_lock_##type(mm, buf, ##__VA_ARGS__); \ } while (0) #else /* !CONFIG_MEMCG */ int trace_mmap_lock_reg(void) { return 0; } void trace_mmap_lock_unreg(void) { } #define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \ trace_mmap_lock_##type(mm, "", ##__VA_ARGS__) #endif /* CONFIG_MEMCG */ #ifdef CONFIG_TRACING #ifdef CONFIG_MEMCG /* * Write the given mm_struct's memcg path to a buffer. If the path cannot be * determined or the trace event is being unregistered, empty string is written. */ static void get_mm_memcg_path(struct mm_struct *mm, char *buf, size_t buflen) { struct mem_cgroup *memcg; buf[0] = '\0'; /* No need to get path if no trace event is registered. */ if (!atomic_read(®_refcount)) return; memcg = get_mem_cgroup_from_mm(mm); if (memcg == NULL) return; if (memcg->css.cgroup) cgroup_path(memcg->css.cgroup, buf, buflen); css_put(&memcg->css); } #endif /* CONFIG_MEMCG */ /* * Trace calls must be in a separate file, as otherwise there's a circular * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h. */ void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write) { TRACE_MMAP_LOCK_EVENT(start_locking, mm, write); } EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking); void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write, bool success) { TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, write, success); } EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned); void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write) { TRACE_MMAP_LOCK_EVENT(released, mm, write); } EXPORT_SYMBOL(__mmap_lock_do_trace_released); #endif /* CONFIG_TRACING */ |
116 105 1 51 5 2 6 3 10 6 20 11 76 45 70 45 45 2 26 5 4 3 1 38 4 41 1 37 37 5 24 11 2 4 57 1 9 13 39 15 2 13 39 11 5 4 35 1 3 5 11 1 59 60 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH * Copyright (C) 2018 - 2024 Intel Corporation */ #if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) #define __MAC80211_DRIVER_TRACE #include <linux/tracepoint.h> #include <net/mac80211.h> #include "ieee80211_i.h" #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211 #define MAXNAME 32 #define LOCAL_ENTRY __array(char, wiphy_name, 32) #define LOCAL_ASSIGN strscpy(__entry->wiphy_name, wiphy_name(local->hw.wiphy), MAXNAME) #define LOCAL_PR_FMT "%s" #define LOCAL_PR_ARG __entry->wiphy_name #define STA_ENTRY __array(char, sta_addr, ETH_ALEN) #define STA_ASSIGN (sta ? memcpy(__entry->sta_addr, sta->addr, ETH_ALEN) : \ eth_zero_addr(__entry->sta_addr)) #define STA_NAMED_ASSIGN(s) memcpy(__entry->sta_addr, (s)->addr, ETH_ALEN) #define STA_PR_FMT " sta:%pM" #define STA_PR_ARG __entry->sta_addr #define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \ __field(bool, p2p) \ __string(vif_name, sdata->name) #define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ __entry->p2p = sdata->vif.p2p; \ __assign_str(vif_name) #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" #define CHANDEF_ENTRY __field(u32, control_freq) \ __field(u32, freq_offset) \ __field(u32, chan_width) \ __field(u32, center_freq1) \ __field(u32, freq1_offset) \ __field(u32, center_freq2) #define CHANDEF_ASSIGN(c) \ __entry->control_freq = (c) ? ((c)->chan ? (c)->chan->center_freq : 0) : 0; \ __entry->freq_offset = (c) ? ((c)->chan ? (c)->chan->freq_offset : 0) : 0; \ __entry->chan_width = (c) ? (c)->width : 0; \ __entry->center_freq1 = (c) ? (c)->center_freq1 : 0; \ __entry->freq1_offset = (c) ? (c)->freq1_offset : 0; \ __entry->center_freq2 = (c) ? (c)->center_freq2 : 0; #define CHANDEF_PR_FMT " chandef(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)" #define CHANDEF_PR_ARG __entry->control_freq, __entry->freq_offset, __entry->chan_width, \ __entry->center_freq1, __entry->freq1_offset, __entry->center_freq2 #define MIN_CHANDEF_ENTRY \ __field(u32, min_control_freq) \ __field(u32, min_freq_offset) \ __field(u32, min_chan_width) \ __field(u32, min_center_freq1) \ __field(u32, min_freq1_offset) \ __field(u32, min_center_freq2) #define MIN_CHANDEF_ASSIGN(c) \ __entry->min_control_freq = (c)->chan ? (c)->chan->center_freq : 0; \ __entry->min_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0; \ __entry->min_chan_width = (c)->width; \ __entry->min_center_freq1 = (c)->center_freq1; \ __entry->min_freq1_offset = (c)->freq1_offset; \ __entry->min_center_freq2 = (c)->center_freq2; #define MIN_CHANDEF_PR_FMT " mindef(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)" #define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_freq_offset, \ __entry->min_chan_width, \ __entry->min_center_freq1, __entry->min_freq1_offset, \ __entry->min_center_freq2 #define AP_CHANDEF_ENTRY \ __field(u32, ap_control_freq) \ __field(u32, ap_freq_offset) \ __field(u32, ap_chan_width) \ __field(u32, ap_center_freq1) \ __field(u32, ap_freq1_offset) \ __field(u32, ap_center_freq2) #define AP_CHANDEF_ASSIGN(c) \ __entry->ap_control_freq = (c)->chan ? (c)->chan->center_freq : 0;\ __entry->ap_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0;\ __entry->ap_chan_width = (c)->chan ? (c)->width : 0; \ __entry->ap_center_freq1 = (c)->chan ? (c)->center_freq1 : 0; \ __entry->ap_freq1_offset = (c)->chan ? (c)->freq1_offset : 0; \ __entry->ap_center_freq2 = (c)->chan ? (c)->center_freq2 : 0; #define AP_CHANDEF_PR_FMT " ap(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)" #define AP_CHANDEF_PR_ARG __entry->ap_control_freq, __entry->ap_freq_offset, \ __entry->ap_chan_width, \ __entry->ap_center_freq1, __entry->ap_freq1_offset, \ __entry->ap_center_freq2 #define CHANCTX_ENTRY CHANDEF_ENTRY \ MIN_CHANDEF_ENTRY \ AP_CHANDEF_ENTRY \ __field(u8, rx_chains_static) \ __field(u8, rx_chains_dynamic) #define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \ MIN_CHANDEF_ASSIGN(&ctx->conf.min_def) \ AP_CHANDEF_ASSIGN(&ctx->conf.ap) \ __entry->rx_chains_static = ctx->conf.rx_chains_static; \ __entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic #define CHANCTX_PR_FMT CHANDEF_PR_FMT MIN_CHANDEF_PR_FMT AP_CHANDEF_PR_FMT " chains:%d/%d" #define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, AP_CHANDEF_PR_ARG, \ __entry->rx_chains_static, __entry->rx_chains_dynamic #define KEY_ENTRY __field(u32, cipher) \ __field(u8, hw_key_idx) \ __field(u8, flags) \ __field(s8, keyidx) #define KEY_ASSIGN(k) __entry->cipher = (k)->cipher; \ __entry->flags = (k)->flags; \ __entry->keyidx = (k)->keyidx; \ __entry->hw_key_idx = (k)->hw_key_idx; #define KEY_PR_FMT " cipher:0x%x, flags=%#x, keyidx=%d, hw_key_idx=%d" #define KEY_PR_ARG __entry->cipher, __entry->flags, __entry->keyidx, __entry->hw_key_idx #define AMPDU_ACTION_ENTRY __field(enum ieee80211_ampdu_mlme_action, \ ieee80211_ampdu_mlme_action) \ STA_ENTRY \ __field(u16, tid) \ __field(u16, ssn) \ __field(u16, buf_size) \ __field(bool, amsdu) \ __field(u16, timeout) \ __field(u16, action) #define AMPDU_ACTION_ASSIGN STA_NAMED_ASSIGN(params->sta); \ __entry->tid = params->tid; \ __entry->ssn = params->ssn; \ __entry->buf_size = params->buf_size; \ __entry->amsdu = params->amsdu; \ __entry->timeout = params->timeout; \ __entry->action = params->action; #define AMPDU_ACTION_PR_FMT STA_PR_FMT " tid %d, ssn %d, buf_size %u, amsdu %d, timeout %d action %d" #define AMPDU_ACTION_PR_ARG STA_PR_ARG, __entry->tid, __entry->ssn, \ __entry->buf_size, __entry->amsdu, __entry->timeout, \ __entry->action /* * Tracing for driver callbacks. */ DECLARE_EVENT_CLASS(local_only_evt, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local), TP_STRUCT__entry( LOCAL_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; ), TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG) ); DECLARE_EVENT_CLASS(local_sdata_addr_evt, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __array(char, addr, ETH_ALEN) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; memcpy(__entry->addr, sdata->vif.addr, ETH_ALEN); ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " addr:%pM", LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr ) ); DECLARE_EVENT_CLASS(local_u32_evt, TP_PROTO(struct ieee80211_local *local, u32 value), TP_ARGS(local, value), TP_STRUCT__entry( LOCAL_ENTRY __field(u32, value) ), TP_fast_assign( LOCAL_ASSIGN; __entry->value = value; ), TP_printk( LOCAL_PR_FMT " value:%d", LOCAL_PR_ARG, __entry->value ) ); DECLARE_EVENT_CLASS(local_sdata_evt, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG ) ); DEFINE_EVENT(local_only_evt, drv_return_void, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local) ); TRACE_EVENT(drv_return_int, TP_PROTO(struct ieee80211_local *local, int ret), TP_ARGS(local, ret), TP_STRUCT__entry( LOCAL_ENTRY __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; __entry->ret = ret; ), TP_printk(LOCAL_PR_FMT " - %d", LOCAL_PR_ARG, __entry->ret) ); TRACE_EVENT(drv_return_bool, TP_PROTO(struct ieee80211_local *local, bool ret), TP_ARGS(local, ret), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, ret) ), TP_fast_assign( LOCAL_ASSIGN; __entry->ret = ret; ), TP_printk(LOCAL_PR_FMT " - %s", LOCAL_PR_ARG, (__entry->ret) ? "true" : "false") ); TRACE_EVENT(drv_return_u32, TP_PROTO(struct ieee80211_local *local, u32 ret), TP_ARGS(local, ret), TP_STRUCT__entry( LOCAL_ENTRY __field(u32, ret) ), TP_fast_assign( LOCAL_ASSIGN; __entry->ret = ret; ), TP_printk(LOCAL_PR_FMT " - %u", LOCAL_PR_ARG, __entry->ret) ); TRACE_EVENT(drv_return_u64, TP_PROTO(struct ieee80211_local *local, u64 ret), TP_ARGS(local, ret), TP_STRUCT__entry( LOCAL_ENTRY __field(u64, ret) ), TP_fast_assign( LOCAL_ASSIGN; __entry->ret = ret; ), TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret) ); DEFINE_EVENT(local_only_evt, drv_start, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local) ); DEFINE_EVENT(local_u32_evt, drv_get_et_strings, TP_PROTO(struct ieee80211_local *local, u32 sset), TP_ARGS(local, sset) ); DEFINE_EVENT(local_u32_evt, drv_get_et_sset_count, TP_PROTO(struct ieee80211_local *local, u32 sset), TP_ARGS(local, sset) ); DEFINE_EVENT(local_only_evt, drv_get_et_stats, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local) ); DEFINE_EVENT(local_only_evt, drv_suspend, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local) ); DEFINE_EVENT(local_only_evt, drv_resume, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local) ); TRACE_EVENT(drv_set_wakeup, TP_PROTO(struct ieee80211_local *local, bool enabled), TP_ARGS(local, enabled), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, enabled) ), TP_fast_assign( LOCAL_ASSIGN; __entry->enabled = enabled; ), TP_printk(LOCAL_PR_FMT " enabled:%d", LOCAL_PR_ARG, __entry->enabled) ); TRACE_EVENT(drv_stop, TP_PROTO(struct ieee80211_local *local, bool suspend), TP_ARGS(local, suspend), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, suspend) ), TP_fast_assign( LOCAL_ASSIGN; __entry->suspend = suspend; ), TP_printk(LOCAL_PR_FMT " suspend:%d", LOCAL_PR_ARG, __entry->suspend) ); DEFINE_EVENT(local_sdata_addr_evt, drv_add_interface, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); TRACE_EVENT(drv_change_interface, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type, bool p2p), TP_ARGS(local, sdata, type, p2p), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u32, new_type) __field(bool, new_p2p) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->new_type = type; __entry->new_p2p = p2p; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " new type:%d%s", LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type, __entry->new_p2p ? "/p2p" : "" ) ); DEFINE_EVENT(local_sdata_addr_evt, drv_remove_interface, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); TRACE_EVENT(drv_config, TP_PROTO(struct ieee80211_local *local, u32 changed), TP_ARGS(local, changed), TP_STRUCT__entry( LOCAL_ENTRY __field(u32, changed) __field(u32, flags) __field(int, power_level) __field(int, dynamic_ps_timeout) __field(u16, listen_interval) __field(u8, long_frame_max_tx_count) __field(u8, short_frame_max_tx_count) CHANDEF_ENTRY __field(int, smps) ), TP_fast_assign( LOCAL_ASSIGN; __entry->changed = changed; __entry->flags = local->hw.conf.flags; __entry->power_level = local->hw.conf.power_level; __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout; __entry->listen_interval = local->hw.conf.listen_interval; __entry->long_frame_max_tx_count = local->hw.conf.long_frame_max_tx_count; __entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count; CHANDEF_ASSIGN(&local->hw.conf.chandef) __entry->smps = local->hw.conf.smps_mode; ), TP_printk( LOCAL_PR_FMT " ch:%#x" CHANDEF_PR_FMT, LOCAL_PR_ARG, __entry->changed, CHANDEF_PR_ARG ) ); TRACE_EVENT(drv_vif_cfg_changed, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u64 changed), TP_ARGS(local, sdata, changed), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u64, changed) __field(bool, assoc) __field(bool, ibss_joined) __field(bool, ibss_creator) __field(u16, aid) __dynamic_array(u32, arp_addr_list, sdata->vif.cfg.arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? IEEE80211_BSS_ARP_ADDR_LIST_LEN : sdata->vif.cfg.arp_addr_cnt) __field(int, arp_addr_cnt) __dynamic_array(u8, ssid, sdata->vif.cfg.ssid_len) __field(int, s1g) __field(bool, idle) __field(bool, ps) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->changed = changed; __entry->aid = sdata->vif.cfg.aid; __entry->assoc = sdata->vif.cfg.assoc; __entry->ibss_joined = sdata->vif.cfg.ibss_joined; __entry->ibss_creator = sdata->vif.cfg.ibss_creator; __entry->ps = sdata->vif.cfg.ps; __entry->arp_addr_cnt = sdata->vif.cfg.arp_addr_cnt; memcpy(__get_dynamic_array(arp_addr_list), sdata->vif.cfg.arp_addr_list, sizeof(u32) * (sdata->vif.cfg.arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? IEEE80211_BSS_ARP_ADDR_LIST_LEN : sdata->vif.cfg.arp_addr_cnt)); memcpy(__get_dynamic_array(ssid), sdata->vif.cfg.ssid, sdata->vif.cfg.ssid_len); __entry->s1g = sdata->vif.cfg.s1g; __entry->idle = sdata->vif.cfg.idle; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " changed:%#llx", LOCAL_PR_ARG, VIF_PR_ARG, __entry->changed ) ); TRACE_EVENT(drv_link_info_changed, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *link_conf, u64 changed), TP_ARGS(local, sdata, link_conf, changed), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u64, changed) __field(int, link_id) __field(bool, cts) __field(bool, shortpre) __field(bool, shortslot) __field(bool, enable_beacon) __field(u8, dtimper) __field(u16, bcnint) __field(u16, assoc_cap) __field(u64, sync_tsf) __field(u32, sync_device_ts) __field(u8, sync_dtim_count) __field(u32, basic_rates) __array(int, mcast_rate, NUM_NL80211_BANDS) __field(u16, ht_operation_mode) __field(s32, cqm_rssi_thold) __field(s32, cqm_rssi_hyst) __field(u32, channel_width) __field(u32, channel_cfreq1) __field(u32, channel_cfreq1_offset) __field(bool, qos) __field(bool, hidden_ssid) __field(int, txpower) __field(u8, p2p_oppps_ctwindow) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->changed = changed; __entry->link_id = link_conf->link_id; __entry->shortpre = link_conf->use_short_preamble; __entry->cts = link_conf->use_cts_prot; __entry->shortslot = link_conf->use_short_slot; __entry->enable_beacon = link_conf->enable_beacon; __entry->dtimper = link_conf->dtim_period; __entry->bcnint = link_conf->beacon_int; __entry->assoc_cap = link_conf->assoc_capability; __entry->sync_tsf = link_conf->sync_tsf; __entry->sync_device_ts = link_conf->sync_device_ts; __entry->sync_dtim_count = link_conf->sync_dtim_count; __entry->basic_rates = link_conf->basic_rates; memcpy(__entry->mcast_rate, link_conf->mcast_rate, sizeof(__entry->mcast_rate)); __entry->ht_operation_mode = link_conf->ht_operation_mode; __entry->cqm_rssi_thold = link_conf->cqm_rssi_thold; __entry->cqm_rssi_hyst = link_conf->cqm_rssi_hyst; __entry->channel_width = link_conf->chanreq.oper.width; __entry->channel_cfreq1 = link_conf->chanreq.oper.center_freq1; __entry->channel_cfreq1_offset = link_conf->chanreq.oper.freq1_offset; __entry->qos = link_conf->qos; __entry->hidden_ssid = link_conf->hidden_ssid; __entry->txpower = link_conf->txpower; __entry->p2p_oppps_ctwindow = link_conf->p2p_noa_attr.oppps_ctwindow; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " link_id:%d, changed:%#llx", LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id, __entry->changed ) ); TRACE_EVENT(drv_prepare_multicast, TP_PROTO(struct ieee80211_local *local, int mc_count), TP_ARGS(local, mc_count), TP_STRUCT__entry( LOCAL_ENTRY __field(int, mc_count) ), TP_fast_assign( LOCAL_ASSIGN; __entry->mc_count = mc_count; ), TP_printk( LOCAL_PR_FMT " prepare mc (%d)", LOCAL_PR_ARG, __entry->mc_count ) ); TRACE_EVENT(drv_configure_filter, TP_PROTO(struct ieee80211_local *local, unsigned int changed_flags, unsigned int *total_flags, u64 multicast), TP_ARGS(local, changed_flags, total_flags, multicast), TP_STRUCT__entry( LOCAL_ENTRY __field(unsigned int, changed) __field(unsigned int, total) __field(u64, multicast) ), TP_fast_assign( LOCAL_ASSIGN; __entry->changed = changed_flags; __entry->total = *total_flags; __entry->multicast = multicast; ), TP_printk( LOCAL_PR_FMT " changed:%#x total:%#x", LOCAL_PR_ARG, __entry->changed, __entry->total ) ); TRACE_EVENT(drv_config_iface_filter, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, unsigned int filter_flags, unsigned int changed_flags), TP_ARGS(local, sdata, filter_flags, changed_flags), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(unsigned int, filter_flags) __field(unsigned int, changed_flags) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->filter_flags = filter_flags; __entry->changed_flags = changed_flags; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " filter_flags: %#x changed_flags: %#x", LOCAL_PR_ARG, VIF_PR_ARG, __entry->filter_flags, __entry->changed_flags ) ); TRACE_EVENT(drv_set_tim, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, bool set), TP_ARGS(local, sta, set), TP_STRUCT__entry( LOCAL_ENTRY STA_ENTRY __field(bool, set) ), TP_fast_assign( LOCAL_ASSIGN; STA_ASSIGN; __entry->set = set; ), TP_printk( LOCAL_PR_FMT STA_PR_FMT " set:%d", LOCAL_PR_ARG, STA_PR_ARG, __entry->set ) ); TRACE_EVENT(drv_set_key, TP_PROTO(struct ieee80211_local *local, enum set_key_cmd cmd, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, struct ieee80211_key_conf *key), TP_ARGS(local, cmd, sdata, sta, key), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY __field(u32, cmd) KEY_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; __entry->cmd = cmd; KEY_ASSIGN(key); ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " cmd: %d" KEY_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->cmd, KEY_PR_ARG ) ); TRACE_EVENT(drv_update_tkip_key, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_key_conf *conf, struct ieee80211_sta *sta, u32 iv32), TP_ARGS(local, sdata, conf, sta, iv32), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY __field(u32, iv32) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; __entry->iv32 = iv32; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " iv32:%#x", LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->iv32 ) ); DEFINE_EVENT(local_sdata_evt, drv_hw_scan, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); DEFINE_EVENT(local_sdata_evt, drv_cancel_hw_scan, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); DEFINE_EVENT(local_sdata_evt, drv_sched_scan_start, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); DEFINE_EVENT(local_sdata_evt, drv_sched_scan_stop, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); TRACE_EVENT(drv_sw_scan_start, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, const u8 *mac_addr), TP_ARGS(local, sdata, mac_addr), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __array(char, mac_addr, ETH_ALEN) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; memcpy(__entry->mac_addr, mac_addr, ETH_ALEN); ), TP_printk(LOCAL_PR_FMT ", " VIF_PR_FMT ", addr:%pM", LOCAL_PR_ARG, VIF_PR_ARG, __entry->mac_addr) ); DEFINE_EVENT(local_sdata_evt, drv_sw_scan_complete, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); TRACE_EVENT(drv_get_stats, TP_PROTO(struct ieee80211_local *local, struct ieee80211_low_level_stats *stats, int ret), TP_ARGS(local, stats, ret), TP_STRUCT__entry( LOCAL_ENTRY __field(int, ret) __field(unsigned int, ackfail) __field(unsigned int, rtsfail) __field(unsigned int, fcserr) __field(unsigned int, rtssucc) ), TP_fast_assign( LOCAL_ASSIGN; __entry->ret = ret; __entry->ackfail = stats->dot11ACKFailureCount; __entry->rtsfail = stats->dot11RTSFailureCount; __entry->fcserr = stats->dot11FCSErrorCount; __entry->rtssucc = stats->dot11RTSSuccessCount; ), TP_printk( LOCAL_PR_FMT " ret:%d", LOCAL_PR_ARG, __entry->ret ) ); TRACE_EVENT(drv_get_key_seq, TP_PROTO(struct ieee80211_local *local, struct ieee80211_key_conf *key), TP_ARGS(local, key), TP_STRUCT__entry( LOCAL_ENTRY KEY_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; KEY_ASSIGN(key); ), TP_printk( LOCAL_PR_FMT KEY_PR_FMT, LOCAL_PR_ARG, KEY_PR_ARG ) ); DEFINE_EVENT(local_u32_evt, drv_set_frag_threshold, TP_PROTO(struct ieee80211_local *local, u32 value), TP_ARGS(local, value) ); DEFINE_EVENT(local_u32_evt, drv_set_rts_threshold, TP_PROTO(struct ieee80211_local *local, u32 value), TP_ARGS(local, value) ); TRACE_EVENT(drv_set_coverage_class, TP_PROTO(struct ieee80211_local *local, s16 value), TP_ARGS(local, value), TP_STRUCT__entry( LOCAL_ENTRY __field(s16, value) ), TP_fast_assign( LOCAL_ASSIGN; __entry->value = value; ), TP_printk( LOCAL_PR_FMT " value:%d", LOCAL_PR_ARG, __entry->value ) ); TRACE_EVENT(drv_sta_notify, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, enum sta_notify_cmd cmd, struct ieee80211_sta *sta), TP_ARGS(local, sdata, cmd, sta), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY __field(u32, cmd) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; __entry->cmd = cmd; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " cmd:%d", LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->cmd ) ); TRACE_EVENT(drv_sta_state, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, enum ieee80211_sta_state old_state, enum ieee80211_sta_state new_state), TP_ARGS(local, sdata, sta, old_state, new_state), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY __field(u32, old_state) __field(u32, new_state) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; __entry->old_state = old_state; __entry->new_state = new_state; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " state: %d->%d", LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->old_state, __entry->new_state ) ); TRACE_EVENT(drv_sta_set_txpwr, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), TP_ARGS(local, sdata, sta), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY __field(s16, txpwr) __field(u8, type) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; __entry->txpwr = sta->deflink.txpwr.power; __entry->type = sta->deflink.txpwr.type; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " txpwr: %d type %d", LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->txpwr, __entry->type ) ); TRACE_EVENT(drv_sta_rc_update, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, u32 changed), TP_ARGS(local, sdata, sta, changed), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY __field(u32, changed) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; __entry->changed = changed; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " changed: 0x%x", LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->changed ) ); DECLARE_EVENT_CLASS(sta_event, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), TP_ARGS(local, sdata, sta), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG ) ); DEFINE_EVENT(sta_event, drv_sta_statistics, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), TP_ARGS(local, sdata, sta) ); DEFINE_EVENT(sta_event, drv_sta_add, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), TP_ARGS(local, sdata, sta) ); DEFINE_EVENT(sta_event, drv_sta_remove, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), TP_ARGS(local, sdata, sta) ); DEFINE_EVENT(sta_event, drv_sta_pre_rcu_remove, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), TP_ARGS(local, sdata, sta) ); DEFINE_EVENT(sta_event, drv_sync_rx_queues, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), TP_ARGS(local, sdata, sta) ); DEFINE_EVENT(sta_event, drv_sta_rate_tbl_update, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), TP_ARGS(local, sdata, sta) ); TRACE_EVENT(drv_conf_tx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, unsigned int link_id, u16 ac, const struct ieee80211_tx_queue_params *params), TP_ARGS(local, sdata, link_id, ac, params), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(unsigned int, link_id) __field(u16, ac) __field(u16, txop) __field(u16, cw_min) __field(u16, cw_max) __field(u8, aifs) __field(bool, uapsd) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->link_id = link_id; __entry->ac = ac; __entry->txop = params->txop; __entry->cw_max = params->cw_max; __entry->cw_min = params->cw_min; __entry->aifs = params->aifs; __entry->uapsd = params->uapsd; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " link_id: %d, AC:%d", LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id, __entry->ac ) ); DEFINE_EVENT(local_sdata_evt, drv_get_tsf, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); TRACE_EVENT(drv_set_tsf, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u64 tsf), TP_ARGS(local, sdata, tsf), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u64, tsf) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->tsf = tsf; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " tsf:%llu", LOCAL_PR_ARG, VIF_PR_ARG, (unsigned long long)__entry->tsf ) ); TRACE_EVENT(drv_offset_tsf, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, s64 offset), TP_ARGS(local, sdata, offset), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(s64, tsf_offset) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->tsf_offset = offset; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " tsf offset:%lld", LOCAL_PR_ARG, VIF_PR_ARG, (unsigned long long)__entry->tsf_offset ) ); DEFINE_EVENT(local_sdata_evt, drv_reset_tsf, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); DEFINE_EVENT(local_only_evt, drv_tx_last_beacon, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local) ); TRACE_EVENT(drv_ampdu_action, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_ampdu_params *params), TP_ARGS(local, sdata, params), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY AMPDU_ACTION_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; AMPDU_ACTION_ASSIGN; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT AMPDU_ACTION_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG, AMPDU_ACTION_PR_ARG ) ); TRACE_EVENT(drv_get_survey, TP_PROTO(struct ieee80211_local *local, int _idx, struct survey_info *survey), TP_ARGS(local, _idx, survey), TP_STRUCT__entry( LOCAL_ENTRY __field(int, idx) ), TP_fast_assign( LOCAL_ASSIGN; __entry->idx = _idx; ), TP_printk( LOCAL_PR_FMT " idx:%d", LOCAL_PR_ARG, __entry->idx ) ); TRACE_EVENT(drv_flush, TP_PROTO(struct ieee80211_local *local, u32 queues, bool drop), TP_ARGS(local, queues, drop), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, drop) __field(u32, queues) ), TP_fast_assign( LOCAL_ASSIGN; __entry->drop = drop; __entry->queues = queues; ), TP_printk( LOCAL_PR_FMT " queues:0x%x drop:%d", LOCAL_PR_ARG, __entry->queues, __entry->drop ) ); DEFINE_EVENT(sta_event, drv_flush_sta, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), TP_ARGS(local, sdata, sta) ); DECLARE_EVENT_CLASS(chanswitch_evt, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_switch *ch_switch), TP_ARGS(local, sdata, ch_switch), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY CHANDEF_ENTRY __field(u64, timestamp) __field(u32, device_timestamp) __field(bool, block_tx) __field(u8, count) __field(u8, link_id) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; CHANDEF_ASSIGN(&ch_switch->chandef) __entry->timestamp = ch_switch->timestamp; __entry->device_timestamp = ch_switch->device_timestamp; __entry->block_tx = ch_switch->block_tx; __entry->count = ch_switch->count; __entry->link_id = ch_switch->link_id; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu device_ts:%u link_id:%d", LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count, __entry->block_tx, __entry->timestamp, __entry->device_timestamp, __entry->link_id ) ); DEFINE_EVENT(chanswitch_evt, drv_channel_switch, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_switch *ch_switch), TP_ARGS(local, sdata, ch_switch) ); TRACE_EVENT(drv_set_antenna, TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret), TP_ARGS(local, tx_ant, rx_ant, ret), TP_STRUCT__entry( LOCAL_ENTRY __field(u32, tx_ant) __field(u32, rx_ant) __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; __entry->tx_ant = tx_ant; __entry->rx_ant = rx_ant; __entry->ret = ret; ), TP_printk( LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d", LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret ) ); TRACE_EVENT(drv_get_antenna, TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret), TP_ARGS(local, tx_ant, rx_ant, ret), TP_STRUCT__entry( LOCAL_ENTRY __field(u32, tx_ant) __field(u32, rx_ant) __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; __entry->tx_ant = tx_ant; __entry->rx_ant = rx_ant; __entry->ret = ret; ), TP_printk( LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d", LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret ) ); TRACE_EVENT(drv_remain_on_channel, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, unsigned int duration, enum ieee80211_roc_type type), TP_ARGS(local, sdata, chan, duration, type), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(int, center_freq) __field(int, freq_offset) __field(unsigned int, duration) __field(u32, type) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->center_freq = chan->center_freq; __entry->freq_offset = chan->freq_offset; __entry->duration = duration; __entry->type = type; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " freq:%d.%03dMHz duration:%dms type=%d", LOCAL_PR_ARG, VIF_PR_ARG, __entry->center_freq, __entry->freq_offset, __entry->duration, __entry->type ) ); DEFINE_EVENT(local_sdata_evt, drv_cancel_remain_on_channel, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); TRACE_EVENT(drv_set_ringparam, TP_PROTO(struct ieee80211_local *local, u32 tx, u32 rx), TP_ARGS(local, tx, rx), TP_STRUCT__entry( LOCAL_ENTRY __field(u32, tx) __field(u32, rx) ), TP_fast_assign( LOCAL_ASSIGN; __entry->tx = tx; __entry->rx = rx; ), TP_printk( LOCAL_PR_FMT " tx:%d rx %d", LOCAL_PR_ARG, __entry->tx, __entry->rx ) ); TRACE_EVENT(drv_get_ringparam, TP_PROTO(struct ieee80211_local *local, u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max), TP_ARGS(local, tx, tx_max, rx, rx_max), TP_STRUCT__entry( LOCAL_ENTRY __field(u32, tx) __field(u32, tx_max) __field(u32, rx) __field(u32, rx_max) ), TP_fast_assign( LOCAL_ASSIGN; __entry->tx = *tx; __entry->tx_max = *tx_max; __entry->rx = *rx; __entry->rx_max = *rx_max; ), TP_printk( LOCAL_PR_FMT " tx:%d tx_max %d rx %d rx_max %d", LOCAL_PR_ARG, __entry->tx, __entry->tx_max, __entry->rx, __entry->rx_max ) ); DEFINE_EVENT(local_only_evt, drv_tx_frames_pending, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local) ); DEFINE_EVENT(local_only_evt, drv_offchannel_tx_cancel_wait, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local) ); TRACE_EVENT(drv_set_bitrate_mask, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, const struct cfg80211_bitrate_mask *mask), TP_ARGS(local, sdata, mask), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u32, legacy_2g) __field(u32, legacy_5g) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->legacy_2g = mask->control[NL80211_BAND_2GHZ].legacy; __entry->legacy_5g = mask->control[NL80211_BAND_5GHZ].legacy; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " 2G Mask:0x%x 5G Mask:0x%x", LOCAL_PR_ARG, VIF_PR_ARG, __entry->legacy_2g, __entry->legacy_5g ) ); TRACE_EVENT(drv_set_rekey_data, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_gtk_rekey_data *data), TP_ARGS(local, sdata, data), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __array(u8, kek, NL80211_KEK_LEN) __array(u8, kck, NL80211_KCK_LEN) __array(u8, replay_ctr, NL80211_REPLAY_CTR_LEN) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; memcpy(__entry->kek, data->kek, NL80211_KEK_LEN); memcpy(__entry->kck, data->kck, NL80211_KCK_LEN); memcpy(__entry->replay_ctr, data->replay_ctr, NL80211_REPLAY_CTR_LEN); ), TP_printk(LOCAL_PR_FMT VIF_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG) ); TRACE_EVENT(drv_event_callback, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, const struct ieee80211_event *_event), TP_ARGS(local, sdata, _event), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u32, type) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->type = _event->type; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " event:%d", LOCAL_PR_ARG, VIF_PR_ARG, __entry->type ) ); DECLARE_EVENT_CLASS(release_evt, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, u16 tids, int num_frames, enum ieee80211_frame_release_type reason, bool more_data), TP_ARGS(local, sta, tids, num_frames, reason, more_data), TP_STRUCT__entry( LOCAL_ENTRY STA_ENTRY __field(u16, tids) __field(int, num_frames) __field(int, reason) __field(bool, more_data) ), TP_fast_assign( LOCAL_ASSIGN; STA_ASSIGN; __entry->tids = tids; __entry->num_frames = num_frames; __entry->reason = reason; __entry->more_data = more_data; ), TP_printk( LOCAL_PR_FMT STA_PR_FMT " TIDs:0x%.4x frames:%d reason:%d more:%d", LOCAL_PR_ARG, STA_PR_ARG, __entry->tids, __entry->num_frames, __entry->reason, __entry->more_data ) ); DEFINE_EVENT(release_evt, drv_release_buffered_frames, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, u16 tids, int num_frames, enum ieee80211_frame_release_type reason, bool more_data), TP_ARGS(local, sta, tids, num_frames, reason, more_data) ); DEFINE_EVENT(release_evt, drv_allow_buffered_frames, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, u16 tids, int num_frames, enum ieee80211_frame_release_type reason, bool more_data), TP_ARGS(local, sta, tids, num_frames, reason, more_data) ); DECLARE_EVENT_CLASS(mgd_prepare_complete_tx_evt, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u16 duration, u16 subtype, bool success), TP_ARGS(local, sdata, duration, subtype, success), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u32, duration) __field(u16, subtype) __field(u8, success) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->duration = duration; __entry->subtype = subtype; __entry->success = success; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " duration: %u, subtype:0x%x, success:%d", LOCAL_PR_ARG, VIF_PR_ARG, __entry->duration, __entry->subtype, __entry->success ) ); DEFINE_EVENT(mgd_prepare_complete_tx_evt, drv_mgd_prepare_tx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u16 duration, u16 subtype, bool success), TP_ARGS(local, sdata, duration, subtype, success) ); DEFINE_EVENT(mgd_prepare_complete_tx_evt, drv_mgd_complete_tx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u16 duration, u16 subtype, bool success), TP_ARGS(local, sdata, duration, subtype, success) ); DEFINE_EVENT(local_sdata_evt, drv_mgd_protect_tdls_discover, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); DECLARE_EVENT_CLASS(local_chanctx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_chanctx *ctx), TP_ARGS(local, ctx), TP_STRUCT__entry( LOCAL_ENTRY CHANCTX_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; CHANCTX_ASSIGN; ), TP_printk( LOCAL_PR_FMT CHANCTX_PR_FMT, LOCAL_PR_ARG, CHANCTX_PR_ARG ) ); DEFINE_EVENT(local_chanctx, drv_add_chanctx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_chanctx *ctx), TP_ARGS(local, ctx) ); DEFINE_EVENT(local_chanctx, drv_remove_chanctx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_chanctx *ctx), TP_ARGS(local, ctx) ); TRACE_EVENT(drv_change_chanctx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, u32 changed), TP_ARGS(local, ctx, changed), TP_STRUCT__entry( LOCAL_ENTRY CHANCTX_ENTRY __field(u32, changed) ), TP_fast_assign( LOCAL_ASSIGN; CHANCTX_ASSIGN; __entry->changed = changed; ), TP_printk( LOCAL_PR_FMT CHANCTX_PR_FMT " changed:%#x", LOCAL_PR_ARG, CHANCTX_PR_ARG, __entry->changed ) ); #if !defined(__TRACE_VIF_ENTRY) #define __TRACE_VIF_ENTRY struct trace_vif_entry { enum nl80211_iftype vif_type; bool p2p; char vif_name[IFNAMSIZ]; } __packed; struct trace_chandef_entry { u32 control_freq; u32 freq_offset; u32 chan_width; u32 center_freq1; u32 freq1_offset; u32 center_freq2; } __packed; struct trace_switch_entry { struct trace_vif_entry vif; unsigned int link_id; struct trace_chandef_entry old_chandef; struct trace_chandef_entry new_chandef; } __packed; #define SWITCH_ENTRY_ASSIGN(to, from) local_vifs[i].to = vifs[i].from #endif TRACE_EVENT(drv_switch_vif_chanctx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_vif_chanctx_switch *vifs, int n_vifs, enum ieee80211_chanctx_switch_mode mode), TP_ARGS(local, vifs, n_vifs, mode), TP_STRUCT__entry( LOCAL_ENTRY __field(int, n_vifs) __field(u32, mode) __dynamic_array(u8, vifs, sizeof(struct trace_switch_entry) * n_vifs) ), TP_fast_assign( LOCAL_ASSIGN; __entry->n_vifs = n_vifs; __entry->mode = mode; { struct trace_switch_entry *local_vifs = __get_dynamic_array(vifs); int i; for (i = 0; i < n_vifs; i++) { struct ieee80211_sub_if_data *sdata; sdata = container_of(vifs[i].vif, struct ieee80211_sub_if_data, vif); SWITCH_ENTRY_ASSIGN(vif.vif_type, vif->type); SWITCH_ENTRY_ASSIGN(vif.p2p, vif->p2p); SWITCH_ENTRY_ASSIGN(link_id, link_conf->link_id); strncpy(local_vifs[i].vif.vif_name, sdata->name, sizeof(local_vifs[i].vif.vif_name)); SWITCH_ENTRY_ASSIGN(old_chandef.control_freq, old_ctx->def.chan->center_freq); SWITCH_ENTRY_ASSIGN(old_chandef.freq_offset, old_ctx->def.chan->freq_offset); SWITCH_ENTRY_ASSIGN(old_chandef.chan_width, old_ctx->def.width); SWITCH_ENTRY_ASSIGN(old_chandef.center_freq1, old_ctx->def.center_freq1); SWITCH_ENTRY_ASSIGN(old_chandef.freq1_offset, old_ctx->def.freq1_offset); SWITCH_ENTRY_ASSIGN(old_chandef.center_freq2, old_ctx->def.center_freq2); SWITCH_ENTRY_ASSIGN(new_chandef.control_freq, new_ctx->def.chan->center_freq); SWITCH_ENTRY_ASSIGN(new_chandef.freq_offset, new_ctx->def.chan->freq_offset); SWITCH_ENTRY_ASSIGN(new_chandef.chan_width, new_ctx->def.width); SWITCH_ENTRY_ASSIGN(new_chandef.center_freq1, new_ctx->def.center_freq1); SWITCH_ENTRY_ASSIGN(new_chandef.freq1_offset, new_ctx->def.freq1_offset); SWITCH_ENTRY_ASSIGN(new_chandef.center_freq2, new_ctx->def.center_freq2); } } ), TP_printk( LOCAL_PR_FMT " n_vifs:%d mode:%d", LOCAL_PR_ARG, __entry->n_vifs, __entry->mode ) ); DECLARE_EVENT_CLASS(local_sdata_chanctx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *link_conf, struct ieee80211_chanctx *ctx), TP_ARGS(local, sdata, link_conf, ctx), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY CHANCTX_ENTRY __field(unsigned int, link_id) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; CHANCTX_ASSIGN; __entry->link_id = link_conf->link_id; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " link_id:%d" CHANCTX_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id, CHANCTX_PR_ARG ) ); DEFINE_EVENT(local_sdata_chanctx, drv_assign_vif_chanctx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *link_conf, struct ieee80211_chanctx *ctx), TP_ARGS(local, sdata, link_conf, ctx) ); DEFINE_EVENT(local_sdata_chanctx, drv_unassign_vif_chanctx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *link_conf, struct ieee80211_chanctx *ctx), TP_ARGS(local, sdata, link_conf, ctx) ); TRACE_EVENT(drv_start_ap, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *link_conf), TP_ARGS(local, sdata, link_conf), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u32, link_id) __field(u8, dtimper) __field(u16, bcnint) __dynamic_array(u8, ssid, sdata->vif.cfg.ssid_len) __field(bool, hidden_ssid) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->link_id = link_conf->link_id; __entry->dtimper = link_conf->dtim_period; __entry->bcnint = link_conf->beacon_int; __entry->hidden_ssid = link_conf->hidden_ssid; memcpy(__get_dynamic_array(ssid), sdata->vif.cfg.ssid, sdata->vif.cfg.ssid_len); ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " link id %u", LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id ) ); TRACE_EVENT(drv_stop_ap, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *link_conf), TP_ARGS(local, sdata, link_conf), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u32, link_id) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->link_id = link_conf->link_id; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " link id %u", LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id ) ); TRACE_EVENT(drv_reconfig_complete, TP_PROTO(struct ieee80211_local *local, enum ieee80211_reconfig_type reconfig_type), TP_ARGS(local, reconfig_type), TP_STRUCT__entry( LOCAL_ENTRY __field(u8, reconfig_type) ), TP_fast_assign( LOCAL_ASSIGN; __entry->reconfig_type = reconfig_type; ), TP_printk( LOCAL_PR_FMT " reconfig_type:%d", LOCAL_PR_ARG, __entry->reconfig_type ) ); #if IS_ENABLED(CONFIG_IPV6) DEFINE_EVENT(local_sdata_evt, drv_ipv6_addr_change, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); #endif TRACE_EVENT(drv_join_ibss, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *info), TP_ARGS(local, sdata, info), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u8, dtimper) __field(u16, bcnint) __dynamic_array(u8, ssid, sdata->vif.cfg.ssid_len) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->dtimper = info->dtim_period; __entry->bcnint = info->beacon_int; memcpy(__get_dynamic_array(ssid), sdata->vif.cfg.ssid, sdata->vif.cfg.ssid_len); ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG ) ); DEFINE_EVENT(local_sdata_evt, drv_leave_ibss, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); TRACE_EVENT(drv_get_expected_throughput, TP_PROTO(struct ieee80211_sta *sta), TP_ARGS(sta), TP_STRUCT__entry( STA_ENTRY ), TP_fast_assign( STA_ASSIGN; ), TP_printk( STA_PR_FMT, STA_PR_ARG ) ); TRACE_EVENT(drv_start_nan, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_nan_conf *conf), TP_ARGS(local, sdata, conf), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u8, master_pref) __field(u8, bands) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->master_pref = conf->master_pref; __entry->bands = conf->bands; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT ", master preference: %u, bands: 0x%0x", LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, __entry->bands ) ); TRACE_EVENT(drv_stop_nan, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG ) ); TRACE_EVENT(drv_nan_change_conf, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_nan_conf *conf, u32 changes), TP_ARGS(local, sdata, conf, changes), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u8, master_pref) __field(u8, bands) __field(u32, changes) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->master_pref = conf->master_pref; __entry->bands = conf->bands; __entry->changes = changes; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT ", master preference: %u, bands: 0x%0x, changes: 0x%x", LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, __entry->bands, __entry->changes ) ); TRACE_EVENT(drv_add_nan_func, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, const struct cfg80211_nan_func *func), TP_ARGS(local, sdata, func), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u8, type) __field(u8, inst_id) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->type = func->type; __entry->inst_id = func->instance_id; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT ", type: %u, inst_id: %u", LOCAL_PR_ARG, VIF_PR_ARG, __entry->type, __entry->inst_id ) ); TRACE_EVENT(drv_del_nan_func, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u8 instance_id), TP_ARGS(local, sdata, instance_id), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u8, instance_id) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->instance_id = instance_id; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT ", instance_id: %u", LOCAL_PR_ARG, VIF_PR_ARG, __entry->instance_id ) ); DEFINE_EVENT(local_sdata_evt, drv_start_pmsr, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); DEFINE_EVENT(local_sdata_evt, drv_abort_pmsr, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); TRACE_EVENT(drv_set_default_unicast_key, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, int key_idx), TP_ARGS(local, sdata, key_idx), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(int, key_idx) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->key_idx = key_idx; ), TP_printk(LOCAL_PR_FMT VIF_PR_FMT " key_idx:%d", LOCAL_PR_ARG, VIF_PR_ARG, __entry->key_idx) ); TRACE_EVENT(drv_channel_switch_beacon, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def *chandef), TP_ARGS(local, sdata, chandef), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY CHANDEF_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; CHANDEF_ASSIGN(chandef); ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " channel switch to " CHANDEF_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG ) ); DEFINE_EVENT(chanswitch_evt, drv_pre_channel_switch, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_switch *ch_switch), TP_ARGS(local, sdata, ch_switch) ); DEFINE_EVENT(local_sdata_evt, drv_post_channel_switch, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); DEFINE_EVENT(local_sdata_evt, drv_abort_channel_switch, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); DEFINE_EVENT(chanswitch_evt, drv_channel_switch_rx_beacon, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_switch *ch_switch), TP_ARGS(local, sdata, ch_switch) ); TRACE_EVENT(drv_get_txpower, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, int dbm, int ret), TP_ARGS(local, sdata, dbm, ret), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(int, dbm) __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->dbm = dbm; __entry->ret = ret; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " dbm:%d ret:%d", LOCAL_PR_ARG, VIF_PR_ARG, __entry->dbm, __entry->ret ) ); TRACE_EVENT(drv_tdls_channel_switch, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, u8 oper_class, struct cfg80211_chan_def *chandef), TP_ARGS(local, sdata, sta, oper_class, chandef), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY __field(u8, oper_class) CHANDEF_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; __entry->oper_class = oper_class; CHANDEF_ASSIGN(chandef) ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " tdls channel switch to" CHANDEF_PR_FMT " oper_class:%d " STA_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->oper_class, STA_PR_ARG ) ); TRACE_EVENT(drv_tdls_cancel_channel_switch, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), TP_ARGS(local, sdata, sta), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " tdls cancel channel switch with " STA_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG ) ); TRACE_EVENT(drv_tdls_recv_channel_switch, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_tdls_ch_sw_params *params), TP_ARGS(local, sdata, params), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u8, action_code) STA_ENTRY CHANDEF_ENTRY __field(u32, status) __field(bool, peer_initiator) __field(u32, timestamp) __field(u16, switch_time) __field(u16, switch_timeout) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_NAMED_ASSIGN(params->sta); CHANDEF_ASSIGN(params->chandef) __entry->peer_initiator = params->sta->tdls_initiator; __entry->action_code = params->action_code; __entry->status = params->status; __entry->timestamp = params->timestamp; __entry->switch_time = params->switch_time; __entry->switch_timeout = params->switch_timeout; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " received tdls channel switch packet" " action:%d status:%d time:%d switch time:%d switch" " timeout:%d initiator: %d chan:" CHANDEF_PR_FMT STA_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG, __entry->action_code, __entry->status, __entry->timestamp, __entry->switch_time, __entry->switch_timeout, __entry->peer_initiator, CHANDEF_PR_ARG, STA_PR_ARG ) ); TRACE_EVENT(drv_wake_tx_queue, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct txq_info *txq), TP_ARGS(local, sdata, txq), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY __field(u8, ac) __field(u8, tid) ), TP_fast_assign( struct ieee80211_sta *sta = txq->txq.sta; LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; __entry->ac = txq->txq.ac; __entry->tid = txq->txq.tid; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ac:%d tid:%d", LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ac, __entry->tid ) ); TRACE_EVENT(drv_get_ftm_responder_stats, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_ftm_responder_stats *ftm_stats), TP_ARGS(local, sdata, ftm_stats), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG ) ); DEFINE_EVENT(local_sdata_addr_evt, drv_update_vif_offload, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); DECLARE_EVENT_CLASS(sta_flag_evt, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, bool enabled), TP_ARGS(local, sdata, sta, enabled), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY __field(bool, enabled) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; __entry->enabled = enabled; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " enabled:%d", LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->enabled ) ); DEFINE_EVENT(sta_flag_evt, drv_sta_set_4addr, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, bool enabled), TP_ARGS(local, sdata, sta, enabled) ); DEFINE_EVENT(sta_flag_evt, drv_sta_set_decap_offload, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, bool enabled), TP_ARGS(local, sdata, sta, enabled) ); TRACE_EVENT(drv_add_twt_setup, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, struct ieee80211_twt_setup *twt, struct ieee80211_twt_params *twt_agrt), TP_ARGS(local, sta, twt, twt_agrt), TP_STRUCT__entry( LOCAL_ENTRY STA_ENTRY __field(u8, dialog_token) __field(u8, control) __field(__le16, req_type) __field(__le64, twt) __field(u8, duration) __field(__le16, mantissa) __field(u8, channel) ), TP_fast_assign( LOCAL_ASSIGN; STA_ASSIGN; __entry->dialog_token = twt->dialog_token; __entry->control = twt->control; __entry->req_type = twt_agrt->req_type; __entry->twt = twt_agrt->twt; __entry->duration = twt_agrt->min_twt_dur; __entry->mantissa = twt_agrt->mantissa; __entry->channel = twt_agrt->channel; ), TP_printk( LOCAL_PR_FMT STA_PR_FMT " token:%d control:0x%02x req_type:0x%04x" " twt:%llu duration:%d mantissa:%d channel:%d", LOCAL_PR_ARG, STA_PR_ARG, __entry->dialog_token, __entry->control, le16_to_cpu(__entry->req_type), le64_to_cpu(__entry->twt), __entry->duration, le16_to_cpu(__entry->mantissa), __entry->channel ) ); TRACE_EVENT(drv_twt_teardown_request, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, u8 flowid), TP_ARGS(local, sta, flowid), TP_STRUCT__entry( LOCAL_ENTRY STA_ENTRY __field(u8, flowid) ), TP_fast_assign( LOCAL_ASSIGN; STA_ASSIGN; __entry->flowid = flowid; ), TP_printk( LOCAL_PR_FMT STA_PR_FMT " flowid:%d", LOCAL_PR_ARG, STA_PR_ARG, __entry->flowid ) ); DEFINE_EVENT(sta_event, drv_net_fill_forward_path, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), TP_ARGS(local, sdata, sta) ); TRACE_EVENT(drv_net_setup_tc, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u8 type), TP_ARGS(local, sdata, type), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u8, type) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->type = type; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " type:%d\n", LOCAL_PR_ARG, VIF_PR_ARG, __entry->type ) ); TRACE_EVENT(drv_can_activate_links, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u16 active_links), TP_ARGS(local, sdata, active_links), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u16, active_links) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->active_links = active_links; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " requested active_links:0x%04x\n", LOCAL_PR_ARG, VIF_PR_ARG, __entry->active_links ) ); TRACE_EVENT(drv_change_vif_links, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u16 old_links, u16 new_links), TP_ARGS(local, sdata, old_links, new_links), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u16, old_links) __field(u16, new_links) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->old_links = old_links; __entry->new_links = new_links; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT " old_links:0x%04x, new_links:0x%04x\n", LOCAL_PR_ARG, VIF_PR_ARG, __entry->old_links, __entry->new_links ) ); TRACE_EVENT(drv_change_sta_links, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, u16 old_links, u16 new_links), TP_ARGS(local, sdata, sta, old_links, new_links), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY __field(u16, old_links) __field(u16, new_links) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; __entry->old_links = old_links; __entry->new_links = new_links; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " old_links:0x%04x, new_links:0x%04x\n", LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->old_links, __entry->new_links ) ); /* * Tracing for API calls that drivers call. */ TRACE_EVENT(api_start_tx_ba_session, TP_PROTO(struct ieee80211_sta *sta, u16 tid), TP_ARGS(sta, tid), TP_STRUCT__entry( STA_ENTRY __field(u16, tid) ), TP_fast_assign( STA_ASSIGN; __entry->tid = tid; ), TP_printk( STA_PR_FMT " tid:%d", STA_PR_ARG, __entry->tid ) ); TRACE_EVENT(api_start_tx_ba_cb, TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid), TP_ARGS(sdata, ra, tid), TP_STRUCT__entry( VIF_ENTRY __array(u8, ra, ETH_ALEN) __field(u16, tid) ), TP_fast_assign( VIF_ASSIGN; memcpy(__entry->ra, ra, ETH_ALEN); __entry->tid = tid; ), TP_printk( VIF_PR_FMT " ra:%pM tid:%d", VIF_PR_ARG, __entry->ra, __entry->tid ) ); TRACE_EVENT(api_stop_tx_ba_session, TP_PROTO(struct ieee80211_sta *sta, u16 tid), TP_ARGS(sta, tid), TP_STRUCT__entry( STA_ENTRY __field(u16, tid) ), TP_fast_assign( STA_ASSIGN; __entry->tid = tid; ), TP_printk( STA_PR_FMT " tid:%d", STA_PR_ARG, __entry->tid ) ); TRACE_EVENT(api_stop_tx_ba_cb, TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid), TP_ARGS(sdata, ra, tid), TP_STRUCT__entry( VIF_ENTRY __array(u8, ra, ETH_ALEN) __field(u16, tid) ), TP_fast_assign( VIF_ASSIGN; memcpy(__entry->ra, ra, ETH_ALEN); __entry->tid = tid; ), TP_printk( VIF_PR_FMT " ra:%pM tid:%d", VIF_PR_ARG, __entry->ra, __entry->tid ) ); DEFINE_EVENT(local_only_evt, api_restart_hw, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local) ); TRACE_EVENT(api_beacon_loss, TP_PROTO(struct ieee80211_sub_if_data *sdata), TP_ARGS(sdata), TP_STRUCT__entry( VIF_ENTRY ), TP_fast_assign( VIF_ASSIGN; ), TP_printk( VIF_PR_FMT, VIF_PR_ARG ) ); TRACE_EVENT(api_connection_loss, TP_PROTO(struct ieee80211_sub_if_data *sdata), TP_ARGS(sdata), TP_STRUCT__entry( VIF_ENTRY ), TP_fast_assign( VIF_ASSIGN; ), TP_printk( VIF_PR_FMT, VIF_PR_ARG ) ); TRACE_EVENT(api_disconnect, TP_PROTO(struct ieee80211_sub_if_data *sdata, bool reconnect), TP_ARGS(sdata, reconnect), TP_STRUCT__entry( VIF_ENTRY __field(int, reconnect) ), TP_fast_assign( VIF_ASSIGN; __entry->reconnect = reconnect; ), TP_printk( VIF_PR_FMT " reconnect:%d", VIF_PR_ARG, __entry->reconnect ) ); TRACE_EVENT(api_cqm_rssi_notify, TP_PROTO(struct ieee80211_sub_if_data *sdata, enum nl80211_cqm_rssi_threshold_event rssi_event, s32 rssi_level), TP_ARGS(sdata, rssi_event, rssi_level), TP_STRUCT__entry( VIF_ENTRY __field(u32, rssi_event) __field(s32, rssi_level) ), TP_fast_assign( VIF_ASSIGN; __entry->rssi_event = rssi_event; __entry->rssi_level = rssi_level; ), TP_printk( VIF_PR_FMT " event:%d rssi:%d", VIF_PR_ARG, __entry->rssi_event, __entry->rssi_level ) ); DEFINE_EVENT(local_sdata_evt, api_cqm_beacon_loss_notify, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), TP_ARGS(local, sdata) ); TRACE_EVENT(api_scan_completed, TP_PROTO(struct ieee80211_local *local, bool aborted), TP_ARGS(local, aborted), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, aborted) ), TP_fast_assign( LOCAL_ASSIGN; __entry->aborted = aborted; ), TP_printk( LOCAL_PR_FMT " aborted:%d", LOCAL_PR_ARG, __entry->aborted ) ); TRACE_EVENT(api_sched_scan_results, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local), TP_STRUCT__entry( LOCAL_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; ), TP_printk( LOCAL_PR_FMT, LOCAL_PR_ARG ) ); TRACE_EVENT(api_sched_scan_stopped, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local), TP_STRUCT__entry( LOCAL_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; ), TP_printk( LOCAL_PR_FMT, LOCAL_PR_ARG ) ); TRACE_EVENT(api_sta_block_awake, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, bool block), TP_ARGS(local, sta, block), TP_STRUCT__entry( LOCAL_ENTRY STA_ENTRY __field(bool, block) ), TP_fast_assign( LOCAL_ASSIGN; STA_ASSIGN; __entry->block = block; ), TP_printk( LOCAL_PR_FMT STA_PR_FMT " block:%d", LOCAL_PR_ARG, STA_PR_ARG, __entry->block ) ); TRACE_EVENT(api_chswitch_done, TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success, unsigned int link_id), TP_ARGS(sdata, success, link_id), TP_STRUCT__entry( VIF_ENTRY __field(bool, success) __field(unsigned int, link_id) ), TP_fast_assign( VIF_ASSIGN; __entry->success = success; __entry->link_id = link_id; ), TP_printk( VIF_PR_FMT " success=%d link_id=%d", VIF_PR_ARG, __entry->success, __entry->link_id ) ); DEFINE_EVENT(local_only_evt, api_ready_on_channel, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local) ); DEFINE_EVENT(local_only_evt, api_remain_on_channel_expired, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local) ); TRACE_EVENT(api_gtk_rekey_notify, TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *bssid, const u8 *replay_ctr), TP_ARGS(sdata, bssid, replay_ctr), TP_STRUCT__entry( VIF_ENTRY __array(u8, bssid, ETH_ALEN) __array(u8, replay_ctr, NL80211_REPLAY_CTR_LEN) ), TP_fast_assign( VIF_ASSIGN; memcpy(__entry->bssid, bssid, ETH_ALEN); memcpy(__entry->replay_ctr, replay_ctr, NL80211_REPLAY_CTR_LEN); ), TP_printk(VIF_PR_FMT, VIF_PR_ARG) ); TRACE_EVENT(api_enable_rssi_reports, TP_PROTO(struct ieee80211_sub_if_data *sdata, int rssi_min_thold, int rssi_max_thold), TP_ARGS(sdata, rssi_min_thold, rssi_max_thold), TP_STRUCT__entry( VIF_ENTRY __field(int, rssi_min_thold) __field(int, rssi_max_thold) ), TP_fast_assign( VIF_ASSIGN; __entry->rssi_min_thold = rssi_min_thold; __entry->rssi_max_thold = rssi_max_thold; ), TP_printk( VIF_PR_FMT " rssi_min_thold =%d, rssi_max_thold = %d", VIF_PR_ARG, __entry->rssi_min_thold, __entry->rssi_max_thold ) ); TRACE_EVENT(api_eosp, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta), TP_ARGS(local, sta), TP_STRUCT__entry( LOCAL_ENTRY STA_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; STA_ASSIGN; ), TP_printk( LOCAL_PR_FMT STA_PR_FMT, LOCAL_PR_ARG, STA_PR_ARG ) ); TRACE_EVENT(api_send_eosp_nullfunc, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, u8 tid), TP_ARGS(local, sta, tid), TP_STRUCT__entry( LOCAL_ENTRY STA_ENTRY __field(u8, tid) ), TP_fast_assign( LOCAL_ASSIGN; STA_ASSIGN; __entry->tid = tid; ), TP_printk( LOCAL_PR_FMT STA_PR_FMT " tid:%d", LOCAL_PR_ARG, STA_PR_ARG, __entry->tid ) ); TRACE_EVENT(api_sta_set_buffered, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, u8 tid, bool buffered), TP_ARGS(local, sta, tid, buffered), TP_STRUCT__entry( LOCAL_ENTRY STA_ENTRY __field(u8, tid) __field(bool, buffered) ), TP_fast_assign( LOCAL_ASSIGN; STA_ASSIGN; __entry->tid = tid; __entry->buffered = buffered; ), TP_printk( LOCAL_PR_FMT STA_PR_FMT " tid:%d buffered:%d", LOCAL_PR_ARG, STA_PR_ARG, __entry->tid, __entry->buffered ) ); TRACE_EVENT(api_radar_detected, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local), TP_STRUCT__entry( LOCAL_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; ), TP_printk( LOCAL_PR_FMT " radar detected", LOCAL_PR_ARG ) ); TRACE_EVENT(api_request_smps, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, enum ieee80211_smps_mode smps_mode), TP_ARGS(local, sdata, link, smps_mode), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(int, link_id) __field(u32, smps_mode) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->link_id = link->link_id, __entry->smps_mode = smps_mode; ), TP_printk( LOCAL_PR_FMT " " VIF_PR_FMT " link:%d, smps_mode:%d", LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id, __entry->smps_mode ) ); /* * Tracing for internal functions * (which may also be called in response to driver calls) */ TRACE_EVENT(wake_queue, TP_PROTO(struct ieee80211_local *local, u16 queue, enum queue_stop_reason reason), TP_ARGS(local, queue, reason), TP_STRUCT__entry( LOCAL_ENTRY __field(u16, queue) __field(u32, reason) ), TP_fast_assign( LOCAL_ASSIGN; __entry->queue = queue; __entry->reason = reason; ), TP_printk( LOCAL_PR_FMT " queue:%d, reason:%d", LOCAL_PR_ARG, __entry->queue, __entry->reason ) ); TRACE_EVENT(stop_queue, TP_PROTO(struct ieee80211_local *local, u16 queue, enum queue_stop_reason reason), TP_ARGS(local, queue, reason), TP_STRUCT__entry( LOCAL_ENTRY __field(u16, queue) __field(u32, reason) ), TP_fast_assign( LOCAL_ASSIGN; __entry->queue = queue; __entry->reason = reason; ), TP_printk( LOCAL_PR_FMT " queue:%d, reason:%d", LOCAL_PR_ARG, __entry->queue, __entry->reason ) ); TRACE_EVENT(drv_can_neg_ttlm, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_neg_ttlm *neg_ttlm), TP_ARGS(local, sdata, neg_ttlm), TP_STRUCT__entry(LOCAL_ENTRY VIF_ENTRY __array(u16, downlink, sizeof(u16) * 8) __array(u16, uplink, sizeof(u16) * 8) ), TP_fast_assign(LOCAL_ASSIGN; VIF_ASSIGN; memcpy(__entry->downlink, neg_ttlm->downlink, sizeof(neg_ttlm->downlink)); memcpy(__entry->uplink, neg_ttlm->uplink, sizeof(neg_ttlm->uplink)); ), TP_printk(LOCAL_PR_FMT ", " VIF_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG) ); TRACE_EVENT(drv_neg_ttlm_res, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, enum ieee80211_neg_ttlm_res res, struct ieee80211_neg_ttlm *neg_ttlm), TP_ARGS(local, sdata, res, neg_ttlm), TP_STRUCT__entry(LOCAL_ENTRY VIF_ENTRY __field(u32, res) __array(u16, downlink, sizeof(u16) * 8) __array(u16, uplink, sizeof(u16) * 8) ), TP_fast_assign(LOCAL_ASSIGN; VIF_ASSIGN; __entry->res = res; memcpy(__entry->downlink, neg_ttlm->downlink, sizeof(neg_ttlm->downlink)); memcpy(__entry->uplink, neg_ttlm->uplink, sizeof(neg_ttlm->uplink)); ), TP_printk(LOCAL_PR_FMT VIF_PR_FMT " response: %d\n ", LOCAL_PR_ARG, VIF_PR_ARG, __entry->res ) ); #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h> |
4 1 1 1 6 2 1 1 1 13 13 6 8 10 14 14 6 11 11 11 11 11 11 11 11 11 11 11 13 14 14 5 11 14 14 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include "device.h" #include "peer.h" #include "socket.h" #include "queueing.h" #include "messages.h" #include <linux/ctype.h> #include <linux/net.h> #include <linux/if_vlan.h> #include <linux/if_ether.h> #include <linux/inetdevice.h> #include <net/udp_tunnel.h> #include <net/ipv6.h> static int send4(struct wg_device *wg, struct sk_buff *skb, struct endpoint *endpoint, u8 ds, struct dst_cache *cache) { struct flowi4 fl = { .saddr = endpoint->src4.s_addr, .daddr = endpoint->addr4.sin_addr.s_addr, .fl4_dport = endpoint->addr4.sin_port, .flowi4_mark = wg->fwmark, .flowi4_proto = IPPROTO_UDP }; struct rtable *rt = NULL; struct sock *sock; int ret = 0; skb_mark_not_on_list(skb); skb->dev = wg->dev; skb->mark = wg->fwmark; rcu_read_lock_bh(); sock = rcu_dereference_bh(wg->sock4); if (unlikely(!sock)) { ret = -ENONET; goto err; } fl.fl4_sport = inet_sk(sock)->inet_sport; if (cache) rt = dst_cache_get_ip4(cache, &fl.saddr); if (!rt) { security_sk_classify_flow(sock, flowi4_to_flowi_common(&fl)); if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, fl.saddr, RT_SCOPE_HOST))) { endpoint->src4.s_addr = 0; endpoint->src_if4 = 0; fl.saddr = 0; if (cache) dst_cache_reset(cache); } rt = ip_route_output_flow(sock_net(sock), &fl, sock); if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && rt->dst.dev->ifindex != endpoint->src_if4)))) { endpoint->src4.s_addr = 0; endpoint->src_if4 = 0; fl.saddr = 0; if (cache) dst_cache_reset(cache); if (!IS_ERR(rt)) ip_rt_put(rt); rt = ip_route_output_flow(sock_net(sock), &fl, sock); } if (IS_ERR(rt)) { ret = PTR_ERR(rt); net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", wg->dev->name, &endpoint->addr, ret); goto err; } if (cache) dst_cache_set_ip4(cache, &rt->dst, fl.saddr); } skb->ignore_df = 1; udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds, ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, fl.fl4_dport, false, false); goto out; err: kfree_skb(skb); out: rcu_read_unlock_bh(); return ret; } static int send6(struct wg_device *wg, struct sk_buff *skb, struct endpoint *endpoint, u8 ds, struct dst_cache *cache) { #if IS_ENABLED(CONFIG_IPV6) struct flowi6 fl = { .saddr = endpoint->src6, .daddr = endpoint->addr6.sin6_addr, .fl6_dport = endpoint->addr6.sin6_port, .flowi6_mark = wg->fwmark, .flowi6_oif = endpoint->addr6.sin6_scope_id, .flowi6_proto = IPPROTO_UDP /* TODO: addr->sin6_flowinfo */ }; struct dst_entry *dst = NULL; struct sock *sock; int ret = 0; skb_mark_not_on_list(skb); skb->dev = wg->dev; skb->mark = wg->fwmark; rcu_read_lock_bh(); sock = rcu_dereference_bh(wg->sock6); if (unlikely(!sock)) { ret = -ENONET; goto err; } fl.fl6_sport = inet_sk(sock)->inet_sport; if (cache) dst = dst_cache_get_ip6(cache, &fl.saddr); if (!dst) { security_sk_classify_flow(sock, flowi6_to_flowi_common(&fl)); if (unlikely(!ipv6_addr_any(&fl.saddr) && !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) { endpoint->src6 = fl.saddr = in6addr_any; if (cache) dst_cache_reset(cache); } dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, NULL); if (IS_ERR(dst)) { ret = PTR_ERR(dst); net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", wg->dev->name, &endpoint->addr, ret); goto err; } if (cache) dst_cache_set_ip6(cache, dst, &fl.saddr); } skb->ignore_df = 1; udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds, ip6_dst_hoplimit(dst), 0, fl.fl6_sport, fl.fl6_dport, false); goto out; err: kfree_skb(skb); out: rcu_read_unlock_bh(); return ret; #else kfree_skb(skb); return -EAFNOSUPPORT; #endif } int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds) { size_t skb_len = skb->len; int ret = -EAFNOSUPPORT; read_lock_bh(&peer->endpoint_lock); if (peer->endpoint.addr.sa_family == AF_INET) ret = send4(peer->device, skb, &peer->endpoint, ds, &peer->endpoint_cache); else if (peer->endpoint.addr.sa_family == AF_INET6) ret = send6(peer->device, skb, &peer->endpoint, ds, &peer->endpoint_cache); else dev_kfree_skb(skb); if (likely(!ret)) peer->tx_bytes += skb_len; read_unlock_bh(&peer->endpoint_lock); return ret; } int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer, size_t len, u8 ds) { struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); if (unlikely(!skb)) return -ENOMEM; skb_reserve(skb, SKB_HEADER_LEN); skb_set_inner_network_header(skb, 0); skb_put_data(skb, buffer, len); return wg_socket_send_skb_to_peer(peer, skb, ds); } int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, struct sk_buff *in_skb, void *buffer, size_t len) { int ret = 0; struct sk_buff *skb; struct endpoint endpoint; if (unlikely(!in_skb)) return -EINVAL; ret = wg_socket_endpoint_from_skb(&endpoint, in_skb); if (unlikely(ret < 0)) return ret; skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); if (unlikely(!skb)) return -ENOMEM; skb_reserve(skb, SKB_HEADER_LEN); skb_set_inner_network_header(skb, 0); skb_put_data(skb, buffer, len); if (endpoint.addr.sa_family == AF_INET) ret = send4(wg, skb, &endpoint, 0, NULL); else if (endpoint.addr.sa_family == AF_INET6) ret = send6(wg, skb, &endpoint, 0, NULL); /* No other possibilities if the endpoint is valid, which it is, * as we checked above. */ return ret; } int wg_socket_endpoint_from_skb(struct endpoint *endpoint, const struct sk_buff *skb) { memset(endpoint, 0, sizeof(*endpoint)); if (skb->protocol == htons(ETH_P_IP)) { endpoint->addr4.sin_family = AF_INET; endpoint->addr4.sin_port = udp_hdr(skb)->source; endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; endpoint->src4.s_addr = ip_hdr(skb)->daddr; endpoint->src_if4 = skb->skb_iif; } else if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) { endpoint->addr6.sin6_family = AF_INET6; endpoint->addr6.sin6_port = udp_hdr(skb)->source; endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr; endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id( &ipv6_hdr(skb)->saddr, skb->skb_iif); endpoint->src6 = ipv6_hdr(skb)->daddr; } else { return -EINVAL; } return 0; } static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b) { return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET && a->addr4.sin_port == b->addr4.sin_port && a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr && a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) || (a->addr.sa_family == AF_INET6 && b->addr.sa_family == AF_INET6 && a->addr6.sin6_port == b->addr6.sin6_port && ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) && a->addr6.sin6_scope_id == b->addr6.sin6_scope_id && ipv6_addr_equal(&a->src6, &b->src6)) || unlikely(!a->addr.sa_family && !b->addr.sa_family); } void wg_socket_set_peer_endpoint(struct wg_peer *peer, const struct endpoint *endpoint) { /* First we check unlocked, in order to optimize, since it's pretty rare * that an endpoint will change. If we happen to be mid-write, and two * CPUs wind up writing the same thing or something slightly different, * it doesn't really matter much either. */ if (endpoint_eq(endpoint, &peer->endpoint)) return; write_lock_bh(&peer->endpoint_lock); if (endpoint->addr.sa_family == AF_INET) { peer->endpoint.addr4 = endpoint->addr4; peer->endpoint.src4 = endpoint->src4; peer->endpoint.src_if4 = endpoint->src_if4; } else if (IS_ENABLED(CONFIG_IPV6) && endpoint->addr.sa_family == AF_INET6) { peer->endpoint.addr6 = endpoint->addr6; peer->endpoint.src6 = endpoint->src6; } else { goto out; } dst_cache_reset(&peer->endpoint_cache); out: write_unlock_bh(&peer->endpoint_lock); } void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, const struct sk_buff *skb) { struct endpoint endpoint; if (!wg_socket_endpoint_from_skb(&endpoint, skb)) wg_socket_set_peer_endpoint(peer, &endpoint); } void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) { write_lock_bh(&peer->endpoint_lock); memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); dst_cache_reset_now(&peer->endpoint_cache); write_unlock_bh(&peer->endpoint_lock); } static int wg_receive(struct sock *sk, struct sk_buff *skb) { struct wg_device *wg; if (unlikely(!sk)) goto err; wg = sk->sk_user_data; if (unlikely(!wg)) goto err; skb_mark_not_on_list(skb); wg_packet_receive(wg, skb); return 0; err: kfree_skb(skb); return 0; } static void sock_free(struct sock *sock) { if (unlikely(!sock)) return; sk_clear_memalloc(sock); udp_tunnel_sock_release(sock->sk_socket); } static void set_sock_opts(struct socket *sock) { sock->sk->sk_allocation = GFP_ATOMIC; sock->sk->sk_sndbuf = INT_MAX; sk_set_memalloc(sock->sk); } int wg_socket_init(struct wg_device *wg, u16 port) { struct net *net; int ret; struct udp_tunnel_sock_cfg cfg = { .sk_user_data = wg, .encap_type = 1, .encap_rcv = wg_receive }; struct socket *new4 = NULL, *new6 = NULL; struct udp_port_cfg port4 = { .family = AF_INET, .local_ip.s_addr = htonl(INADDR_ANY), .local_udp_port = htons(port), .use_udp_checksums = true }; #if IS_ENABLED(CONFIG_IPV6) int retries = 0; struct udp_port_cfg port6 = { .family = AF_INET6, .local_ip6 = IN6ADDR_ANY_INIT, .use_udp6_tx_checksums = true, .use_udp6_rx_checksums = true, .ipv6_v6only = true }; #endif rcu_read_lock(); net = rcu_dereference(wg->creating_net); net = net ? maybe_get_net(net) : NULL; rcu_read_unlock(); if (unlikely(!net)) return -ENONET; #if IS_ENABLED(CONFIG_IPV6) retry: #endif ret = udp_sock_create(net, &port4, &new4); if (ret < 0) { pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); goto out; } set_sock_opts(new4); setup_udp_tunnel_sock(net, new4, &cfg); #if IS_ENABLED(CONFIG_IPV6) if (ipv6_mod_enabled()) { port6.local_udp_port = inet_sk(new4->sk)->inet_sport; ret = udp_sock_create(net, &port6, &new6); if (ret < 0) { udp_tunnel_sock_release(new4); if (ret == -EADDRINUSE && !port && retries++ < 100) goto retry; pr_err("%s: Could not create IPv6 socket\n", wg->dev->name); goto out; } set_sock_opts(new6); setup_udp_tunnel_sock(net, new6, &cfg); } #endif wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL); ret = 0; out: put_net(net); return ret; } void wg_socket_reinit(struct wg_device *wg, struct sock *new4, struct sock *new6) { struct sock *old4, *old6; mutex_lock(&wg->socket_update_lock); old4 = rcu_dereference_protected(wg->sock4, lockdep_is_held(&wg->socket_update_lock)); old6 = rcu_dereference_protected(wg->sock6, lockdep_is_held(&wg->socket_update_lock)); rcu_assign_pointer(wg->sock4, new4); rcu_assign_pointer(wg->sock6, new6); if (new4) wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); mutex_unlock(&wg->socket_update_lock); synchronize_net(); sock_free(old4); sock_free(old6); } |
8 8 8 8 5 5 5 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | // SPDX-License-Identifier: GPL-2.0-only // Copyright (c) 2020 Facebook Inc. #include <linux/debugfs.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <net/udp_tunnel.h> #include "netdevsim.h" static int nsim_udp_tunnel_set_port(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti) { struct netdevsim *ns = netdev_priv(dev); int ret; ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; if (ns->udp_ports.sleep) msleep(ns->udp_ports.sleep); if (!ret) { if (ns->udp_ports.ports[table][entry]) { WARN(1, "entry already in use\n"); ret = -EBUSY; } else { ns->udp_ports.ports[table][entry] = be16_to_cpu(ti->port) << 16 | ti->type; } } netdev_info(dev, "set [%d, %d] type %d family %d port %d - %d\n", table, entry, ti->type, ti->sa_family, ntohs(ti->port), ret); return ret; } static int nsim_udp_tunnel_unset_port(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti) { struct netdevsim *ns = netdev_priv(dev); int ret; ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; if (ns->udp_ports.sleep) msleep(ns->udp_ports.sleep); if (!ret) { u32 val = be16_to_cpu(ti->port) << 16 | ti->type; if (val == ns->udp_ports.ports[table][entry]) { ns->udp_ports.ports[table][entry] = 0; } else { WARN(1, "entry not installed %x vs %x\n", val, ns->udp_ports.ports[table][entry]); ret = -ENOENT; } } netdev_info(dev, "unset [%d, %d] type %d family %d port %d - %d\n", table, entry, ti->type, ti->sa_family, ntohs(ti->port), ret); return ret; } static int nsim_udp_tunnel_sync_table(struct net_device *dev, unsigned int table) { struct netdevsim *ns = netdev_priv(dev); struct udp_tunnel_info ti; unsigned int i; int ret; ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; for (i = 0; i < NSIM_UDP_TUNNEL_N_PORTS; i++) { udp_tunnel_nic_get_port(dev, table, i, &ti); ns->udp_ports.ports[table][i] = be16_to_cpu(ti.port) << 16 | ti.type; } return ret; } static const struct udp_tunnel_nic_info nsim_udp_tunnel_info = { .set_port = nsim_udp_tunnel_set_port, .unset_port = nsim_udp_tunnel_unset_port, .sync_table = nsim_udp_tunnel_sync_table, .tables = { { .n_entries = NSIM_UDP_TUNNEL_N_PORTS, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, { .n_entries = NSIM_UDP_TUNNEL_N_PORTS, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE | UDP_TUNNEL_TYPE_VXLAN_GPE, }, }, }; static ssize_t nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data, size_t count, loff_t *ppos) { struct net_device *dev = file->private_data; struct netdevsim *ns = netdev_priv(dev); memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports)); rtnl_lock(); udp_tunnel_nic_reset_ntf(dev); rtnl_unlock(); return count; } static const struct file_operations nsim_udp_tunnels_info_reset_fops = { .open = simple_open, .write = nsim_udp_tunnels_info_reset_write, .llseek = generic_file_llseek, .owner = THIS_MODULE, }; int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); struct udp_tunnel_nic_info *info; if (nsim_dev->udp_ports.shared && nsim_dev->udp_ports.open_only) { dev_err(&nsim_dev->nsim_bus_dev->dev, "shared can't be used in conjunction with open_only\n"); return -EINVAL; } if (!nsim_dev->udp_ports.shared) ns->udp_ports.ports = ns->udp_ports.__ports; else ns->udp_ports.ports = nsim_dev->udp_ports.__ports; debugfs_create_u32("udp_ports_inject_error", 0600, ns->nsim_dev_port->ddir, &ns->udp_ports.inject_error); ns->udp_ports.dfs_ports[0].array = ns->udp_ports.ports[0]; ns->udp_ports.dfs_ports[0].n_elements = NSIM_UDP_TUNNEL_N_PORTS; debugfs_create_u32_array("udp_ports_table0", 0400, ns->nsim_dev_port->ddir, &ns->udp_ports.dfs_ports[0]); ns->udp_ports.dfs_ports[1].array = ns->udp_ports.ports[1]; ns->udp_ports.dfs_ports[1].n_elements = NSIM_UDP_TUNNEL_N_PORTS; debugfs_create_u32_array("udp_ports_table1", 0400, ns->nsim_dev_port->ddir, &ns->udp_ports.dfs_ports[1]); debugfs_create_file("udp_ports_reset", 0200, ns->nsim_dev_port->ddir, dev, &nsim_udp_tunnels_info_reset_fops); /* Note: it's not normal to allocate the info struct like this! * Drivers are expected to use a static const one, here we're testing. */ info = kmemdup(&nsim_udp_tunnel_info, sizeof(nsim_udp_tunnel_info), GFP_KERNEL); if (!info) return -ENOMEM; ns->udp_ports.sleep = nsim_dev->udp_ports.sleep; if (nsim_dev->udp_ports.sync_all) { info->set_port = NULL; info->unset_port = NULL; } else { info->sync_table = NULL; } if (ns->udp_ports.sleep) info->flags |= UDP_TUNNEL_NIC_INFO_MAY_SLEEP; if (nsim_dev->udp_ports.open_only) info->flags |= UDP_TUNNEL_NIC_INFO_OPEN_ONLY; if (nsim_dev->udp_ports.ipv4_only) info->flags |= UDP_TUNNEL_NIC_INFO_IPV4_ONLY; if (nsim_dev->udp_ports.shared) info->shared = &nsim_dev->udp_ports.utn_shared; if (nsim_dev->udp_ports.static_iana_vxlan) info->flags |= UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; dev->udp_tunnel_nic_info = info; return 0; } void nsim_udp_tunnels_info_destroy(struct net_device *dev) { kfree(dev->udp_tunnel_nic_info); dev->udp_tunnel_nic_info = NULL; } void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev) { debugfs_create_bool("udp_ports_sync_all", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.sync_all); debugfs_create_bool("udp_ports_open_only", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.open_only); debugfs_create_bool("udp_ports_ipv4_only", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.ipv4_only); debugfs_create_bool("udp_ports_shared", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.shared); debugfs_create_bool("udp_ports_static_iana_vxlan", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.static_iana_vxlan); debugfs_create_u32("udp_ports_sleep", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.sleep); } |
1 1 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | // SPDX-License-Identifier: GPL-2.0-only /* * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org> */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/slab.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("ip6tables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_FILTER, }; static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ static bool forward = true; module_param(forward, bool, 0000); static int ip6table_filter_table_init(struct net *net) { struct ip6t_replace *repl; int err; repl = ip6t_alloc_initial_table(&packet_filter); if (repl == NULL) return -ENOMEM; /* Entry 1 is the FORWARD hook */ ((struct ip6t_standard *)repl->entries)[1].target.verdict = forward ? -NF_ACCEPT - 1 : NF_DROP - 1; err = ip6t_register_table(net, &packet_filter, repl, filter_ops); kfree(repl); return err; } static int __net_init ip6table_filter_net_init(struct net *net) { if (!forward) return ip6table_filter_table_init(net); return 0; } static void __net_exit ip6table_filter_net_pre_exit(struct net *net) { ip6t_unregister_table_pre_exit(net, "filter"); } static void __net_exit ip6table_filter_net_exit(struct net *net) { ip6t_unregister_table_exit(net, "filter"); } static struct pernet_operations ip6table_filter_net_ops = { .init = ip6table_filter_net_init, .pre_exit = ip6table_filter_net_pre_exit, .exit = ip6table_filter_net_exit, }; static int __init ip6table_filter_init(void) { int ret = xt_register_template(&packet_filter, ip6table_filter_table_init); if (ret < 0) return ret; filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table); if (IS_ERR(filter_ops)) { xt_unregister_template(&packet_filter); return PTR_ERR(filter_ops); } ret = register_pernet_subsys(&ip6table_filter_net_ops); if (ret < 0) { xt_unregister_template(&packet_filter); kfree(filter_ops); return ret; } return ret; } static void __exit ip6table_filter_fini(void) { unregister_pernet_subsys(&ip6table_filter_net_ops); xt_unregister_template(&packet_filter); kfree(filter_ops); } module_init(ip6table_filter_init); module_exit(ip6table_filter_fini); |
7 1 1 2 3 3 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 | // SPDX-License-Identifier: GPL-2.0-only /* * net/sched/sch_choke.c CHOKE scheduler * * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com> * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com> */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/inet_ecn.h> #include <net/red.h> #include <net/flow_dissector.h> /* CHOKe stateless AQM for fair bandwidth allocation ================================================= CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for unresponsive flows) is a variant of RED that penalizes misbehaving flows but maintains no flow state. The difference from RED is an additional step during the enqueuing process. If average queue size is over the low threshold (qmin), a packet is chosen at random from the queue. If both the new and chosen packet are from the same flow, both are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it needs to access packets in queue randomly. It has a minimal class interface to allow overriding the builtin flow classifier with filters. Source: R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless Active Queue Management Scheme for Approximating Fair Bandwidth Allocation", IEEE INFOCOM, 2000. A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial Characteristics", IEEE/ACM Transactions on Networking, 2004 */ /* Upper bound on size of sk_buff table (packets) */ #define CHOKE_MAX_QUEUE (128*1024 - 1) struct choke_sched_data { /* Parameters */ u32 limit; unsigned char flags; struct red_parms parms; /* Variables */ struct red_vars vars; struct { u32 prob_drop; /* Early probability drops */ u32 prob_mark; /* Early probability marks */ u32 forced_drop; /* Forced drops, qavg > max_thresh */ u32 forced_mark; /* Forced marks, qavg > max_thresh */ u32 pdrop; /* Drops due to queue limits */ u32 matched; /* Drops to flow match */ } stats; unsigned int head; unsigned int tail; unsigned int tab_mask; /* size - 1 */ struct sk_buff **tab; }; /* number of elements in queue including holes */ static unsigned int choke_len(const struct choke_sched_data *q) { return (q->tail - q->head) & q->tab_mask; } /* Is ECN parameter configured */ static int use_ecn(const struct choke_sched_data *q) { return q->flags & TC_RED_ECN; } /* Should packets over max just be dropped (versus marked) */ static int use_harddrop(const struct choke_sched_data *q) { return q->flags & TC_RED_HARDDROP; } /* Move head pointer forward to skip over holes */ static void choke_zap_head_holes(struct choke_sched_data *q) { do { q->head = (q->head + 1) & q->tab_mask; if (q->head == q->tail) break; } while (q->tab[q->head] == NULL); } /* Move tail pointer backwards to reuse holes */ static void choke_zap_tail_holes(struct choke_sched_data *q) { do { q->tail = (q->tail - 1) & q->tab_mask; if (q->head == q->tail) break; } while (q->tab[q->tail] == NULL); } /* Drop packet from queue array by creating a "hole" */ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx, struct sk_buff **to_free) { struct choke_sched_data *q = qdisc_priv(sch); struct sk_buff *skb = q->tab[idx]; q->tab[idx] = NULL; if (idx == q->head) choke_zap_head_holes(q); if (idx == q->tail) choke_zap_tail_holes(q); qdisc_qstats_backlog_dec(sch, skb); qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_drop(skb, sch, to_free); --sch->q.qlen; } struct choke_skb_cb { u8 keys_valid; struct flow_keys_digest keys; }; static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) { qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb)); return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data; } /* * Compare flow of two packets * Returns true only if source and destination address and port match. * false for special cases */ static bool choke_match_flow(struct sk_buff *skb1, struct sk_buff *skb2) { struct flow_keys temp; if (skb1->protocol != skb2->protocol) return false; if (!choke_skb_cb(skb1)->keys_valid) { choke_skb_cb(skb1)->keys_valid = 1; skb_flow_dissect_flow_keys(skb1, &temp, 0); make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp); } if (!choke_skb_cb(skb2)->keys_valid) { choke_skb_cb(skb2)->keys_valid = 1; skb_flow_dissect_flow_keys(skb2, &temp, 0); make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp); } return !memcmp(&choke_skb_cb(skb1)->keys, &choke_skb_cb(skb2)->keys, sizeof(choke_skb_cb(skb1)->keys)); } /* * Select a packet at random from queue * HACK: since queue can have holes from previous deletion; retry several * times to find a random skb but then just give up and return the head * Will return NULL if queue is empty (q->head == q->tail) */ static struct sk_buff *choke_peek_random(const struct choke_sched_data *q, unsigned int *pidx) { struct sk_buff *skb; int retrys = 3; do { *pidx = (q->head + get_random_u32_below(choke_len(q))) & q->tab_mask; skb = q->tab[*pidx]; if (skb) return skb; } while (--retrys > 0); return q->tab[*pidx = q->head]; } /* * Compare new packet with random packet in queue * returns true if matched and sets *pidx */ static bool choke_match_random(const struct choke_sched_data *q, struct sk_buff *nskb, unsigned int *pidx) { struct sk_buff *oskb; if (q->head == q->tail) return false; oskb = choke_peek_random(q, pidx); return choke_match_flow(oskb, nskb); } static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct choke_sched_data *q = qdisc_priv(sch); const struct red_parms *p = &q->parms; choke_skb_cb(skb)->keys_valid = 0; /* Compute average queue usage (see RED) */ q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen); if (red_is_idling(&q->vars)) red_end_of_idle_period(&q->vars); /* Is queue small? */ if (q->vars.qavg <= p->qth_min) q->vars.qcount = -1; else { unsigned int idx; /* Draw a packet at random from queue and compare flow */ if (choke_match_random(q, skb, &idx)) { q->stats.matched++; choke_drop_by_idx(sch, idx, to_free); goto congestion_drop; } /* Queue is large, always mark/drop */ if (q->vars.qavg > p->qth_max) { q->vars.qcount = -1; qdisc_qstats_overlimit(sch); if (use_harddrop(q) || !use_ecn(q) || !INET_ECN_set_ce(skb)) { q->stats.forced_drop++; goto congestion_drop; } q->stats.forced_mark++; } else if (++q->vars.qcount) { if (red_mark_probability(p, &q->vars, q->vars.qavg)) { q->vars.qcount = 0; q->vars.qR = red_random(p); qdisc_qstats_overlimit(sch); if (!use_ecn(q) || !INET_ECN_set_ce(skb)) { q->stats.prob_drop++; goto congestion_drop; } q->stats.prob_mark++; } } else q->vars.qR = red_random(p); } /* Admit new packet */ if (sch->q.qlen < q->limit) { q->tab[q->tail] = skb; q->tail = (q->tail + 1) & q->tab_mask; ++sch->q.qlen; qdisc_qstats_backlog_inc(sch, skb); return NET_XMIT_SUCCESS; } q->stats.pdrop++; return qdisc_drop(skb, sch, to_free); congestion_drop: qdisc_drop(skb, sch, to_free); return NET_XMIT_CN; } static struct sk_buff *choke_dequeue(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; if (q->head == q->tail) { if (!red_is_idling(&q->vars)) red_start_of_idle_period(&q->vars); return NULL; } skb = q->tab[q->head]; q->tab[q->head] = NULL; choke_zap_head_holes(q); --sch->q.qlen; qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); return skb; } static void choke_reset(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); while (q->head != q->tail) { struct sk_buff *skb = q->tab[q->head]; q->head = (q->head + 1) & q->tab_mask; if (!skb) continue; rtnl_qdisc_drop(skb, sch); } if (q->tab) memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); q->head = q->tail = 0; red_restart(&q->vars); } static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = { [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) }, [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE }, [TCA_CHOKE_MAX_P] = { .type = NLA_U32 }, }; static void choke_free(void *addr) { kvfree(addr); } static int choke_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct choke_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_CHOKE_MAX + 1]; const struct tc_red_qopt *ctl; int err; struct sk_buff **old = NULL; unsigned int mask; u32 max_P; u8 *stab; if (opt == NULL) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt, choke_policy, NULL); if (err < 0) return err; if (tb[TCA_CHOKE_PARMS] == NULL || tb[TCA_CHOKE_STAB] == NULL) return -EINVAL; max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0; ctl = nla_data(tb[TCA_CHOKE_PARMS]); stab = nla_data(tb[TCA_CHOKE_STAB]); if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab)) return -EINVAL; if (ctl->limit > CHOKE_MAX_QUEUE) return -EINVAL; mask = roundup_pow_of_two(ctl->limit + 1) - 1; if (mask != q->tab_mask) { struct sk_buff **ntab; ntab = kvcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL); if (!ntab) return -ENOMEM; sch_tree_lock(sch); old = q->tab; if (old) { unsigned int oqlen = sch->q.qlen, tail = 0; unsigned dropped = 0; while (q->head != q->tail) { struct sk_buff *skb = q->tab[q->head]; q->head = (q->head + 1) & q->tab_mask; if (!skb) continue; if (tail < mask) { ntab[tail++] = skb; continue; } dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); --sch->q.qlen; rtnl_qdisc_drop(skb, sch); } qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped); q->head = 0; q->tail = tail; } q->tab_mask = mask; q->tab = ntab; } else sch_tree_lock(sch); WRITE_ONCE(q->flags, ctl->flags); WRITE_ONCE(q->limit, ctl->limit); red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, ctl->Scell_log, stab, max_P); red_set_vars(&q->vars); if (q->head == q->tail) red_end_of_idle_period(&q->vars); sch_tree_unlock(sch); choke_free(old); return 0; } static int choke_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { return choke_change(sch, opt, extack); } static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) { struct choke_sched_data *q = qdisc_priv(sch); u8 Wlog = READ_ONCE(q->parms.Wlog); struct nlattr *opts = NULL; struct tc_red_qopt opt = { .limit = READ_ONCE(q->limit), .flags = READ_ONCE(q->flags), .qth_min = READ_ONCE(q->parms.qth_min) >> Wlog, .qth_max = READ_ONCE(q->parms.qth_max) >> Wlog, .Wlog = Wlog, .Plog = READ_ONCE(q->parms.Plog), .Scell_log = READ_ONCE(q->parms.Scell_log), }; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) || nla_put_u32(skb, TCA_CHOKE_MAX_P, READ_ONCE(q->parms.max_P))) goto nla_put_failure; return nla_nest_end(skb, opts); nla_put_failure: nla_nest_cancel(skb, opts); return -EMSGSIZE; } static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct choke_sched_data *q = qdisc_priv(sch); struct tc_choke_xstats st = { .early = q->stats.prob_drop + q->stats.forced_drop, .marked = q->stats.prob_mark + q->stats.forced_mark, .pdrop = q->stats.pdrop, .matched = q->stats.matched, }; return gnet_stats_copy_app(d, &st, sizeof(st)); } static void choke_destroy(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); choke_free(q->tab); } static struct sk_buff *choke_peek_head(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); return (q->head != q->tail) ? q->tab[q->head] : NULL; } static struct Qdisc_ops choke_qdisc_ops __read_mostly = { .id = "choke", .priv_size = sizeof(struct choke_sched_data), .enqueue = choke_enqueue, .dequeue = choke_dequeue, .peek = choke_peek_head, .init = choke_init, .destroy = choke_destroy, .reset = choke_reset, .change = choke_change, .dump = choke_dump, .dump_stats = choke_dump_stats, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("choke"); static int __init choke_module_init(void) { return register_qdisc(&choke_qdisc_ops); } static void __exit choke_module_exit(void) { unregister_qdisc(&choke_qdisc_ops); } module_init(choke_module_init) module_exit(choke_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Choose and keep responsive flows scheduler"); |
2 1 1 10 1 1 1 1 1 1 1 1 1 10 10 9 3 3 3 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 | // SPDX-License-Identifier: GPL-2.0-only /* * net/sched/sch_qfq.c Quick Fair Queueing Plus Scheduler. * * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente. * Copyright (c) 2012 Paolo Valente. */ #include <linux/module.h> #include <linux/init.h> #include <linux/bitops.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/pkt_sched.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> /* Quick Fair Queueing Plus ======================== Sources: [1] Paolo Valente, "Reducing the Execution Time of Fair-Queueing Schedulers." http://algo.ing.unimo.it/people/paolo/agg-sched/agg-sched.pdf Sources for QFQ: [2] Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient Packet Scheduling with Tight Bandwidth Distribution Guarantees." See also: http://retis.sssup.it/~fabio/linux/qfq/ */ /* QFQ+ divides classes into aggregates of at most MAX_AGG_CLASSES classes. Each aggregate is timestamped with a virtual start time S and a virtual finish time F, and scheduled according to its timestamps. S and F are computed as a function of a system virtual time function V. The classes within each aggregate are instead scheduled with DRR. To speed up operations, QFQ+ divides also aggregates into a limited number of groups. Which group a class belongs to depends on the ratio between the maximum packet length for the class and the weight of the class. Groups have their own S and F. In the end, QFQ+ schedules groups, then aggregates within groups, then classes within aggregates. See [1] and [2] for a full description. Virtual time computations. S, F and V are all computed in fixed point arithmetic with FRAC_BITS decimal bits. QFQ_MAX_INDEX is the maximum index allowed for a group. We need one bit per index. QFQ_MAX_WSHIFT is the maximum power of two supported as a weight. The layout of the bits is as below: [ MTU_SHIFT ][ FRAC_BITS ] [ MAX_INDEX ][ MIN_SLOT_SHIFT ] ^.__grp->index = 0 *.__grp->slot_shift where MIN_SLOT_SHIFT is derived by difference from the others. The max group index corresponds to Lmax/w_min, where Lmax=1<<MTU_SHIFT, w_min = 1 . From this, and knowing how many groups (MAX_INDEX) we want, we can derive the shift corresponding to each group. Because we often need to compute F = S + len/w_i and V = V + len/wsum instead of storing w_i store the value inv_w = (1<<FRAC_BITS)/w_i so we can do F = S + len * inv_w * wsum. We use W_TOT in the formulas so we can easily move between static and adaptive weight sum. The per-scheduler-instance data contain all the data structures for the scheduler: bitmaps and bucket lists. */ /* * Maximum number of consecutive slots occupied by backlogged classes * inside a group. */ #define QFQ_MAX_SLOTS 32 /* * Shifts used for aggregate<->group mapping. We allow class weights that are * in the range [1, 2^MAX_WSHIFT], and we try to map each aggregate i to the * group with the smallest index that can support the L_i / r_i configured * for the classes in the aggregate. * * grp->index is the index of the group; and grp->slot_shift * is the shift for the corresponding (scaled) sigma_i. */ #define QFQ_MAX_INDEX 24 #define QFQ_MAX_WSHIFT 10 #define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT) /* see qfq_slot_insert */ #define QFQ_MAX_WSUM (64*QFQ_MAX_WEIGHT) #define FRAC_BITS 30 /* fixed point arithmetic */ #define ONE_FP (1UL << FRAC_BITS) #define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */ #define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */ #define QFQ_MAX_LMAX (1UL << QFQ_MTU_SHIFT) #define QFQ_MAX_AGG_CLASSES 8 /* max num classes per aggregate allowed */ /* * Possible group states. These values are used as indexes for the bitmaps * array of struct qfq_queue. */ enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE }; struct qfq_group; struct qfq_aggregate; struct qfq_class { struct Qdisc_class_common common; struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct Qdisc *qdisc; struct list_head alist; /* Link for active-classes list. */ struct qfq_aggregate *agg; /* Parent aggregate. */ int deficit; /* DRR deficit counter. */ }; struct qfq_aggregate { struct hlist_node next; /* Link for the slot list. */ u64 S, F; /* flow timestamps (exact) */ /* group we belong to. In principle we would need the index, * which is log_2(lmax/weight), but we never reference it * directly, only the group. */ struct qfq_group *grp; /* these are copied from the flowset. */ u32 class_weight; /* Weight of each class in this aggregate. */ /* Max pkt size for the classes in this aggregate, DRR quantum. */ int lmax; u32 inv_w; /* ONE_FP/(sum of weights of classes in aggr.). */ u32 budgetmax; /* Max budget for this aggregate. */ u32 initial_budget, budget; /* Initial and current budget. */ int num_classes; /* Number of classes in this aggr. */ struct list_head active; /* DRR queue of active classes. */ struct hlist_node nonfull_next; /* See nonfull_aggs in qfq_sched. */ }; struct qfq_group { u64 S, F; /* group timestamps (approx). */ unsigned int slot_shift; /* Slot shift. */ unsigned int index; /* Group index. */ unsigned int front; /* Index of the front slot. */ unsigned long full_slots; /* non-empty slots */ /* Array of RR lists of active aggregates. */ struct hlist_head slots[QFQ_MAX_SLOTS]; }; struct qfq_sched { struct tcf_proto __rcu *filter_list; struct tcf_block *block; struct Qdisc_class_hash clhash; u64 oldV, V; /* Precise virtual times. */ struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */ u32 wsum; /* weight sum */ u32 iwsum; /* inverse weight sum */ unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ u32 min_slot_shift; /* Index of the group-0 bit in the bitmaps. */ u32 max_agg_classes; /* Max number of classes per aggr. */ struct hlist_head nonfull_aggs; /* Aggs with room for more classes. */ }; /* * Possible reasons why the timestamps of an aggregate are updated * enqueue: the aggregate switches from idle to active and must scheduled * for service * requeue: the aggregate finishes its budget, so it stops being served and * must be rescheduled for service */ enum update_reason {enqueue, requeue}; static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) { struct qfq_sched *q = qdisc_priv(sch); struct Qdisc_class_common *clc; clc = qdisc_class_find(&q->clhash, classid); if (clc == NULL) return NULL; return container_of(clc, struct qfq_class, common); } static const struct netlink_range_validation lmax_range = { .min = QFQ_MIN_LMAX, .max = QFQ_MAX_LMAX, }; static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { [TCA_QFQ_WEIGHT] = NLA_POLICY_RANGE(NLA_U32, 1, QFQ_MAX_WEIGHT), [TCA_QFQ_LMAX] = NLA_POLICY_FULL_RANGE(NLA_U32, &lmax_range), }; /* * Calculate a flow index, given its weight and maximum packet length. * index = log_2(maxlen/weight) but we need to apply the scaling. * This is used only once at flow creation. */ static int qfq_calc_index(u32 inv_w, unsigned int maxlen, u32 min_slot_shift) { u64 slot_size = (u64)maxlen * inv_w; unsigned long size_map; int index = 0; size_map = slot_size >> min_slot_shift; if (!size_map) goto out; index = __fls(size_map) + 1; /* basically a log_2 */ index -= !(slot_size - (1ULL << (index + min_slot_shift - 1))); if (index < 0) index = 0; out: pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n", (unsigned long) ONE_FP/inv_w, maxlen, index); return index; } static void qfq_deactivate_agg(struct qfq_sched *, struct qfq_aggregate *); static void qfq_activate_agg(struct qfq_sched *, struct qfq_aggregate *, enum update_reason); static void qfq_init_agg(struct qfq_sched *q, struct qfq_aggregate *agg, u32 lmax, u32 weight) { INIT_LIST_HEAD(&agg->active); hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs); agg->lmax = lmax; agg->class_weight = weight; } static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q, u32 lmax, u32 weight) { struct qfq_aggregate *agg; hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next) if (agg->lmax == lmax && agg->class_weight == weight) return agg; return NULL; } /* Update aggregate as a function of the new number of classes. */ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg, int new_num_classes) { u32 new_agg_weight; if (new_num_classes == q->max_agg_classes) hlist_del_init(&agg->nonfull_next); if (agg->num_classes > new_num_classes && new_num_classes == q->max_agg_classes - 1) /* agg no more full */ hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs); /* The next assignment may let * agg->initial_budget > agg->budgetmax * hold, we will take it into account in charge_actual_service(). */ agg->budgetmax = new_num_classes * agg->lmax; new_agg_weight = agg->class_weight * new_num_classes; agg->inv_w = ONE_FP/new_agg_weight; if (agg->grp == NULL) { int i = qfq_calc_index(agg->inv_w, agg->budgetmax, q->min_slot_shift); agg->grp = &q->groups[i]; } q->wsum += (int) agg->class_weight * (new_num_classes - agg->num_classes); q->iwsum = ONE_FP / q->wsum; agg->num_classes = new_num_classes; } /* Add class to aggregate. */ static void qfq_add_to_agg(struct qfq_sched *q, struct qfq_aggregate *agg, struct qfq_class *cl) { cl->agg = agg; qfq_update_agg(q, agg, agg->num_classes+1); if (cl->qdisc->q.qlen > 0) { /* adding an active class */ list_add_tail(&cl->alist, &agg->active); if (list_first_entry(&agg->active, struct qfq_class, alist) == cl && q->in_serv_agg != agg) /* agg was inactive */ qfq_activate_agg(q, agg, enqueue); /* schedule agg */ } } static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *); static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg) { hlist_del_init(&agg->nonfull_next); q->wsum -= agg->class_weight; if (q->wsum != 0) q->iwsum = ONE_FP / q->wsum; if (q->in_serv_agg == agg) q->in_serv_agg = qfq_choose_next_agg(q); kfree(agg); } /* Deschedule class from within its parent aggregate. */ static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl) { struct qfq_aggregate *agg = cl->agg; list_del(&cl->alist); /* remove from RR queue of the aggregate */ if (list_empty(&agg->active)) /* agg is now inactive */ qfq_deactivate_agg(q, agg); } /* Remove class from its parent aggregate. */ static void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl) { struct qfq_aggregate *agg = cl->agg; cl->agg = NULL; if (agg->num_classes == 1) { /* agg being emptied, destroy it */ qfq_destroy_agg(q, agg); return; } qfq_update_agg(q, agg, agg->num_classes-1); } /* Deschedule class and remove it from its parent aggregate. */ static void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl) { if (cl->qdisc->q.qlen > 0) /* class is active */ qfq_deactivate_class(q, cl); qfq_rm_from_agg(q, cl); } /* Move class to a new aggregate, matching the new class weight and/or lmax */ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight, u32 lmax) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_aggregate *new_agg; /* 'lmax' can range from [QFQ_MIN_LMAX, pktlen + stab overhead] */ if (lmax > QFQ_MAX_LMAX) return -EINVAL; new_agg = qfq_find_agg(q, lmax, weight); if (new_agg == NULL) { /* create new aggregate */ new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC); if (new_agg == NULL) return -ENOBUFS; qfq_init_agg(q, new_agg, lmax, weight); } qfq_deact_rm_from_agg(q, cl); qfq_add_to_agg(q, new_agg, cl); return 0; } static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, unsigned long *arg, struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)*arg; bool existing = false; struct nlattr *tb[TCA_QFQ_MAX + 1]; struct qfq_aggregate *new_agg = NULL; u32 weight, lmax, inv_w; int err; int delta_w; if (NL_REQ_ATTR_CHECK(extack, NULL, tca, TCA_OPTIONS)) { NL_SET_ERR_MSG_MOD(extack, "missing options"); return -EINVAL; } err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy, extack); if (err < 0) return err; if (tb[TCA_QFQ_WEIGHT]) weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]); else weight = 1; if (tb[TCA_QFQ_LMAX]) { lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); } else { /* MTU size is user controlled */ lmax = psched_mtu(qdisc_dev(sch)); if (lmax < QFQ_MIN_LMAX || lmax > QFQ_MAX_LMAX) { NL_SET_ERR_MSG_MOD(extack, "MTU size out of bounds for qfq"); return -EINVAL; } } inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; if (cl != NULL && lmax == cl->agg->lmax && weight == cl->agg->class_weight) return 0; /* nothing to change */ delta_w = weight - (cl ? cl->agg->class_weight : 0); if (q->wsum + delta_w > QFQ_MAX_WSUM) { NL_SET_ERR_MSG_FMT_MOD(extack, "total weight out of range (%d + %u)\n", delta_w, q->wsum); return -EINVAL; } if (cl != NULL) { /* modify existing class */ if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, true, tca[TCA_RATE]); if (err) return err; } existing = true; goto set_change_agg; } /* create and init new class */ cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL); if (cl == NULL) return -ENOBUFS; gnet_stats_basic_sync_init(&cl->bstats); cl->common.classid = classid; cl->deficit = lmax; cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, NULL); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, true, tca[TCA_RATE]); if (err) goto destroy_class; } if (cl->qdisc != &noop_qdisc) qdisc_hash_add(cl->qdisc, true); set_change_agg: sch_tree_lock(sch); new_agg = qfq_find_agg(q, lmax, weight); if (new_agg == NULL) { /* create new aggregate */ sch_tree_unlock(sch); new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL); if (new_agg == NULL) { err = -ENOBUFS; gen_kill_estimator(&cl->rate_est); goto destroy_class; } sch_tree_lock(sch); qfq_init_agg(q, new_agg, lmax, weight); } if (existing) qfq_deact_rm_from_agg(q, cl); else qdisc_class_hash_insert(&q->clhash, &cl->common); qfq_add_to_agg(q, new_agg, cl); sch_tree_unlock(sch); qdisc_class_hash_grow(sch, &q->clhash); *arg = (unsigned long)cl; return 0; destroy_class: qdisc_put(cl->qdisc); kfree(cl); return err; } static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) { struct qfq_sched *q = qdisc_priv(sch); qfq_rm_from_agg(q, cl); gen_kill_estimator(&cl->rate_est); qdisc_put(cl->qdisc); kfree(cl); } static int qfq_delete_class(struct Qdisc *sch, unsigned long arg, struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; if (qdisc_class_in_use(&cl->common)) { NL_SET_ERR_MSG_MOD(extack, "QFQ class in use"); return -EBUSY; } sch_tree_lock(sch); qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); sch_tree_unlock(sch); qfq_destroy_class(sch, cl); return 0; } static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid) { return (unsigned long)qfq_find_class(sch, classid); } static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl, struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); if (cl) return NULL; return q->block; } static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid) { struct qfq_class *cl = qfq_find_class(sch, classid); if (cl) qdisc_class_get(&cl->common); return (unsigned long)cl; } static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg) { struct qfq_class *cl = (struct qfq_class *)arg; qdisc_class_put(&cl->common); } static int qfq_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct qfq_class *cl = (struct qfq_class *)arg; if (new == NULL) { new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, cl->common.classid, NULL); if (new == NULL) new = &noop_qdisc; } *old = qdisc_replace(sch, new, &cl->qdisc); return 0; } static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg) { struct qfq_class *cl = (struct qfq_class *)arg; return cl->qdisc; } static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct qfq_class *cl = (struct qfq_class *)arg; struct nlattr *nest; tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->qdisc->handle; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) || nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax)) goto nla_put_failure; return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct qfq_class *cl = (struct qfq_class *)arg; struct tc_qfq_stats xstats; memset(&xstats, 0, sizeof(xstats)); xstats.weight = cl->agg->class_weight; xstats.lmax = cl->agg->lmax; if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || qdisc_qstats_copy(d, cl->qdisc) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); } static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; } } } static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; struct tcf_result res; struct tcf_proto *fl; int result; if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { pr_debug("qfq_classify: found %d\n", skb->priority); cl = qfq_find_class(sch, skb->priority); if (cl != NULL) return cl; } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; fl = rcu_dereference_bh(q->filter_list); result = tcf_classify(skb, NULL, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; fallthrough; case TC_ACT_SHOT: return NULL; } #endif cl = (struct qfq_class *)res.class; if (cl == NULL) cl = qfq_find_class(sch, res.classid); return cl; } return NULL; } /* Generic comparison function, handling wraparound. */ static inline int qfq_gt(u64 a, u64 b) { return (s64)(a - b) > 0; } /* Round a precise timestamp to its slotted value. */ static inline u64 qfq_round_down(u64 ts, unsigned int shift) { return ts & ~((1ULL << shift) - 1); } /* return the pointer to the group with lowest index in the bitmap */ static inline struct qfq_group *qfq_ffs(struct qfq_sched *q, unsigned long bitmap) { int index = __ffs(bitmap); return &q->groups[index]; } /* Calculate a mask to mimic what would be ffs_from(). */ static inline unsigned long mask_from(unsigned long bitmap, int from) { return bitmap & ~((1UL << from) - 1); } /* * The state computation relies on ER=0, IR=1, EB=2, IB=3 * First compute eligibility comparing grp->S, q->V, * then check if someone is blocking us and possibly add EB */ static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp) { /* if S > V we are not eligible */ unsigned int state = qfq_gt(grp->S, q->V); unsigned long mask = mask_from(q->bitmaps[ER], grp->index); struct qfq_group *next; if (mask) { next = qfq_ffs(q, mask); if (qfq_gt(grp->F, next->F)) state |= EB; } return state; } /* * In principle * q->bitmaps[dst] |= q->bitmaps[src] & mask; * q->bitmaps[src] &= ~mask; * but we should make sure that src != dst */ static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask, int src, int dst) { q->bitmaps[dst] |= q->bitmaps[src] & mask; q->bitmaps[src] &= ~mask; } static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F) { unsigned long mask = mask_from(q->bitmaps[ER], index + 1); struct qfq_group *next; if (mask) { next = qfq_ffs(q, mask); if (!qfq_gt(next->F, old_F)) return; } mask = (1UL << index) - 1; qfq_move_groups(q, mask, EB, ER); qfq_move_groups(q, mask, IB, IR); } /* * perhaps * old_V ^= q->V; old_V >>= q->min_slot_shift; if (old_V) { ... } * */ static void qfq_make_eligible(struct qfq_sched *q) { unsigned long vslot = q->V >> q->min_slot_shift; unsigned long old_vslot = q->oldV >> q->min_slot_shift; if (vslot != old_vslot) { unsigned long mask; int last_flip_pos = fls(vslot ^ old_vslot); if (last_flip_pos > 31) /* higher than the number of groups */ mask = ~0UL; /* make all groups eligible */ else mask = (1UL << last_flip_pos) - 1; qfq_move_groups(q, mask, IR, ER); qfq_move_groups(q, mask, IB, EB); } } /* * The index of the slot in which the input aggregate agg is to be * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2' * and not a '-1' because the start time of the group may be moved * backward by one slot after the aggregate has been inserted, and * this would cause non-empty slots to be right-shifted by one * position. * * QFQ+ fully satisfies this bound to the slot index if the parameters * of the classes are not changed dynamically, and if QFQ+ never * happens to postpone the service of agg unjustly, i.e., it never * happens that the aggregate becomes backlogged and eligible, or just * eligible, while an aggregate with a higher approximated finish time * is being served. In particular, in this case QFQ+ guarantees that * the timestamps of agg are low enough that the slot index is never * higher than 2. Unfortunately, QFQ+ cannot provide the same * guarantee if it happens to unjustly postpone the service of agg, or * if the parameters of some class are changed. * * As for the first event, i.e., an out-of-order service, the * upper bound to the slot index guaranteed by QFQ+ grows to * 2 + * QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * * (current_max_weight/current_wsum) <= 2 + 8 * 128 * 1. * * The following function deals with this problem by backward-shifting * the timestamps of agg, if needed, so as to guarantee that the slot * index is never higher than QFQ_MAX_SLOTS-2. This backward-shift may * cause the service of other aggregates to be postponed, yet the * worst-case guarantees of these aggregates are not violated. In * fact, in case of no out-of-order service, the timestamps of agg * would have been even lower than they are after the backward shift, * because QFQ+ would have guaranteed a maximum value equal to 2 for * the slot index, and 2 < QFQ_MAX_SLOTS-2. Hence the aggregates whose * service is postponed because of the backward-shift would have * however waited for the service of agg before being served. * * The other event that may cause the slot index to be higher than 2 * for agg is a recent change of the parameters of some class. If the * weight of a class is increased or the lmax (max_pkt_size) of the * class is decreased, then a new aggregate with smaller slot size * than the original parent aggregate of the class may happen to be * activated. The activation of this aggregate should be properly * delayed to when the service of the class has finished in the ideal * system tracked by QFQ+. If the activation of the aggregate is not * delayed to this reference time instant, then this aggregate may be * unjustly served before other aggregates waiting for service. This * may cause the above bound to the slot index to be violated for some * of these unlucky aggregates. * * Instead of delaying the activation of the new aggregate, which is * quite complex, the above-discussed capping of the slot index is * used to handle also the consequences of a change of the parameters * of a class. */ static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg, u64 roundedS) { u64 slot = (roundedS - grp->S) >> grp->slot_shift; unsigned int i; /* slot index in the bucket list */ if (unlikely(slot > QFQ_MAX_SLOTS - 2)) { u64 deltaS = roundedS - grp->S - ((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift); agg->S -= deltaS; agg->F -= deltaS; slot = QFQ_MAX_SLOTS - 2; } i = (grp->front + slot) % QFQ_MAX_SLOTS; hlist_add_head(&agg->next, &grp->slots[i]); __set_bit(slot, &grp->full_slots); } /* Maybe introduce hlist_first_entry?? */ static struct qfq_aggregate *qfq_slot_head(struct qfq_group *grp) { return hlist_entry(grp->slots[grp->front].first, struct qfq_aggregate, next); } /* * remove the entry from the slot */ static void qfq_front_slot_remove(struct qfq_group *grp) { struct qfq_aggregate *agg = qfq_slot_head(grp); BUG_ON(!agg); hlist_del(&agg->next); if (hlist_empty(&grp->slots[grp->front])) __clear_bit(0, &grp->full_slots); } /* * Returns the first aggregate in the first non-empty bucket of the * group. As a side effect, adjusts the bucket list so the first * non-empty bucket is at position 0 in full_slots. */ static struct qfq_aggregate *qfq_slot_scan(struct qfq_group *grp) { unsigned int i; pr_debug("qfq slot_scan: grp %u full %#lx\n", grp->index, grp->full_slots); if (grp->full_slots == 0) return NULL; i = __ffs(grp->full_slots); /* zero based */ if (i > 0) { grp->front = (grp->front + i) % QFQ_MAX_SLOTS; grp->full_slots >>= i; } return qfq_slot_head(grp); } /* * adjust the bucket list. When the start time of a group decreases, * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to * move the objects. The mask of occupied slots must be shifted * because we use ffs() to find the first non-empty slot. * This covers decreases in the group's start time, but what about * increases of the start time ? * Here too we should make sure that i is less than 32 */ static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS) { unsigned int i = (grp->S - roundedS) >> grp->slot_shift; grp->full_slots <<= i; grp->front = (grp->front - i) % QFQ_MAX_SLOTS; } static void qfq_update_eligible(struct qfq_sched *q) { struct qfq_group *grp; unsigned long ineligible; ineligible = q->bitmaps[IR] | q->bitmaps[IB]; if (ineligible) { if (!q->bitmaps[ER]) { grp = qfq_ffs(q, ineligible); if (qfq_gt(grp->S, q->V)) q->V = grp->S; } qfq_make_eligible(q); } } /* Dequeue head packet of the head class in the DRR queue of the aggregate. */ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, struct qfq_class *cl, unsigned int len) { struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc); if (!skb) return NULL; cl->deficit -= (int) len; if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */ list_del(&cl->alist); else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) { cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); } return skb; } static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg, struct qfq_class **cl, unsigned int *len) { struct sk_buff *skb; *cl = list_first_entry(&agg->active, struct qfq_class, alist); skb = (*cl)->qdisc->ops->peek((*cl)->qdisc); if (skb == NULL) qdisc_warn_nonwc("qfq_dequeue", (*cl)->qdisc); else *len = qdisc_pkt_len(skb); return skb; } /* Update F according to the actual service received by the aggregate. */ static inline void charge_actual_service(struct qfq_aggregate *agg) { /* Compute the service received by the aggregate, taking into * account that, after decreasing the number of classes in * agg, it may happen that * agg->initial_budget - agg->budget > agg->bugdetmax */ u32 service_received = min(agg->budgetmax, agg->initial_budget - agg->budget); agg->F = agg->S + (u64)service_received * agg->inv_w; } /* Assign a reasonable start time for a new aggregate in group i. * Admissible values for \hat(F) are multiples of \sigma_i * no greater than V+\sigma_i . Larger values mean that * we had a wraparound so we consider the timestamp to be stale. * * If F is not stale and F >= V then we set S = F. * Otherwise we should assign S = V, but this may violate * the ordering in EB (see [2]). So, if we have groups in ER, * set S to the F_j of the first group j which would be blocking us. * We are guaranteed not to move S backward because * otherwise our group i would still be blocked. */ static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg) { unsigned long mask; u64 limit, roundedF; int slot_shift = agg->grp->slot_shift; roundedF = qfq_round_down(agg->F, slot_shift); limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift); if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) { /* timestamp was stale */ mask = mask_from(q->bitmaps[ER], agg->grp->index); if (mask) { struct qfq_group *next = qfq_ffs(q, mask); if (qfq_gt(roundedF, next->F)) { if (qfq_gt(limit, next->F)) agg->S = next->F; else /* preserve timestamp correctness */ agg->S = limit; return; } } agg->S = q->V; } else /* timestamp is not stale */ agg->S = agg->F; } /* Update the timestamps of agg before scheduling/rescheduling it for * service. In particular, assign to agg->F its maximum possible * value, i.e., the virtual finish time with which the aggregate * should be labeled if it used all its budget once in service. */ static inline void qfq_update_agg_ts(struct qfq_sched *q, struct qfq_aggregate *agg, enum update_reason reason) { if (reason != requeue) qfq_update_start(q, agg); else /* just charge agg for the service received */ agg->S = agg->F; agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w; } static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg); static struct sk_buff *qfq_dequeue(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_aggregate *in_serv_agg = q->in_serv_agg; struct qfq_class *cl; struct sk_buff *skb = NULL; /* next-packet len, 0 means no more active classes in in-service agg */ unsigned int len = 0; if (in_serv_agg == NULL) return NULL; if (!list_empty(&in_serv_agg->active)) skb = qfq_peek_skb(in_serv_agg, &cl, &len); /* * If there are no active classes in the in-service aggregate, * or if the aggregate has not enough budget to serve its next * class, then choose the next aggregate to serve. */ if (len == 0 || in_serv_agg->budget < len) { charge_actual_service(in_serv_agg); /* recharge the budget of the aggregate */ in_serv_agg->initial_budget = in_serv_agg->budget = in_serv_agg->budgetmax; if (!list_empty(&in_serv_agg->active)) { /* * Still active: reschedule for * service. Possible optimization: if no other * aggregate is active, then there is no point * in rescheduling this aggregate, and we can * just keep it as the in-service one. This * should be however a corner case, and to * handle it, we would need to maintain an * extra num_active_aggs field. */ qfq_update_agg_ts(q, in_serv_agg, requeue); qfq_schedule_agg(q, in_serv_agg); } else if (sch->q.qlen == 0) { /* no aggregate to serve */ q->in_serv_agg = NULL; return NULL; } /* * If we get here, there are other aggregates queued: * choose the new aggregate to serve. */ in_serv_agg = q->in_serv_agg = qfq_choose_next_agg(q); skb = qfq_peek_skb(in_serv_agg, &cl, &len); } if (!skb) return NULL; sch->q.qlen--; skb = agg_dequeue(in_serv_agg, cl, len); if (!skb) { sch->q.qlen++; return NULL; } qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); /* If lmax is lowered, through qfq_change_class, for a class * owning pending packets with larger size than the new value * of lmax, then the following condition may hold. */ if (unlikely(in_serv_agg->budget < len)) in_serv_agg->budget = 0; else in_serv_agg->budget -= len; q->V += (u64)len * q->iwsum; pr_debug("qfq dequeue: len %u F %lld now %lld\n", len, (unsigned long long) in_serv_agg->F, (unsigned long long) q->V); return skb; } static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q) { struct qfq_group *grp; struct qfq_aggregate *agg, *new_front_agg; u64 old_F; qfq_update_eligible(q); q->oldV = q->V; if (!q->bitmaps[ER]) return NULL; grp = qfq_ffs(q, q->bitmaps[ER]); old_F = grp->F; agg = qfq_slot_head(grp); /* agg starts to be served, remove it from schedule */ qfq_front_slot_remove(grp); new_front_agg = qfq_slot_scan(grp); if (new_front_agg == NULL) /* group is now inactive, remove from ER */ __clear_bit(grp->index, &q->bitmaps[ER]); else { u64 roundedS = qfq_round_down(new_front_agg->S, grp->slot_shift); unsigned int s; if (grp->S == roundedS) return agg; grp->S = roundedS; grp->F = roundedS + (2ULL << grp->slot_shift); __clear_bit(grp->index, &q->bitmaps[ER]); s = qfq_calc_state(q, grp); __set_bit(grp->index, &q->bitmaps[s]); } qfq_unblock_groups(q, grp->index, old_F); return agg; } static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb), gso_segs; struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; struct qfq_aggregate *agg; int err = 0; bool first; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { if (err & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); __qdisc_drop(skb, to_free); return err; } pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid); if (unlikely(cl->agg->lmax < len)) { pr_debug("qfq: increasing maxpkt from %u to %u for class %u", cl->agg->lmax, len, cl->common.classid); err = qfq_change_agg(sch, cl, cl->agg->class_weight, len); if (err) { cl->qstats.drops++; return qdisc_drop(skb, sch, to_free); } } gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); if (net_xmit_drop_count(err)) { cl->qstats.drops++; qdisc_qstats_drop(sch); } return err; } _bstats_update(&cl->bstats, len, gso_segs); sch->qstats.backlog += len; ++sch->q.qlen; agg = cl->agg; /* if the queue was not empty, then done here */ if (!first) { if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) && list_first_entry(&agg->active, struct qfq_class, alist) == cl && cl->deficit < len) list_move_tail(&cl->alist, &agg->active); return err; } /* schedule class for service within the aggregate */ cl->deficit = agg->lmax; list_add_tail(&cl->alist, &agg->active); if (list_first_entry(&agg->active, struct qfq_class, alist) != cl || q->in_serv_agg == agg) return err; /* non-empty or in service, nothing else to do */ qfq_activate_agg(q, agg, enqueue); return err; } /* * Schedule aggregate according to its timestamps. */ static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg) { struct qfq_group *grp = agg->grp; u64 roundedS; int s; roundedS = qfq_round_down(agg->S, grp->slot_shift); /* * Insert agg in the correct bucket. * If agg->S >= grp->S we don't need to adjust the * bucket list and simply go to the insertion phase. * Otherwise grp->S is decreasing, we must make room * in the bucket list, and also recompute the group state. * Finally, if there were no flows in this group and nobody * was in ER make sure to adjust V. */ if (grp->full_slots) { if (!qfq_gt(grp->S, agg->S)) goto skip_update; /* create a slot for this agg->S */ qfq_slot_rotate(grp, roundedS); /* group was surely ineligible, remove */ __clear_bit(grp->index, &q->bitmaps[IR]); __clear_bit(grp->index, &q->bitmaps[IB]); } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) && q->in_serv_agg == NULL) q->V = roundedS; grp->S = roundedS; grp->F = roundedS + (2ULL << grp->slot_shift); s = qfq_calc_state(q, grp); __set_bit(grp->index, &q->bitmaps[s]); pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n", s, q->bitmaps[s], (unsigned long long) agg->S, (unsigned long long) agg->F, (unsigned long long) q->V); skip_update: qfq_slot_insert(grp, agg, roundedS); } /* Update agg ts and schedule agg for service */ static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg, enum update_reason reason) { agg->initial_budget = agg->budget = agg->budgetmax; /* recharge budg. */ qfq_update_agg_ts(q, agg, reason); if (q->in_serv_agg == NULL) { /* no aggr. in service or scheduled */ q->in_serv_agg = agg; /* start serving this aggregate */ /* update V: to be in service, agg must be eligible */ q->oldV = q->V = agg->S; } else if (agg != q->in_serv_agg) qfq_schedule_agg(q, agg); } static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp, struct qfq_aggregate *agg) { unsigned int i, offset; u64 roundedS; roundedS = qfq_round_down(agg->S, grp->slot_shift); offset = (roundedS - grp->S) >> grp->slot_shift; i = (grp->front + offset) % QFQ_MAX_SLOTS; hlist_del(&agg->next); if (hlist_empty(&grp->slots[i])) __clear_bit(offset, &grp->full_slots); } /* * Called to forcibly deschedule an aggregate. If the aggregate is * not in the front bucket, or if the latter has other aggregates in * the front bucket, we can simply remove the aggregate with no other * side effects. * Otherwise we must propagate the event up. */ static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg) { struct qfq_group *grp = agg->grp; unsigned long mask; u64 roundedS; int s; if (agg == q->in_serv_agg) { charge_actual_service(agg); q->in_serv_agg = qfq_choose_next_agg(q); return; } agg->F = agg->S; qfq_slot_remove(q, grp, agg); if (!grp->full_slots) { __clear_bit(grp->index, &q->bitmaps[IR]); __clear_bit(grp->index, &q->bitmaps[EB]); __clear_bit(grp->index, &q->bitmaps[IB]); if (test_bit(grp->index, &q->bitmaps[ER]) && !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) { mask = q->bitmaps[ER] & ((1UL << grp->index) - 1); if (mask) mask = ~((1UL << __fls(mask)) - 1); else mask = ~0UL; qfq_move_groups(q, mask, EB, ER); qfq_move_groups(q, mask, IB, IR); } __clear_bit(grp->index, &q->bitmaps[ER]); } else if (hlist_empty(&grp->slots[grp->front])) { agg = qfq_slot_scan(grp); roundedS = qfq_round_down(agg->S, grp->slot_shift); if (grp->S != roundedS) { __clear_bit(grp->index, &q->bitmaps[ER]); __clear_bit(grp->index, &q->bitmaps[IR]); __clear_bit(grp->index, &q->bitmaps[EB]); __clear_bit(grp->index, &q->bitmaps[IB]); grp->S = roundedS; grp->F = roundedS + (2ULL << grp->slot_shift); s = qfq_calc_state(q, grp); __set_bit(grp->index, &q->bitmaps[s]); } } } static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; qfq_deactivate_class(q, cl); } static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_group *grp; int i, j, err; u32 max_cl_shift, maxbudg_shift, max_classes; err = tcf_block_get(&q->block, &q->filter_list, sch, extack); if (err) return err; err = qdisc_class_hash_init(&q->clhash); if (err < 0) return err; max_classes = min_t(u64, (u64)qdisc_dev(sch)->tx_queue_len + 1, QFQ_MAX_AGG_CLASSES); /* max_cl_shift = floor(log_2(max_classes)) */ max_cl_shift = __fls(max_classes); q->max_agg_classes = 1<<max_cl_shift; /* maxbudg_shift = log2(max_len * max_classes_per_agg) */ maxbudg_shift = QFQ_MTU_SHIFT + max_cl_shift; q->min_slot_shift = FRAC_BITS + maxbudg_shift - QFQ_MAX_INDEX; for (i = 0; i <= QFQ_MAX_INDEX; i++) { grp = &q->groups[i]; grp->index = i; grp->slot_shift = q->min_slot_shift + i; for (j = 0; j < QFQ_MAX_SLOTS; j++) INIT_HLIST_HEAD(&grp->slots[j]); } INIT_HLIST_HEAD(&q->nonfull_aggs); return 0; } static void qfq_reset_qdisc(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen > 0) qfq_deactivate_class(q, cl); qdisc_reset(cl->qdisc); } } } static void qfq_destroy_qdisc(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; struct hlist_node *next; unsigned int i; tcf_block_put(q->block); for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) { qfq_destroy_class(sch, cl); } } qdisc_class_hash_destroy(&q->clhash); } static const struct Qdisc_class_ops qfq_class_ops = { .change = qfq_change_class, .delete = qfq_delete_class, .find = qfq_search_class, .tcf_block = qfq_tcf_block, .bind_tcf = qfq_bind_tcf, .unbind_tcf = qfq_unbind_tcf, .graft = qfq_graft_class, .leaf = qfq_class_leaf, .qlen_notify = qfq_qlen_notify, .dump = qfq_dump_class, .dump_stats = qfq_dump_class_stats, .walk = qfq_walk, }; static struct Qdisc_ops qfq_qdisc_ops __read_mostly = { .cl_ops = &qfq_class_ops, .id = "qfq", .priv_size = sizeof(struct qfq_sched), .enqueue = qfq_enqueue, .dequeue = qfq_dequeue, .peek = qdisc_peek_dequeued, .init = qfq_init_qdisc, .reset = qfq_reset_qdisc, .destroy = qfq_destroy_qdisc, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("qfq"); static int __init qfq_init(void) { return register_qdisc(&qfq_qdisc_ops); } static void __exit qfq_exit(void) { unregister_qdisc(&qfq_qdisc_ops); } module_init(qfq_init); module_exit(qfq_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Quick Fair Queueing Plus qdisc"); |
2 8 8 5 10 9 10 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 | // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org) */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <net/netrom.h> static void nr_heartbeat_expiry(struct timer_list *); static void nr_t1timer_expiry(struct timer_list *); static void nr_t2timer_expiry(struct timer_list *); static void nr_t4timer_expiry(struct timer_list *); static void nr_idletimer_expiry(struct timer_list *); void nr_init_timers(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); timer_setup(&nr->t1timer, nr_t1timer_expiry, 0); timer_setup(&nr->t2timer, nr_t2timer_expiry, 0); timer_setup(&nr->t4timer, nr_t4timer_expiry, 0); timer_setup(&nr->idletimer, nr_idletimer_expiry, 0); /* initialized by sock_init_data */ sk->sk_timer.function = nr_heartbeat_expiry; } void nr_start_t1timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1); } void nr_start_t2timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2); } void nr_start_t4timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4); } void nr_start_idletimer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); if (nr->idle > 0) sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle); } void nr_start_heartbeat(struct sock *sk) { sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ); } void nr_stop_t1timer(struct sock *sk) { sk_stop_timer(sk, &nr_sk(sk)->t1timer); } void nr_stop_t2timer(struct sock *sk) { sk_stop_timer(sk, &nr_sk(sk)->t2timer); } void nr_stop_t4timer(struct sock *sk) { sk_stop_timer(sk, &nr_sk(sk)->t4timer); } void nr_stop_idletimer(struct sock *sk) { sk_stop_timer(sk, &nr_sk(sk)->idletimer); } void nr_stop_heartbeat(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); } int nr_t1timer_running(struct sock *sk) { return timer_pending(&nr_sk(sk)->t1timer); } static void nr_heartbeat_expiry(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); struct nr_sock *nr = nr_sk(sk); bh_lock_sock(sk); switch (nr->state) { case NR_STATE_0: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { if (sk->sk_state == TCP_LISTEN) sock_hold(sk); bh_unlock_sock(sk); nr_destroy_socket(sk); goto out; } break; case NR_STATE_3: /* * Check for the state of the receive buffer. */ if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) && (nr->condition & NR_COND_OWN_RX_BUSY)) { nr->condition &= ~NR_COND_OWN_RX_BUSY; nr->condition &= ~NR_COND_ACK_PENDING; nr->vl = nr->vr; nr_write_internal(sk, NR_INFOACK); break; } break; } nr_start_heartbeat(sk); bh_unlock_sock(sk); out: sock_put(sk); } static void nr_t2timer_expiry(struct timer_list *t) { struct nr_sock *nr = from_timer(nr, t, t2timer); struct sock *sk = &nr->sock; bh_lock_sock(sk); if (nr->condition & NR_COND_ACK_PENDING) { nr->condition &= ~NR_COND_ACK_PENDING; nr_enquiry_response(sk); } bh_unlock_sock(sk); sock_put(sk); } static void nr_t4timer_expiry(struct timer_list *t) { struct nr_sock *nr = from_timer(nr, t, t4timer); struct sock *sk = &nr->sock; bh_lock_sock(sk); nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY; bh_unlock_sock(sk); sock_put(sk); } static void nr_idletimer_expiry(struct timer_list *t) { struct nr_sock *nr = from_timer(nr, t, idletimer); struct sock *sk = &nr->sock; bh_lock_sock(sk); nr_clear_queues(sk); nr->n2count = 0; nr_write_internal(sk, NR_DISCREQ); nr->state = NR_STATE_2; nr_start_t1timer(sk); nr_stop_t2timer(sk); nr_stop_t4timer(sk); sk->sk_state = TCP_CLOSE; sk->sk_err = 0; sk->sk_shutdown |= SEND_SHUTDOWN; if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); } bh_unlock_sock(sk); sock_put(sk); } static void nr_t1timer_expiry(struct timer_list *t) { struct nr_sock *nr = from_timer(nr, t, t1timer); struct sock *sk = &nr->sock; bh_lock_sock(sk); switch (nr->state) { case NR_STATE_1: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); goto out; } else { nr->n2count++; nr_write_internal(sk, NR_CONNREQ); } break; case NR_STATE_2: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); goto out; } else { nr->n2count++; nr_write_internal(sk, NR_DISCREQ); } break; case NR_STATE_3: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); goto out; } else { nr->n2count++; nr_requeue_frames(sk); } break; } nr_start_t1timer(sk); out: bh_unlock_sock(sk); sock_put(sk); } |
3 1 4 910 824 827 333 335 335 2 44 12 361 364 1 1 1 403 213 361 215 84 7 408 1 409 359 1 358 361 408 407 23 406 50 41 408 405 408 408 60 11 1 10 8 1 1 5 4 7 2 9 1 1 1 5 5 5 4 3 3 2 13 13 13 3 9 10 10 10 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 | // SPDX-License-Identifier: GPL-2.0-only /* * net/ipv6/fib6_rules.c IPv6 Routing Policy Rules * * Copyright (C)2003-2006 Helsinki University of Technology * Copyright (C)2003-2006 USAGI/WIDE Project * * Authors * Thomas Graf <tgraf@suug.ch> * Ville Nuorvala <vnuorval@tcs.hut.fi> */ #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/export.h> #include <linux/indirect_call_wrapper.h> #include <net/fib_rules.h> #include <net/inet_dscp.h> #include <net/ipv6.h> #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/netlink.h> struct fib6_rule { struct fib_rule common; struct rt6key src; struct rt6key dst; dscp_t dscp; u8 dscp_full:1; /* DSCP or TOS selector */ }; static bool fib6_rule_matchall(const struct fib_rule *rule) { struct fib6_rule *r = container_of(rule, struct fib6_rule, common); if (r->dst.plen || r->src.plen || r->dscp) return false; return fib_rule_matchall(rule); } bool fib6_rule_default(const struct fib_rule *rule) { if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL || rule->l3mdev) return false; if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN) return false; return true; } EXPORT_SYMBOL_GPL(fib6_rule_default); int fib6_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return fib_rules_dump(net, nb, AF_INET6, extack); } unsigned int fib6_rules_seq_read(struct net *net) { return fib_rules_seq_read(net, AF_INET6); } /* called with rcu lock held; no reference taken on fib6_info */ int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, struct fib6_result *res, int flags) { int err; if (net->ipv6.fib6_has_custom_rules) { struct fib_lookup_arg arg = { .lookup_ptr = fib6_table_lookup, .lookup_data = &oif, .result = res, .flags = FIB_LOOKUP_NOREF, }; l3mdev_update_flow(net, flowi6_to_flowi(fl6)); err = fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); } else { err = fib6_table_lookup(net, net->ipv6.fib6_local_tbl, oif, fl6, res, flags); if (err || res->f6i == net->ipv6.fib6_null_entry) err = fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, res, flags); } return err; } struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags, pol_lookup_t lookup) { if (net->ipv6.fib6_has_custom_rules) { struct fib6_result res = {}; struct fib_lookup_arg arg = { .lookup_ptr = lookup, .lookup_data = skb, .result = &res, .flags = FIB_LOOKUP_NOREF, }; /* update flow if oif or iif point to device enslaved to l3mdev */ l3mdev_update_flow(net, flowi6_to_flowi(fl6)); fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); if (res.rt6) return &res.rt6->dst; } else { struct rt6_info *rt; rt = pol_lookup_func(lookup, net, net->ipv6.fib6_local_tbl, fl6, skb, flags); if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN) return &rt->dst; ip6_rt_put_flags(rt, flags); rt = pol_lookup_func(lookup, net, net->ipv6.fib6_main_tbl, fl6, skb, flags); if (rt->dst.error != -EAGAIN) return &rt->dst; ip6_rt_put_flags(rt, flags); } if (!(flags & RT6_LOOKUP_F_DST_NOREF)) dst_hold(&net->ipv6.ip6_null_entry->dst); return &net->ipv6.ip6_null_entry->dst; } static int fib6_rule_saddr(struct net *net, struct fib_rule *rule, int flags, struct flowi6 *flp6, const struct net_device *dev) { struct fib6_rule *r = (struct fib6_rule *)rule; /* If we need to find a source address for this traffic, * we check the result if it meets requirement of the rule. */ if ((rule->flags & FIB_RULE_FIND_SADDR) && r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) { struct in6_addr saddr; if (ipv6_dev_get_saddr(net, dev, &flp6->daddr, rt6_flags2srcprefs(flags), &saddr)) return -EAGAIN; if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen)) return -EAGAIN; flp6->saddr = saddr; } return 0; } static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { struct fib6_result *res = arg->result; struct flowi6 *flp6 = &flp->u.ip6; struct net *net = rule->fr_net; struct fib6_table *table; int err, *oif; u32 tb_id; switch (rule->action) { case FR_ACT_TO_TBL: break; case FR_ACT_UNREACHABLE: return -ENETUNREACH; case FR_ACT_PROHIBIT: return -EACCES; case FR_ACT_BLACKHOLE: default: return -EINVAL; } tb_id = fib_rule_get_table(rule, arg); table = fib6_get_table(net, tb_id); if (!table) return -EAGAIN; oif = (int *)arg->lookup_data; err = fib6_table_lookup(net, table, *oif, flp6, res, flags); if (!err && res->f6i != net->ipv6.fib6_null_entry) err = fib6_rule_saddr(net, rule, flags, flp6, res->nh->fib_nh_dev); else err = -EAGAIN; return err; } static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { struct fib6_result *res = arg->result; struct flowi6 *flp6 = &flp->u.ip6; struct rt6_info *rt = NULL; struct fib6_table *table; struct net *net = rule->fr_net; pol_lookup_t lookup = arg->lookup_ptr; int err = 0; u32 tb_id; switch (rule->action) { case FR_ACT_TO_TBL: break; case FR_ACT_UNREACHABLE: err = -ENETUNREACH; rt = net->ipv6.ip6_null_entry; goto discard_pkt; default: case FR_ACT_BLACKHOLE: err = -EINVAL; rt = net->ipv6.ip6_blk_hole_entry; goto discard_pkt; case FR_ACT_PROHIBIT: err = -EACCES; rt = net->ipv6.ip6_prohibit_entry; goto discard_pkt; } tb_id = fib_rule_get_table(rule, arg); table = fib6_get_table(net, tb_id); if (!table) { err = -EAGAIN; goto out; } rt = pol_lookup_func(lookup, net, table, flp6, arg->lookup_data, flags); if (rt != net->ipv6.ip6_null_entry) { struct inet6_dev *idev = ip6_dst_idev(&rt->dst); if (!idev) goto again; err = fib6_rule_saddr(net, rule, flags, flp6, idev->dev); if (err == -EAGAIN) goto again; err = rt->dst.error; if (err != -EAGAIN) goto out; } again: ip6_rt_put_flags(rt, flags); err = -EAGAIN; rt = NULL; goto out; discard_pkt: if (!(flags & RT6_LOOKUP_F_DST_NOREF)) dst_hold(&rt->dst); out: res->rt6 = rt; return err; } INDIRECT_CALLABLE_SCOPE int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { if (arg->lookup_ptr == fib6_table_lookup) return fib6_rule_action_alt(rule, flp, flags, arg); return __fib6_rule_action(rule, flp, flags, arg); } INDIRECT_CALLABLE_SCOPE bool fib6_rule_suppress(struct fib_rule *rule, int flags, struct fib_lookup_arg *arg) { struct fib6_result *res = arg->result; struct rt6_info *rt = res->rt6; struct net_device *dev = NULL; if (!rt) return false; if (rt->rt6i_idev) dev = rt->rt6i_idev->dev; /* do not accept result if the route does * not meet the required prefix length */ if (rt->rt6i_dst.plen <= rule->suppress_prefixlen) goto suppress_route; /* do not accept result if the route uses a device * belonging to a forbidden interface group */ if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup) goto suppress_route; return false; suppress_route: ip6_rt_put_flags(rt, flags); return true; } INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { struct fib6_rule *r = (struct fib6_rule *) rule; struct flowi6 *fl6 = &fl->u.ip6; if (r->dst.plen && !ipv6_prefix_equal(&fl6->daddr, &r->dst.addr, r->dst.plen)) return 0; /* * If FIB_RULE_FIND_SADDR is set and we do not have a * source address for the traffic, we defer check for * source address. */ if (r->src.plen) { if (flags & RT6_LOOKUP_F_HAS_SADDR) { if (!ipv6_prefix_equal(&fl6->saddr, &r->src.addr, r->src.plen)) return 0; } else if (!(r->common.flags & FIB_RULE_FIND_SADDR)) return 0; } if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel)) return 0; if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto)) return 0; if (fib_rule_port_range_set(&rule->sport_range) && !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport)) return 0; if (fib_rule_port_range_set(&rule->dport_range) && !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport)) return 0; return 1; } static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6, struct netlink_ext_ack *extack) { if (rule6->dscp) { NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP"); return -EINVAL; } rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); rule6->dscp_full = true; return 0; } static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, struct nlattr **tb, struct netlink_ext_ack *extack) { int err = -EINVAL; struct net *net = sock_net(skb->sk); struct fib6_rule *rule6 = (struct fib6_rule *) rule; if (!inet_validate_dscp(frh->tos)) { NL_SET_ERR_MSG(extack, "Invalid dsfield (tos): ECN bits must be 0"); goto errout; } rule6->dscp = inet_dsfield_to_dscp(frh->tos); if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0) goto errout; if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) { if (rule->table == RT6_TABLE_UNSPEC) { NL_SET_ERR_MSG(extack, "Invalid table"); goto errout; } if (fib6_new_table(net, rule->table) == NULL) { err = -ENOBUFS; goto errout; } } if (frh->src_len) rule6->src.addr = nla_get_in6_addr(tb[FRA_SRC]); if (frh->dst_len) rule6->dst.addr = nla_get_in6_addr(tb[FRA_DST]); rule6->src.plen = frh->src_len; rule6->dst.plen = frh->dst_len; if (fib_rule_requires_fldissect(rule)) net->ipv6.fib6_rules_require_fldissect++; net->ipv6.fib6_has_custom_rules = true; err = 0; errout: return err; } static int fib6_rule_delete(struct fib_rule *rule) { struct net *net = rule->fr_net; if (net->ipv6.fib6_rules_require_fldissect && fib_rule_requires_fldissect(rule)) net->ipv6.fib6_rules_require_fldissect--; return 0; } static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, struct nlattr **tb) { struct fib6_rule *rule6 = (struct fib6_rule *) rule; if (frh->src_len && (rule6->src.plen != frh->src_len)) return 0; if (frh->dst_len && (rule6->dst.plen != frh->dst_len)) return 0; if (frh->tos && (rule6->dscp_full || inet_dscp_to_dsfield(rule6->dscp) != frh->tos)) return 0; if (tb[FRA_DSCP]) { dscp_t dscp; dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2); if (!rule6->dscp_full || rule6->dscp != dscp) return 0; } if (frh->src_len && nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr))) return 0; if (frh->dst_len && nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr))) return 0; return 1; } static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh) { struct fib6_rule *rule6 = (struct fib6_rule *) rule; frh->dst_len = rule6->dst.plen; frh->src_len = rule6->src.plen; if (rule6->dscp_full) { frh->tos = 0; if (nla_put_u8(skb, FRA_DSCP, inet_dscp_to_dsfield(rule6->dscp) >> 2)) goto nla_put_failure; } else { frh->tos = inet_dscp_to_dsfield(rule6->dscp); } if ((rule6->dst.plen && nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) || (rule6->src.plen && nla_put_in6_addr(skb, FRA_SRC, &rule6->src.addr))) goto nla_put_failure; return 0; nla_put_failure: return -ENOBUFS; } static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) { return nla_total_size(16) /* dst */ + nla_total_size(16) /* src */ + nla_total_size(1); /* dscp */ } static void fib6_rule_flush_cache(struct fib_rules_ops *ops) { rt_genid_bump_ipv6(ops->fro_net); } static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), .addr_size = sizeof(struct in6_addr), .action = fib6_rule_action, .match = fib6_rule_match, .suppress = fib6_rule_suppress, .configure = fib6_rule_configure, .delete = fib6_rule_delete, .compare = fib6_rule_compare, .fill = fib6_rule_fill, .nlmsg_payload = fib6_rule_nlmsg_payload, .flush_cache = fib6_rule_flush_cache, .nlgroup = RTNLGRP_IPV6_RULE, .owner = THIS_MODULE, .fro_net = &init_net, }; static int __net_init fib6_rules_net_init(struct net *net) { struct fib_rules_ops *ops; int err; ops = fib_rules_register(&fib6_rules_ops_template, net); if (IS_ERR(ops)) return PTR_ERR(ops); err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL); if (err) goto out_fib6_rules_ops; err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN); if (err) goto out_fib6_rules_ops; net->ipv6.fib6_rules_ops = ops; net->ipv6.fib6_rules_require_fldissect = 0; out: return err; out_fib6_rules_ops: fib_rules_unregister(ops); goto out; } static void __net_exit fib6_rules_net_exit_batch(struct list_head *net_list) { struct net *net; rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { fib_rules_unregister(net->ipv6.fib6_rules_ops); cond_resched(); } rtnl_unlock(); } static struct pernet_operations fib6_rules_net_ops = { .init = fib6_rules_net_init, .exit_batch = fib6_rules_net_exit_batch, }; int __init fib6_rules_init(void) { return register_pernet_subsys(&fib6_rules_net_ops); } void fib6_rules_cleanup(void) { unregister_pernet_subsys(&fib6_rules_net_ops); } |
1 1 1 513 510 512 498 195 456 468 514 1 1 1 1 1 1 3 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 3 1 2 1 1 1 1 1 1 1 1 1 1 1 1 874 872 412 410 176 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 | // SPDX-License-Identifier: GPL-2.0-only /* * Monitoring code for network dropped packet alerts * * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/string.h> #include <linux/if_arp.h> #include <linux/inetdevice.h> #include <linux/inet.h> #include <linux/interrupt.h> #include <linux/netpoll.h> #include <linux/sched.h> #include <linux/delay.h> #include <linux/types.h> #include <linux/workqueue.h> #include <linux/netlink.h> #include <linux/net_dropmon.h> #include <linux/bitfield.h> #include <linux/percpu.h> #include <linux/timer.h> #include <linux/bitops.h> #include <linux/slab.h> #include <linux/module.h> #include <net/genetlink.h> #include <net/netevent.h> #include <net/flow_offload.h> #include <net/dropreason.h> #include <net/devlink.h> #include <trace/events/skb.h> #include <trace/events/napi.h> #include <trace/events/devlink.h> #include <asm/unaligned.h> #define TRACE_ON 1 #define TRACE_OFF 0 /* * Globals, our netlink socket pointer * and the work handle that will send up * netlink alerts */ static int trace_state = TRACE_OFF; static bool monitor_hw; /* net_dm_mutex * * An overall lock guarding every operation coming from userspace. */ static DEFINE_MUTEX(net_dm_mutex); struct net_dm_stats { u64_stats_t dropped; struct u64_stats_sync syncp; }; #define NET_DM_MAX_HW_TRAP_NAME_LEN 40 struct net_dm_hw_entry { char trap_name[NET_DM_MAX_HW_TRAP_NAME_LEN]; u32 count; }; struct net_dm_hw_entries { u32 num_entries; struct net_dm_hw_entry entries[]; }; struct per_cpu_dm_data { raw_spinlock_t lock; /* Protects 'skb', 'hw_entries' and * 'send_timer' */ union { struct sk_buff *skb; struct net_dm_hw_entries *hw_entries; }; struct sk_buff_head drop_queue; struct work_struct dm_alert_work; struct timer_list send_timer; struct net_dm_stats stats; }; struct dm_hw_stat_delta { unsigned long last_rx; unsigned long last_drop_val; struct rcu_head rcu; }; static struct genl_family net_drop_monitor_family; static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data); static int dm_hit_limit = 64; static int dm_delay = 1; static unsigned long dm_hw_check_delta = 2*HZ; static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY; static u32 net_dm_trunc_len; static u32 net_dm_queue_len = 1000; struct net_dm_alert_ops { void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb, void *location, enum skb_drop_reason reason, struct sock *rx_sk); void (*napi_poll_probe)(void *ignore, struct napi_struct *napi, int work, int budget); void (*work_item_func)(struct work_struct *work); void (*hw_work_item_func)(struct work_struct *work); void (*hw_trap_probe)(void *ignore, const struct devlink *devlink, struct sk_buff *skb, const struct devlink_trap_metadata *metadata); }; struct net_dm_skb_cb { union { struct devlink_trap_metadata *hw_metadata; void *pc; }; enum skb_drop_reason reason; }; #define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0])) static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) { size_t al; struct net_dm_alert_msg *msg; struct nlattr *nla; struct sk_buff *skb; unsigned long flags; void *msg_header; al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); al += sizeof(struct nlattr); skb = genlmsg_new(al, GFP_KERNEL); if (!skb) goto err; msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family, 0, NET_DM_CMD_ALERT); if (!msg_header) { nlmsg_free(skb); skb = NULL; goto err; } nla = nla_reserve(skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg)); if (!nla) { nlmsg_free(skb); skb = NULL; goto err; } msg = nla_data(nla); memset(msg, 0, al); goto out; err: mod_timer(&data->send_timer, jiffies + HZ / 10); out: raw_spin_lock_irqsave(&data->lock, flags); swap(data->skb, skb); raw_spin_unlock_irqrestore(&data->lock, flags); if (skb) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh); genlmsg_end(skb, genlmsg_data(gnlh)); } return skb; } static const struct genl_multicast_group dropmon_mcgrps[] = { { .name = "events", .flags = GENL_MCAST_CAP_SYS_ADMIN, }, }; static void send_dm_alert(struct work_struct *work) { struct sk_buff *skb; struct per_cpu_dm_data *data; data = container_of(work, struct per_cpu_dm_data, dm_alert_work); skb = reset_per_cpu_data(data); if (skb) genlmsg_multicast(&net_drop_monitor_family, skb, 0, 0, GFP_KERNEL); } /* * This is the timer function to delay the sending of an alert * in the event that more drops will arrive during the * hysteresis period. */ static void sched_send_work(struct timer_list *t) { struct per_cpu_dm_data *data = from_timer(data, t, send_timer); schedule_work(&data->dm_alert_work); } static void trace_drop_common(struct sk_buff *skb, void *location) { struct net_dm_alert_msg *msg; struct net_dm_drop_point *point; struct nlmsghdr *nlh; struct nlattr *nla; int i; struct sk_buff *dskb; struct per_cpu_dm_data *data; unsigned long flags; local_irq_save(flags); data = this_cpu_ptr(&dm_cpu_data); raw_spin_lock(&data->lock); dskb = data->skb; if (!dskb) goto out; nlh = (struct nlmsghdr *)dskb->data; nla = genlmsg_data(nlmsg_data(nlh)); msg = nla_data(nla); point = msg->points; for (i = 0; i < msg->entries; i++) { if (!memcmp(&location, &point->pc, sizeof(void *))) { point->count++; goto out; } point++; } if (msg->entries == dm_hit_limit) goto out; /* * We need to create a new entry */ __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point)); nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point)); memcpy(point->pc, &location, sizeof(void *)); point->count = 1; msg->entries++; if (!timer_pending(&data->send_timer)) { data->send_timer.expires = jiffies + dm_delay * HZ; add_timer(&data->send_timer); } out: raw_spin_unlock_irqrestore(&data->lock, flags); } static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location, enum skb_drop_reason reason, struct sock *rx_sk) { trace_drop_common(skb, location); } static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi, int work, int budget) { struct net_device *dev = napi->dev; struct dm_hw_stat_delta *stat; /* * Don't check napi structures with no associated device */ if (!dev) return; rcu_read_lock(); stat = rcu_dereference(dev->dm_private); if (stat) { /* * only add a note to our monitor buffer if: * 1) its after the last_rx delta * 2) our rx_dropped count has gone up */ if (time_after(jiffies, stat->last_rx + dm_hw_check_delta) && (dev->stats.rx_dropped != stat->last_drop_val)) { trace_drop_common(NULL, NULL); stat->last_drop_val = dev->stats.rx_dropped; stat->last_rx = jiffies; } } rcu_read_unlock(); } static struct net_dm_hw_entries * net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data) { struct net_dm_hw_entries *hw_entries; unsigned long flags; hw_entries = kzalloc(struct_size(hw_entries, entries, dm_hit_limit), GFP_KERNEL); if (!hw_entries) { /* If the memory allocation failed, we try to perform another * allocation in 1/10 second. Otherwise, the probe function * will constantly bail out. */ mod_timer(&hw_data->send_timer, jiffies + HZ / 10); } raw_spin_lock_irqsave(&hw_data->lock, flags); swap(hw_data->hw_entries, hw_entries); raw_spin_unlock_irqrestore(&hw_data->lock, flags); return hw_entries; } static int net_dm_hw_entry_put(struct sk_buff *msg, const struct net_dm_hw_entry *hw_entry) { struct nlattr *attr; attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRY); if (!attr) return -EMSGSIZE; if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_entry->trap_name)) goto nla_put_failure; if (nla_put_u32(msg, NET_DM_ATTR_HW_TRAP_COUNT, hw_entry->count)) goto nla_put_failure; nla_nest_end(msg, attr); return 0; nla_put_failure: nla_nest_cancel(msg, attr); return -EMSGSIZE; } static int net_dm_hw_entries_put(struct sk_buff *msg, const struct net_dm_hw_entries *hw_entries) { struct nlattr *attr; int i; attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRIES); if (!attr) return -EMSGSIZE; for (i = 0; i < hw_entries->num_entries; i++) { int rc; rc = net_dm_hw_entry_put(msg, &hw_entries->entries[i]); if (rc) goto nla_put_failure; } nla_nest_end(msg, attr); return 0; nla_put_failure: nla_nest_cancel(msg, attr); return -EMSGSIZE; } static int net_dm_hw_summary_report_fill(struct sk_buff *msg, const struct net_dm_hw_entries *hw_entries) { struct net_dm_alert_msg anc_hdr = { 0 }; void *hdr; int rc; hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0, NET_DM_CMD_ALERT); if (!hdr) return -EMSGSIZE; /* We need to put the ancillary header in order not to break user * space. */ if (nla_put(msg, NLA_UNSPEC, sizeof(anc_hdr), &anc_hdr)) goto nla_put_failure; rc = net_dm_hw_entries_put(msg, hw_entries); if (rc) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static void net_dm_hw_summary_work(struct work_struct *work) { struct net_dm_hw_entries *hw_entries; struct per_cpu_dm_data *hw_data; struct sk_buff *msg; int rc; hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work); hw_entries = net_dm_hw_reset_per_cpu_data(hw_data); if (!hw_entries) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) goto out; rc = net_dm_hw_summary_report_fill(msg, hw_entries); if (rc) { nlmsg_free(msg); goto out; } genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL); out: kfree(hw_entries); } static void net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink, struct sk_buff *skb, const struct devlink_trap_metadata *metadata) { struct net_dm_hw_entries *hw_entries; struct net_dm_hw_entry *hw_entry; struct per_cpu_dm_data *hw_data; unsigned long flags; int i; if (metadata->trap_type == DEVLINK_TRAP_TYPE_CONTROL) return; hw_data = this_cpu_ptr(&dm_hw_cpu_data); raw_spin_lock_irqsave(&hw_data->lock, flags); hw_entries = hw_data->hw_entries; if (!hw_entries) goto out; for (i = 0; i < hw_entries->num_entries; i++) { hw_entry = &hw_entries->entries[i]; if (!strncmp(hw_entry->trap_name, metadata->trap_name, NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) { hw_entry->count++; goto out; } } if (WARN_ON_ONCE(hw_entries->num_entries == dm_hit_limit)) goto out; hw_entry = &hw_entries->entries[hw_entries->num_entries]; strscpy(hw_entry->trap_name, metadata->trap_name, NET_DM_MAX_HW_TRAP_NAME_LEN - 1); hw_entry->count = 1; hw_entries->num_entries++; if (!timer_pending(&hw_data->send_timer)) { hw_data->send_timer.expires = jiffies + dm_delay * HZ; add_timer(&hw_data->send_timer); } out: raw_spin_unlock_irqrestore(&hw_data->lock, flags); } static const struct net_dm_alert_ops net_dm_alert_summary_ops = { .kfree_skb_probe = trace_kfree_skb_hit, .napi_poll_probe = trace_napi_poll_hit, .work_item_func = send_dm_alert, .hw_work_item_func = net_dm_hw_summary_work, .hw_trap_probe = net_dm_hw_trap_summary_probe, }; static void net_dm_packet_trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location, enum skb_drop_reason reason, struct sock *rx_sk) { ktime_t tstamp = ktime_get_real(); struct per_cpu_dm_data *data; struct net_dm_skb_cb *cb; struct sk_buff *nskb; unsigned long flags; if (!skb_mac_header_was_set(skb)) return; nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return; cb = NET_DM_SKB_CB(nskb); cb->reason = reason; cb->pc = location; /* Override the timestamp because we care about the time when the * packet was dropped. */ nskb->tstamp = tstamp; data = this_cpu_ptr(&dm_cpu_data); spin_lock_irqsave(&data->drop_queue.lock, flags); if (skb_queue_len(&data->drop_queue) < net_dm_queue_len) __skb_queue_tail(&data->drop_queue, nskb); else goto unlock_free; spin_unlock_irqrestore(&data->drop_queue.lock, flags); schedule_work(&data->dm_alert_work); return; unlock_free: spin_unlock_irqrestore(&data->drop_queue.lock, flags); u64_stats_update_begin(&data->stats.syncp); u64_stats_inc(&data->stats.dropped); u64_stats_update_end(&data->stats.syncp); consume_skb(nskb); } static void net_dm_packet_trace_napi_poll_hit(void *ignore, struct napi_struct *napi, int work, int budget) { } static size_t net_dm_in_port_size(void) { /* NET_DM_ATTR_IN_PORT nest */ return nla_total_size(0) + /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */ nla_total_size(sizeof(u32)) + /* NET_DM_ATTR_PORT_NETDEV_NAME */ nla_total_size(IFNAMSIZ + 1); } #define NET_DM_MAX_SYMBOL_LEN 40 #define NET_DM_MAX_REASON_LEN 50 static size_t net_dm_packet_report_size(size_t payload_len) { size_t size; size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize); return NLMSG_ALIGN(size) + /* NET_DM_ATTR_ORIGIN */ nla_total_size(sizeof(u16)) + /* NET_DM_ATTR_PC */ nla_total_size(sizeof(u64)) + /* NET_DM_ATTR_SYMBOL */ nla_total_size(NET_DM_MAX_SYMBOL_LEN + 1) + /* NET_DM_ATTR_IN_PORT */ net_dm_in_port_size() + /* NET_DM_ATTR_TIMESTAMP */ nla_total_size(sizeof(u64)) + /* NET_DM_ATTR_ORIG_LEN */ nla_total_size(sizeof(u32)) + /* NET_DM_ATTR_PROTO */ nla_total_size(sizeof(u16)) + /* NET_DM_ATTR_REASON */ nla_total_size(NET_DM_MAX_REASON_LEN + 1) + /* NET_DM_ATTR_PAYLOAD */ nla_total_size(payload_len); } static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex, const char *name) { struct nlattr *attr; attr = nla_nest_start(msg, NET_DM_ATTR_IN_PORT); if (!attr) return -EMSGSIZE; if (ifindex && nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex)) goto nla_put_failure; if (name && nla_put_string(msg, NET_DM_ATTR_PORT_NETDEV_NAME, name)) goto nla_put_failure; nla_nest_end(msg, attr); return 0; nla_put_failure: nla_nest_cancel(msg, attr); return -EMSGSIZE; } static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb, size_t payload_len) { struct net_dm_skb_cb *cb = NET_DM_SKB_CB(skb); const struct drop_reason_list *list = NULL; unsigned int subsys, subsys_reason; char buf[NET_DM_MAX_SYMBOL_LEN]; struct nlattr *attr; void *hdr; int rc; hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0, NET_DM_CMD_PACKET_ALERT); if (!hdr) return -EMSGSIZE; if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW)) goto nla_put_failure; if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, (u64)(uintptr_t)cb->pc, NET_DM_ATTR_PAD)) goto nla_put_failure; rcu_read_lock(); subsys = u32_get_bits(cb->reason, SKB_DROP_REASON_SUBSYS_MASK); if (subsys < SKB_DROP_REASON_SUBSYS_NUM) list = rcu_dereference(drop_reasons_by_subsys[subsys]); subsys_reason = cb->reason & ~SKB_DROP_REASON_SUBSYS_MASK; if (!list || subsys_reason >= list->n_reasons || !list->reasons[subsys_reason] || strlen(list->reasons[subsys_reason]) > NET_DM_MAX_REASON_LEN) { list = rcu_dereference(drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_CORE]); subsys_reason = SKB_DROP_REASON_NOT_SPECIFIED; } if (nla_put_string(msg, NET_DM_ATTR_REASON, list->reasons[subsys_reason])) { rcu_read_unlock(); goto nla_put_failure; } rcu_read_unlock(); snprintf(buf, sizeof(buf), "%pS", cb->pc); if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf)) goto nla_put_failure; rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif, NULL); if (rc) goto nla_put_failure; if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP, ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD)) goto nla_put_failure; if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len)) goto nla_put_failure; if (!payload_len) goto out; if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol))) goto nla_put_failure; attr = skb_put(msg, nla_total_size(payload_len)); attr->nla_type = NET_DM_ATTR_PAYLOAD; attr->nla_len = nla_attr_size(payload_len); if (skb_copy_bits(skb, 0, nla_data(attr), payload_len)) goto nla_put_failure; out: genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } #define NET_DM_MAX_PACKET_SIZE (0xffff - NLA_HDRLEN - NLA_ALIGNTO) static void net_dm_packet_report(struct sk_buff *skb) { struct sk_buff *msg; size_t payload_len; int rc; /* Make sure we start copying the packet from the MAC header */ if (skb->data > skb_mac_header(skb)) skb_push(skb, skb->data - skb_mac_header(skb)); else skb_pull(skb, skb_mac_header(skb) - skb->data); /* Ensure packet fits inside a single netlink attribute */ payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE); if (net_dm_trunc_len) payload_len = min_t(size_t, net_dm_trunc_len, payload_len); msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL); if (!msg) goto out; rc = net_dm_packet_report_fill(msg, skb, payload_len); if (rc) { nlmsg_free(msg); goto out; } genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL); out: consume_skb(skb); } static void net_dm_packet_work(struct work_struct *work) { struct per_cpu_dm_data *data; struct sk_buff_head list; struct sk_buff *skb; unsigned long flags; data = container_of(work, struct per_cpu_dm_data, dm_alert_work); __skb_queue_head_init(&list); spin_lock_irqsave(&data->drop_queue.lock, flags); skb_queue_splice_tail_init(&data->drop_queue, &list); spin_unlock_irqrestore(&data->drop_queue.lock, flags); while ((skb = __skb_dequeue(&list))) net_dm_packet_report(skb); } static size_t net_dm_flow_action_cookie_size(const struct devlink_trap_metadata *hw_metadata) { return hw_metadata->fa_cookie ? nla_total_size(hw_metadata->fa_cookie->cookie_len) : 0; } static size_t net_dm_hw_packet_report_size(size_t payload_len, const struct devlink_trap_metadata *hw_metadata) { size_t size; size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize); return NLMSG_ALIGN(size) + /* NET_DM_ATTR_ORIGIN */ nla_total_size(sizeof(u16)) + /* NET_DM_ATTR_HW_TRAP_GROUP_NAME */ nla_total_size(strlen(hw_metadata->trap_group_name) + 1) + /* NET_DM_ATTR_HW_TRAP_NAME */ nla_total_size(strlen(hw_metadata->trap_name) + 1) + /* NET_DM_ATTR_IN_PORT */ net_dm_in_port_size() + /* NET_DM_ATTR_FLOW_ACTION_COOKIE */ net_dm_flow_action_cookie_size(hw_metadata) + /* NET_DM_ATTR_TIMESTAMP */ nla_total_size(sizeof(u64)) + /* NET_DM_ATTR_ORIG_LEN */ nla_total_size(sizeof(u32)) + /* NET_DM_ATTR_PROTO */ nla_total_size(sizeof(u16)) + /* NET_DM_ATTR_PAYLOAD */ nla_total_size(payload_len); } static int net_dm_hw_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb, size_t payload_len) { struct devlink_trap_metadata *hw_metadata; struct nlattr *attr; void *hdr; hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0, NET_DM_CMD_PACKET_ALERT); if (!hdr) return -EMSGSIZE; if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_HW)) goto nla_put_failure; if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_GROUP_NAME, hw_metadata->trap_group_name)) goto nla_put_failure; if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_metadata->trap_name)) goto nla_put_failure; if (hw_metadata->input_dev) { struct net_device *dev = hw_metadata->input_dev; int rc; rc = net_dm_packet_report_in_port_put(msg, dev->ifindex, dev->name); if (rc) goto nla_put_failure; } if (hw_metadata->fa_cookie && nla_put(msg, NET_DM_ATTR_FLOW_ACTION_COOKIE, hw_metadata->fa_cookie->cookie_len, hw_metadata->fa_cookie->cookie)) goto nla_put_failure; if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP, ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD)) goto nla_put_failure; if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len)) goto nla_put_failure; if (!payload_len) goto out; if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol))) goto nla_put_failure; attr = skb_put(msg, nla_total_size(payload_len)); attr->nla_type = NET_DM_ATTR_PAYLOAD; attr->nla_len = nla_attr_size(payload_len); if (skb_copy_bits(skb, 0, nla_data(attr), payload_len)) goto nla_put_failure; out: genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static struct devlink_trap_metadata * net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata) { const struct flow_action_cookie *fa_cookie; struct devlink_trap_metadata *hw_metadata; const char *trap_group_name; const char *trap_name; hw_metadata = kzalloc(sizeof(*hw_metadata), GFP_ATOMIC); if (!hw_metadata) return NULL; trap_group_name = kstrdup(metadata->trap_group_name, GFP_ATOMIC); if (!trap_group_name) goto free_hw_metadata; hw_metadata->trap_group_name = trap_group_name; trap_name = kstrdup(metadata->trap_name, GFP_ATOMIC); if (!trap_name) goto free_trap_group; hw_metadata->trap_name = trap_name; if (metadata->fa_cookie) { size_t cookie_size = sizeof(*fa_cookie) + metadata->fa_cookie->cookie_len; fa_cookie = kmemdup(metadata->fa_cookie, cookie_size, GFP_ATOMIC); if (!fa_cookie) goto free_trap_name; hw_metadata->fa_cookie = fa_cookie; } hw_metadata->input_dev = metadata->input_dev; netdev_hold(hw_metadata->input_dev, &hw_metadata->dev_tracker, GFP_ATOMIC); return hw_metadata; free_trap_name: kfree(trap_name); free_trap_group: kfree(trap_group_name); free_hw_metadata: kfree(hw_metadata); return NULL; } static void net_dm_hw_metadata_free(struct devlink_trap_metadata *hw_metadata) { netdev_put(hw_metadata->input_dev, &hw_metadata->dev_tracker); kfree(hw_metadata->fa_cookie); kfree(hw_metadata->trap_name); kfree(hw_metadata->trap_group_name); kfree(hw_metadata); } static void net_dm_hw_packet_report(struct sk_buff *skb) { struct devlink_trap_metadata *hw_metadata; struct sk_buff *msg; size_t payload_len; int rc; if (skb->data > skb_mac_header(skb)) skb_push(skb, skb->data - skb_mac_header(skb)); else skb_pull(skb, skb_mac_header(skb) - skb->data); payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE); if (net_dm_trunc_len) payload_len = min_t(size_t, net_dm_trunc_len, payload_len); hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; msg = nlmsg_new(net_dm_hw_packet_report_size(payload_len, hw_metadata), GFP_KERNEL); if (!msg) goto out; rc = net_dm_hw_packet_report_fill(msg, skb, payload_len); if (rc) { nlmsg_free(msg); goto out; } genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL); out: net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata); consume_skb(skb); } static void net_dm_hw_packet_work(struct work_struct *work) { struct per_cpu_dm_data *hw_data; struct sk_buff_head list; struct sk_buff *skb; unsigned long flags; hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work); __skb_queue_head_init(&list); spin_lock_irqsave(&hw_data->drop_queue.lock, flags); skb_queue_splice_tail_init(&hw_data->drop_queue, &list); spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); while ((skb = __skb_dequeue(&list))) net_dm_hw_packet_report(skb); } static void net_dm_hw_trap_packet_probe(void *ignore, const struct devlink *devlink, struct sk_buff *skb, const struct devlink_trap_metadata *metadata) { struct devlink_trap_metadata *n_hw_metadata; ktime_t tstamp = ktime_get_real(); struct per_cpu_dm_data *hw_data; struct sk_buff *nskb; unsigned long flags; if (metadata->trap_type == DEVLINK_TRAP_TYPE_CONTROL) return; if (!skb_mac_header_was_set(skb)) return; nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return; n_hw_metadata = net_dm_hw_metadata_copy(metadata); if (!n_hw_metadata) goto free; NET_DM_SKB_CB(nskb)->hw_metadata = n_hw_metadata; nskb->tstamp = tstamp; hw_data = this_cpu_ptr(&dm_hw_cpu_data); spin_lock_irqsave(&hw_data->drop_queue.lock, flags); if (skb_queue_len(&hw_data->drop_queue) < net_dm_queue_len) __skb_queue_tail(&hw_data->drop_queue, nskb); else goto unlock_free; spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); schedule_work(&hw_data->dm_alert_work); return; unlock_free: spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); u64_stats_update_begin(&hw_data->stats.syncp); u64_stats_inc(&hw_data->stats.dropped); u64_stats_update_end(&hw_data->stats.syncp); net_dm_hw_metadata_free(n_hw_metadata); free: consume_skb(nskb); } static const struct net_dm_alert_ops net_dm_alert_packet_ops = { .kfree_skb_probe = net_dm_packet_trace_kfree_skb_hit, .napi_poll_probe = net_dm_packet_trace_napi_poll_hit, .work_item_func = net_dm_packet_work, .hw_work_item_func = net_dm_hw_packet_work, .hw_trap_probe = net_dm_hw_trap_packet_probe, }; static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = { [NET_DM_ALERT_MODE_SUMMARY] = &net_dm_alert_summary_ops, [NET_DM_ALERT_MODE_PACKET] = &net_dm_alert_packet_ops, }; #if IS_ENABLED(CONFIG_NET_DEVLINK) static int net_dm_hw_probe_register(const struct net_dm_alert_ops *ops) { return register_trace_devlink_trap_report(ops->hw_trap_probe, NULL); } static void net_dm_hw_probe_unregister(const struct net_dm_alert_ops *ops) { unregister_trace_devlink_trap_report(ops->hw_trap_probe, NULL); tracepoint_synchronize_unregister(); } #else static int net_dm_hw_probe_register(const struct net_dm_alert_ops *ops) { return -EOPNOTSUPP; } static void net_dm_hw_probe_unregister(const struct net_dm_alert_ops *ops) { } #endif static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack) { const struct net_dm_alert_ops *ops; int cpu, rc; if (monitor_hw) { NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled"); return -EAGAIN; } ops = net_dm_alert_ops_arr[net_dm_alert_mode]; if (!try_module_get(THIS_MODULE)) { NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module"); return -ENODEV; } for_each_possible_cpu(cpu) { struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); struct net_dm_hw_entries *hw_entries; INIT_WORK(&hw_data->dm_alert_work, ops->hw_work_item_func); timer_setup(&hw_data->send_timer, sched_send_work, 0); hw_entries = net_dm_hw_reset_per_cpu_data(hw_data); kfree(hw_entries); } rc = net_dm_hw_probe_register(ops); if (rc) { NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to devlink_trap_probe() tracepoint"); goto err_module_put; } monitor_hw = true; return 0; err_module_put: for_each_possible_cpu(cpu) { struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); struct sk_buff *skb; del_timer_sync(&hw_data->send_timer); cancel_work_sync(&hw_data->dm_alert_work); while ((skb = __skb_dequeue(&hw_data->drop_queue))) { struct devlink_trap_metadata *hw_metadata; hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; net_dm_hw_metadata_free(hw_metadata); consume_skb(skb); } } module_put(THIS_MODULE); return rc; } static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack) { const struct net_dm_alert_ops *ops; int cpu; if (!monitor_hw) { NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled"); return; } ops = net_dm_alert_ops_arr[net_dm_alert_mode]; monitor_hw = false; net_dm_hw_probe_unregister(ops); for_each_possible_cpu(cpu) { struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); struct sk_buff *skb; del_timer_sync(&hw_data->send_timer); cancel_work_sync(&hw_data->dm_alert_work); while ((skb = __skb_dequeue(&hw_data->drop_queue))) { struct devlink_trap_metadata *hw_metadata; hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; net_dm_hw_metadata_free(hw_metadata); consume_skb(skb); } } module_put(THIS_MODULE); } static int net_dm_trace_on_set(struct netlink_ext_ack *extack) { const struct net_dm_alert_ops *ops; int cpu, rc; ops = net_dm_alert_ops_arr[net_dm_alert_mode]; if (!try_module_get(THIS_MODULE)) { NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module"); return -ENODEV; } for_each_possible_cpu(cpu) { struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); struct sk_buff *skb; INIT_WORK(&data->dm_alert_work, ops->work_item_func); timer_setup(&data->send_timer, sched_send_work, 0); /* Allocate a new per-CPU skb for the summary alert message and * free the old one which might contain stale data from * previous tracing. */ skb = reset_per_cpu_data(data); consume_skb(skb); } rc = register_trace_kfree_skb(ops->kfree_skb_probe, NULL); if (rc) { NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint"); goto err_module_put; } rc = register_trace_napi_poll(ops->napi_poll_probe, NULL); if (rc) { NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint"); goto err_unregister_trace; } return 0; err_unregister_trace: unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL); err_module_put: for_each_possible_cpu(cpu) { struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); struct sk_buff *skb; del_timer_sync(&data->send_timer); cancel_work_sync(&data->dm_alert_work); while ((skb = __skb_dequeue(&data->drop_queue))) consume_skb(skb); } module_put(THIS_MODULE); return rc; } static void net_dm_trace_off_set(void) { const struct net_dm_alert_ops *ops; int cpu; ops = net_dm_alert_ops_arr[net_dm_alert_mode]; unregister_trace_napi_poll(ops->napi_poll_probe, NULL); unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL); tracepoint_synchronize_unregister(); /* Make sure we do not send notifications to user space after request * to stop tracing returns. */ for_each_possible_cpu(cpu) { struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); struct sk_buff *skb; del_timer_sync(&data->send_timer); cancel_work_sync(&data->dm_alert_work); while ((skb = __skb_dequeue(&data->drop_queue))) consume_skb(skb); } module_put(THIS_MODULE); } static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack) { int rc = 0; if (state == trace_state) { NL_SET_ERR_MSG_MOD(extack, "Trace state already set to requested state"); return -EAGAIN; } switch (state) { case TRACE_ON: rc = net_dm_trace_on_set(extack); break; case TRACE_OFF: net_dm_trace_off_set(); break; default: rc = 1; break; } if (!rc) trace_state = state; else rc = -EINPROGRESS; return rc; } static bool net_dm_is_monitoring(void) { return trace_state == TRACE_ON || monitor_hw; } static int net_dm_alert_mode_get_from_info(struct genl_info *info, enum net_dm_alert_mode *p_alert_mode) { u8 val; val = nla_get_u8(info->attrs[NET_DM_ATTR_ALERT_MODE]); switch (val) { case NET_DM_ALERT_MODE_SUMMARY: case NET_DM_ALERT_MODE_PACKET: *p_alert_mode = val; break; default: return -EINVAL; } return 0; } static int net_dm_alert_mode_set(struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; enum net_dm_alert_mode alert_mode; int rc; if (!info->attrs[NET_DM_ATTR_ALERT_MODE]) return 0; rc = net_dm_alert_mode_get_from_info(info, &alert_mode); if (rc) { NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode"); return -EINVAL; } net_dm_alert_mode = alert_mode; return 0; } static void net_dm_trunc_len_set(struct genl_info *info) { if (!info->attrs[NET_DM_ATTR_TRUNC_LEN]) return; net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]); } static void net_dm_queue_len_set(struct genl_info *info) { if (!info->attrs[NET_DM_ATTR_QUEUE_LEN]) return; net_dm_queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]); } static int net_dm_cmd_config(struct sk_buff *skb, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; int rc; if (net_dm_is_monitoring()) { NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor during monitoring"); return -EBUSY; } rc = net_dm_alert_mode_set(info); if (rc) return rc; net_dm_trunc_len_set(info); net_dm_queue_len_set(info); return 0; } static int net_dm_monitor_start(bool set_sw, bool set_hw, struct netlink_ext_ack *extack) { bool sw_set = false; int rc; if (set_sw) { rc = set_all_monitor_traces(TRACE_ON, extack); if (rc) return rc; sw_set = true; } if (set_hw) { rc = net_dm_hw_monitor_start(extack); if (rc) goto err_monitor_hw; } return 0; err_monitor_hw: if (sw_set) set_all_monitor_traces(TRACE_OFF, extack); return rc; } static void net_dm_monitor_stop(bool set_sw, bool set_hw, struct netlink_ext_ack *extack) { if (set_hw) net_dm_hw_monitor_stop(extack); if (set_sw) set_all_monitor_traces(TRACE_OFF, extack); } static int net_dm_cmd_trace(struct sk_buff *skb, struct genl_info *info) { bool set_sw = !!info->attrs[NET_DM_ATTR_SW_DROPS]; bool set_hw = !!info->attrs[NET_DM_ATTR_HW_DROPS]; struct netlink_ext_ack *extack = info->extack; /* To maintain backward compatibility, we start / stop monitoring of * software drops if no flag is specified. */ if (!set_sw && !set_hw) set_sw = true; switch (info->genlhdr->cmd) { case NET_DM_CMD_START: return net_dm_monitor_start(set_sw, set_hw, extack); case NET_DM_CMD_STOP: net_dm_monitor_stop(set_sw, set_hw, extack); return 0; } return -EOPNOTSUPP; } static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info) { void *hdr; hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &net_drop_monitor_family, 0, NET_DM_CMD_CONFIG_NEW); if (!hdr) return -EMSGSIZE; if (nla_put_u8(msg, NET_DM_ATTR_ALERT_MODE, net_dm_alert_mode)) goto nla_put_failure; if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len)) goto nla_put_failure; if (nla_put_u32(msg, NET_DM_ATTR_QUEUE_LEN, net_dm_queue_len)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; int rc; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; rc = net_dm_config_fill(msg, info); if (rc) goto free_msg; return genlmsg_reply(msg, info); free_msg: nlmsg_free(msg); return rc; } static void net_dm_stats_read(struct net_dm_stats *stats) { int cpu; memset(stats, 0, sizeof(*stats)); for_each_possible_cpu(cpu) { struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); struct net_dm_stats *cpu_stats = &data->stats; unsigned int start; u64 dropped; do { start = u64_stats_fetch_begin(&cpu_stats->syncp); dropped = u64_stats_read(&cpu_stats->dropped); } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); u64_stats_add(&stats->dropped, dropped); } } static int net_dm_stats_put(struct sk_buff *msg) { struct net_dm_stats stats; struct nlattr *attr; net_dm_stats_read(&stats); attr = nla_nest_start(msg, NET_DM_ATTR_STATS); if (!attr) return -EMSGSIZE; if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); return 0; nla_put_failure: nla_nest_cancel(msg, attr); return -EMSGSIZE; } static void net_dm_hw_stats_read(struct net_dm_stats *stats) { int cpu; memset(stats, 0, sizeof(*stats)); for_each_possible_cpu(cpu) { struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); struct net_dm_stats *cpu_stats = &hw_data->stats; unsigned int start; u64 dropped; do { start = u64_stats_fetch_begin(&cpu_stats->syncp); dropped = u64_stats_read(&cpu_stats->dropped); } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); u64_stats_add(&stats->dropped, dropped); } } static int net_dm_hw_stats_put(struct sk_buff *msg) { struct net_dm_stats stats; struct nlattr *attr; net_dm_hw_stats_read(&stats); attr = nla_nest_start(msg, NET_DM_ATTR_HW_STATS); if (!attr) return -EMSGSIZE; if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); return 0; nla_put_failure: nla_nest_cancel(msg, attr); return -EMSGSIZE; } static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info) { void *hdr; int rc; hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &net_drop_monitor_family, 0, NET_DM_CMD_STATS_NEW); if (!hdr) return -EMSGSIZE; rc = net_dm_stats_put(msg); if (rc) goto nla_put_failure; rc = net_dm_hw_stats_put(msg); if (rc) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int net_dm_cmd_stats_get(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; int rc; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; rc = net_dm_stats_fill(msg, info); if (rc) goto free_msg; return genlmsg_reply(msg, info); free_msg: nlmsg_free(msg); return rc; } static int dropmon_net_event(struct notifier_block *ev_block, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dm_hw_stat_delta *stat; switch (event) { case NETDEV_REGISTER: if (WARN_ON_ONCE(rtnl_dereference(dev->dm_private))) break; stat = kzalloc(sizeof(*stat), GFP_KERNEL); if (!stat) break; stat->last_rx = jiffies; rcu_assign_pointer(dev->dm_private, stat); break; case NETDEV_UNREGISTER: stat = rtnl_dereference(dev->dm_private); if (stat) { rcu_assign_pointer(dev->dm_private, NULL); kfree_rcu(stat, rcu); } break; } return NOTIFY_DONE; } static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = { [NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 }, [NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 }, [NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 }, [NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 }, [NET_DM_ATTR_SW_DROPS] = {. type = NLA_FLAG }, [NET_DM_ATTR_HW_DROPS] = {. type = NLA_FLAG }, }; static const struct genl_small_ops dropmon_ops[] = { { .cmd = NET_DM_CMD_CONFIG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_config, .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_START, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_STOP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_CONFIG_GET, .doit = net_dm_cmd_config_get, }, { .cmd = NET_DM_CMD_STATS_GET, .doit = net_dm_cmd_stats_get, }, }; static int net_dm_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) { mutex_lock(&net_dm_mutex); return 0; } static void net_dm_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) { mutex_unlock(&net_dm_mutex); } static struct genl_family net_drop_monitor_family __ro_after_init = { .hdrsize = 0, .name = "NET_DM", .version = 2, .maxattr = NET_DM_ATTR_MAX, .policy = net_dm_nl_policy, .pre_doit = net_dm_nl_pre_doit, .post_doit = net_dm_nl_post_doit, .module = THIS_MODULE, .small_ops = dropmon_ops, .n_small_ops = ARRAY_SIZE(dropmon_ops), .resv_start_op = NET_DM_CMD_STATS_GET + 1, .mcgrps = dropmon_mcgrps, .n_mcgrps = ARRAY_SIZE(dropmon_mcgrps), }; static struct notifier_block dropmon_net_notifier = { .notifier_call = dropmon_net_event }; static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data) { raw_spin_lock_init(&data->lock); skb_queue_head_init(&data->drop_queue); u64_stats_init(&data->stats.syncp); } static void __net_dm_cpu_data_fini(struct per_cpu_dm_data *data) { WARN_ON(!skb_queue_empty(&data->drop_queue)); } static void net_dm_cpu_data_init(int cpu) { struct per_cpu_dm_data *data; data = &per_cpu(dm_cpu_data, cpu); __net_dm_cpu_data_init(data); } static void net_dm_cpu_data_fini(int cpu) { struct per_cpu_dm_data *data; data = &per_cpu(dm_cpu_data, cpu); /* At this point, we should have exclusive access * to this struct and can free the skb inside it. */ consume_skb(data->skb); __net_dm_cpu_data_fini(data); } static void net_dm_hw_cpu_data_init(int cpu) { struct per_cpu_dm_data *hw_data; hw_data = &per_cpu(dm_hw_cpu_data, cpu); __net_dm_cpu_data_init(hw_data); } static void net_dm_hw_cpu_data_fini(int cpu) { struct per_cpu_dm_data *hw_data; hw_data = &per_cpu(dm_hw_cpu_data, cpu); kfree(hw_data->hw_entries); __net_dm_cpu_data_fini(hw_data); } static int __init init_net_drop_monitor(void) { int cpu, rc; pr_info("Initializing network drop monitor service\n"); if (sizeof(void *) > 8) { pr_err("Unable to store program counters on this arch, Drop monitor failed\n"); return -ENOSPC; } rc = genl_register_family(&net_drop_monitor_family); if (rc) { pr_err("Could not create drop monitor netlink family\n"); return rc; } WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT); rc = register_netdevice_notifier(&dropmon_net_notifier); if (rc < 0) { pr_crit("Failed to register netdevice notifier\n"); goto out_unreg; } rc = 0; for_each_possible_cpu(cpu) { net_dm_cpu_data_init(cpu); net_dm_hw_cpu_data_init(cpu); } goto out; out_unreg: genl_unregister_family(&net_drop_monitor_family); out: return rc; } static void exit_net_drop_monitor(void) { int cpu; BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); /* * Because of the module_get/put we do in the trace state change path * we are guaranteed not to have any current users when we get here */ for_each_possible_cpu(cpu) { net_dm_hw_cpu_data_fini(cpu); net_dm_cpu_data_fini(cpu); } BUG_ON(genl_unregister_family(&net_drop_monitor_family)); } module_init(init_net_drop_monitor); module_exit(exit_net_drop_monitor); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>"); MODULE_ALIAS_GENL_FAMILY("NET_DM"); MODULE_DESCRIPTION("Monitoring code for network dropped packet alerts"); |
7 7 3 4 4 3 1 3 5 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 | // SPDX-License-Identifier: GPL-2.0 /* * xfrm4_policy.c * * Changes: * Kazunori MIYAZAWA @USAGI * YOSHIFUJI Hideaki @USAGI * Split up af-specific portion * */ #include <linux/err.h> #include <linux/kernel.h> #include <linux/inetdevice.h> #include <net/dst.h> #include <net/xfrm.h> #include <net/ip.h> #include <net/l3mdev.h> static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, u32 mark) { struct rtable *rt; memset(fl4, 0, sizeof(*fl4)); fl4->daddr = daddr->a4; fl4->flowi4_tos = tos; fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif); fl4->flowi4_mark = mark; if (saddr) fl4->saddr = saddr->a4; rt = __ip_route_output_key(net, fl4); if (!IS_ERR(rt)) return &rt->dst; return ERR_CAST(rt); } static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, u32 mark) { struct flowi4 fl4; return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark); } static int xfrm4_get_saddr(struct net *net, int oif, xfrm_address_t *saddr, xfrm_address_t *daddr, u32 mark) { struct dst_entry *dst; struct flowi4 fl4; dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark); if (IS_ERR(dst)) return -EHOSTUNREACH; saddr->a4 = fl4.saddr; dst_release(dst); return 0; } static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { struct rtable *rt = dst_rtable(xdst->route); const struct flowi4 *fl4 = &fl->u.ip4; xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.dst.dev = dev; netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt.rt_is_input = rt->rt_is_input; xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); xdst->u.rt.rt_type = rt->rt_type; xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; xdst->u.rt.rt_gw_family = rt->rt_gw_family; if (rt->rt_gw_family == AF_INET) xdst->u.rt.rt_gw4 = rt->rt_gw4; else if (rt->rt_gw_family == AF_INET6) xdst->u.rt.rt_gw6 = rt->rt_gw6; xdst->u.rt.rt_pmtu = rt->rt_pmtu; xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; rt_add_uncached_list(&xdst->u.rt); return 0; } static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; path->ops->redirect(path, sk, skb); } static void xfrm4_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; dst_destroy_metrics_generic(dst); rt_del_uncached_list(&xdst->u.rt); xfrm_dst_destroy(xdst); } static struct dst_ops xfrm4_dst_ops_template = { .family = AF_INET, .update_pmtu = xfrm4_update_pmtu, .redirect = xfrm4_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm4_dst_destroy, .ifdown = xfrm_dst_ifdown, .local_out = __ip_local_out, .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .dst_ops = &xfrm4_dst_ops_template, .dst_lookup = xfrm4_dst_lookup, .get_saddr = xfrm4_get_saddr, .fill_dst = xfrm4_fill_dst, .blackhole_route = ipv4_blackhole_route, }; #ifdef CONFIG_SYSCTL static struct ctl_table xfrm4_policy_table[] = { { .procname = "xfrm4_gc_thresh", .data = &init_net.xfrm.xfrm4_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, }; static __net_init int xfrm4_net_sysctl_init(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; table = xfrm4_policy_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(xfrm4_policy_table), GFP_KERNEL); if (!table) goto err_alloc; table[0].data = &net->xfrm.xfrm4_dst_ops.gc_thresh; } hdr = register_net_sysctl_sz(net, "net/ipv4", table, ARRAY_SIZE(xfrm4_policy_table)); if (!hdr) goto err_reg; net->ipv4.xfrm4_hdr = hdr; return 0; err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } static __net_exit void xfrm4_net_sysctl_exit(struct net *net) { const struct ctl_table *table; if (!net->ipv4.xfrm4_hdr) return; table = net->ipv4.xfrm4_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.xfrm4_hdr); if (!net_eq(net, &init_net)) kfree(table); } #else /* CONFIG_SYSCTL */ static inline int xfrm4_net_sysctl_init(struct net *net) { return 0; } static inline void xfrm4_net_sysctl_exit(struct net *net) { } #endif static int __net_init xfrm4_net_init(struct net *net) { int ret; memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template, sizeof(xfrm4_dst_ops_template)); ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops); if (ret) return ret; ret = xfrm4_net_sysctl_init(net); if (ret) dst_entries_destroy(&net->xfrm.xfrm4_dst_ops); return ret; } static void __net_exit xfrm4_net_exit(struct net *net) { xfrm4_net_sysctl_exit(net); dst_entries_destroy(&net->xfrm.xfrm4_dst_ops); } static struct pernet_operations __net_initdata xfrm4_net_ops = { .init = xfrm4_net_init, .exit = xfrm4_net_exit, }; static void __init xfrm4_policy_init(void) { xfrm_policy_register_afinfo(&xfrm4_policy_afinfo, AF_INET); } void __init xfrm4_init(void) { xfrm4_state_init(); xfrm4_policy_init(); xfrm4_protocol_init(); register_pernet_subsys(&xfrm4_net_ops); } |
23 494 54 465 507 141 488 501 502 498 18 32 587 593 590 590 587 594 416 473 183 201 501 448 467 474 498 503 499 463 502 1 410 510 514 502 467 466 445 445 23 23 23 209 209 198 187 208 191 193 795 423 795 792 15 15 211 209 193 193 208 3 211 211 783 211 781 782 16 16 69 69 69 67 411 412 174 175 173 174 411 411 410 410 595 580 591 623 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 | // SPDX-License-Identifier: GPL-2.0 /* * kobject.c - library routines for handling generic kernel objects * * Copyright (c) 2002-2003 Patrick Mochel <mochel@osdl.org> * Copyright (c) 2006-2007 Greg Kroah-Hartman <greg@kroah.com> * Copyright (c) 2006-2007 Novell Inc. * * Please see the file Documentation/core-api/kobject.rst for critical information * about using the kobject interface. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kobject.h> #include <linux/string.h> #include <linux/export.h> #include <linux/stat.h> #include <linux/slab.h> #include <linux/random.h> /** * kobject_namespace() - Return @kobj's namespace tag. * @kobj: kobject in question * * Returns namespace tag of @kobj if its parent has namespace ops enabled * and thus @kobj should have a namespace tag associated with it. Returns * %NULL otherwise. */ const void *kobject_namespace(const struct kobject *kobj) { const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj); if (!ns_ops || ns_ops->type == KOBJ_NS_TYPE_NONE) return NULL; return kobj->ktype->namespace(kobj); } /** * kobject_get_ownership() - Get sysfs ownership data for @kobj. * @kobj: kobject in question * @uid: kernel user ID for sysfs objects * @gid: kernel group ID for sysfs objects * * Returns initial uid/gid pair that should be used when creating sysfs * representation of given kobject. Normally used to adjust ownership of * objects in a container. */ void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { *uid = GLOBAL_ROOT_UID; *gid = GLOBAL_ROOT_GID; if (kobj->ktype->get_ownership) kobj->ktype->get_ownership(kobj, uid, gid); } static bool kobj_ns_type_is_valid(enum kobj_ns_type type) { if ((type <= KOBJ_NS_TYPE_NONE) || (type >= KOBJ_NS_TYPES)) return false; return true; } static int create_dir(struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); const struct kobj_ns_type_operations *ops; int error; error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj)); if (error) return error; if (ktype) { error = sysfs_create_groups(kobj, ktype->default_groups); if (error) { sysfs_remove_dir(kobj); return error; } } /* * @kobj->sd may be deleted by an ancestor going away. Hold an * extra reference so that it stays until @kobj is gone. */ sysfs_get(kobj->sd); /* * If @kobj has ns_ops, its children need to be filtered based on * their namespace tags. Enable namespace support on @kobj->sd. */ ops = kobj_child_ns_ops(kobj); if (ops) { BUG_ON(!kobj_ns_type_is_valid(ops->type)); BUG_ON(!kobj_ns_type_registered(ops->type)); sysfs_enable_ns(kobj->sd); } return 0; } static int get_kobj_path_length(const struct kobject *kobj) { int length = 1; const struct kobject *parent = kobj; /* walk up the ancestors until we hit the one pointing to the * root. * Add 1 to strlen for leading '/' of each level. */ do { if (kobject_name(parent) == NULL) return 0; length += strlen(kobject_name(parent)) + 1; parent = parent->parent; } while (parent); return length; } static int fill_kobj_path(const struct kobject *kobj, char *path, int length) { const struct kobject *parent; --length; for (parent = kobj; parent; parent = parent->parent) { int cur = strlen(kobject_name(parent)); /* back up enough to print this name with '/' */ length -= cur; if (length <= 0) return -EINVAL; memcpy(path + length, kobject_name(parent), cur); *(path + --length) = '/'; } pr_debug("'%s' (%p): %s: path = '%s'\n", kobject_name(kobj), kobj, __func__, path); return 0; } /** * kobject_get_path() - Allocate memory and fill in the path for @kobj. * @kobj: kobject in question, with which to build the path * @gfp_mask: the allocation type used to allocate the path * * Return: The newly allocated memory, caller must free with kfree(). */ char *kobject_get_path(const struct kobject *kobj, gfp_t gfp_mask) { char *path; int len; retry: len = get_kobj_path_length(kobj); if (len == 0) return NULL; path = kzalloc(len, gfp_mask); if (!path) return NULL; if (fill_kobj_path(kobj, path, len)) { kfree(path); goto retry; } return path; } EXPORT_SYMBOL_GPL(kobject_get_path); /* add the kobject to its kset's list */ static void kobj_kset_join(struct kobject *kobj) { if (!kobj->kset) return; kset_get(kobj->kset); spin_lock(&kobj->kset->list_lock); list_add_tail(&kobj->entry, &kobj->kset->list); spin_unlock(&kobj->kset->list_lock); } /* remove the kobject from its kset's list */ static void kobj_kset_leave(struct kobject *kobj) { if (!kobj->kset) return; spin_lock(&kobj->kset->list_lock); list_del_init(&kobj->entry); spin_unlock(&kobj->kset->list_lock); kset_put(kobj->kset); } static void kobject_init_internal(struct kobject *kobj) { if (!kobj) return; kref_init(&kobj->kref); INIT_LIST_HEAD(&kobj->entry); kobj->state_in_sysfs = 0; kobj->state_add_uevent_sent = 0; kobj->state_remove_uevent_sent = 0; kobj->state_initialized = 1; } static int kobject_add_internal(struct kobject *kobj) { int error = 0; struct kobject *parent; if (!kobj) return -ENOENT; if (!kobj->name || !kobj->name[0]) { WARN(1, "kobject: (%p): attempted to be registered with empty name!\n", kobj); return -EINVAL; } parent = kobject_get(kobj->parent); /* join kset if set, use it as parent if we do not already have one */ if (kobj->kset) { if (!parent) parent = kobject_get(&kobj->kset->kobj); kobj_kset_join(kobj); kobj->parent = parent; } pr_debug("'%s' (%p): %s: parent: '%s', set: '%s'\n", kobject_name(kobj), kobj, __func__, parent ? kobject_name(parent) : "<NULL>", kobj->kset ? kobject_name(&kobj->kset->kobj) : "<NULL>"); error = create_dir(kobj); if (error) { kobj_kset_leave(kobj); kobject_put(parent); kobj->parent = NULL; /* be noisy on error issues */ if (error == -EEXIST) pr_err("%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n", __func__, kobject_name(kobj)); else pr_err("%s failed for %s (error: %d parent: %s)\n", __func__, kobject_name(kobj), error, parent ? kobject_name(parent) : "'none'"); } else kobj->state_in_sysfs = 1; return error; } /** * kobject_set_name_vargs() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * @vargs: vargs to format the string. */ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list vargs) { const char *s; if (kobj->name && !fmt) return 0; s = kvasprintf_const(GFP_KERNEL, fmt, vargs); if (!s) return -ENOMEM; /* * ewww... some of these buggers have '/' in the name ... If * that's the case, we need to make sure we have an actual * allocated copy to modify, since kvasprintf_const may have * returned something from .rodata. */ if (strchr(s, '/')) { char *t; t = kstrdup(s, GFP_KERNEL); kfree_const(s); if (!t) return -ENOMEM; s = strreplace(t, '/', '!'); } kfree_const(kobj->name); kobj->name = s; return 0; } /** * kobject_set_name() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * * This sets the name of the kobject. If you have already added the * kobject to the system, you must call kobject_rename() in order to * change the name of the kobject. */ int kobject_set_name(struct kobject *kobj, const char *fmt, ...) { va_list vargs; int retval; va_start(vargs, fmt); retval = kobject_set_name_vargs(kobj, fmt, vargs); va_end(vargs); return retval; } EXPORT_SYMBOL(kobject_set_name); /** * kobject_init() - Initialize a kobject structure. * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * * This function will properly initialize a kobject such that it can then * be passed to the kobject_add() call. * * After this function is called, the kobject MUST be cleaned up by a call * to kobject_put(), not by a call to kfree directly to ensure that all of * the memory is cleaned up properly. */ void kobject_init(struct kobject *kobj, const struct kobj_type *ktype) { char *err_str; if (!kobj) { err_str = "invalid kobject pointer!"; goto error; } if (!ktype) { err_str = "must have a ktype to be initialized properly!\n"; goto error; } if (kobj->state_initialized) { /* do not error out as sometimes we can recover */ pr_err("kobject (%p): tried to init an initialized object, something is seriously wrong.\n", kobj); dump_stack_lvl(KERN_ERR); } kobject_init_internal(kobj); kobj->ktype = ktype; return; error: pr_err("kobject (%p): %s\n", kobj, err_str); dump_stack_lvl(KERN_ERR); } EXPORT_SYMBOL(kobject_init); static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, struct kobject *parent, const char *fmt, va_list vargs) { int retval; retval = kobject_set_name_vargs(kobj, fmt, vargs); if (retval) { pr_err("can not set name properly!\n"); return retval; } kobj->parent = parent; return kobject_add_internal(kobj); } /** * kobject_add() - The main kobject add function. * @kobj: the kobject to add * @parent: pointer to the parent of the kobject. * @fmt: format to name the kobject with. * * The kobject name is set and added to the kobject hierarchy in this * function. * * If @parent is set, then the parent of the @kobj will be set to it. * If @parent is NULL, then the parent of the @kobj will be set to the * kobject associated with the kset assigned to this kobject. If no kset * is assigned to the kobject, then the kobject will be located in the * root of the sysfs tree. * * Note, no "add" uevent will be created with this call, the caller should set * up all of the necessary sysfs files for the object and then call * kobject_uevent() with the UEVENT_ADD parameter to ensure that * userspace is properly notified of this kobject's creation. * * Return: If this function returns an error, kobject_put() must be * called to properly clean up the memory associated with the * object. Under no instance should the kobject that is passed * to this function be directly freed with a call to kfree(), * that can leak memory. * * If this function returns success, kobject_put() must also be called * in order to properly clean up the memory associated with the object. * * In short, once this function is called, kobject_put() MUST be called * when the use of the object is finished in order to properly free * everything. */ int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) { va_list args; int retval; if (!kobj) return -EINVAL; if (!kobj->state_initialized) { pr_err("kobject '%s' (%p): tried to add an uninitialized object, something is seriously wrong.\n", kobject_name(kobj), kobj); dump_stack_lvl(KERN_ERR); return -EINVAL; } va_start(args, fmt); retval = kobject_add_varg(kobj, parent, fmt, args); va_end(args); return retval; } EXPORT_SYMBOL(kobject_add); /** * kobject_init_and_add() - Initialize a kobject structure and add it to * the kobject hierarchy. * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * @parent: pointer to the parent of this kobject. * @fmt: the name of the kobject. * * This function combines the call to kobject_init() and kobject_add(). * * If this function returns an error, kobject_put() must be called to * properly clean up the memory associated with the object. This is the * same type of error handling after a call to kobject_add() and kobject * lifetime rules are the same here. */ int kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...) { va_list args; int retval; kobject_init(kobj, ktype); va_start(args, fmt); retval = kobject_add_varg(kobj, parent, fmt, args); va_end(args); return retval; } EXPORT_SYMBOL_GPL(kobject_init_and_add); /** * kobject_rename() - Change the name of an object. * @kobj: object in question. * @new_name: object's new name * * It is the responsibility of the caller to provide mutual * exclusion between two different calls of kobject_rename * on the same kobject and to ensure that new_name is valid and * won't conflict with other kobjects. */ int kobject_rename(struct kobject *kobj, const char *new_name) { int error = 0; const char *devpath = NULL; const char *dup_name = NULL, *name; char *devpath_string = NULL; char *envp[2]; kobj = kobject_get(kobj); if (!kobj) return -EINVAL; if (!kobj->parent) { kobject_put(kobj); return -EINVAL; } devpath = kobject_get_path(kobj, GFP_KERNEL); if (!devpath) { error = -ENOMEM; goto out; } devpath_string = kmalloc(strlen(devpath) + 15, GFP_KERNEL); if (!devpath_string) { error = -ENOMEM; goto out; } sprintf(devpath_string, "DEVPATH_OLD=%s", devpath); envp[0] = devpath_string; envp[1] = NULL; name = dup_name = kstrdup_const(new_name, GFP_KERNEL); if (!name) { error = -ENOMEM; goto out; } error = sysfs_rename_dir_ns(kobj, new_name, kobject_namespace(kobj)); if (error) goto out; /* Install the new kobject name */ dup_name = kobj->name; kobj->name = name; /* This function is mostly/only used for network interface. * Some hotplug package track interfaces by their name and * therefore want to know when the name is changed by the user. */ kobject_uevent_env(kobj, KOBJ_MOVE, envp); out: kfree_const(dup_name); kfree(devpath_string); kfree(devpath); kobject_put(kobj); return error; } EXPORT_SYMBOL_GPL(kobject_rename); /** * kobject_move() - Move object to another parent. * @kobj: object in question. * @new_parent: object's new parent (can be NULL) */ int kobject_move(struct kobject *kobj, struct kobject *new_parent) { int error; struct kobject *old_parent; const char *devpath = NULL; char *devpath_string = NULL; char *envp[2]; kobj = kobject_get(kobj); if (!kobj) return -EINVAL; new_parent = kobject_get(new_parent); if (!new_parent) { if (kobj->kset) new_parent = kobject_get(&kobj->kset->kobj); } /* old object path */ devpath = kobject_get_path(kobj, GFP_KERNEL); if (!devpath) { error = -ENOMEM; goto out; } devpath_string = kmalloc(strlen(devpath) + 15, GFP_KERNEL); if (!devpath_string) { error = -ENOMEM; goto out; } sprintf(devpath_string, "DEVPATH_OLD=%s", devpath); envp[0] = devpath_string; envp[1] = NULL; error = sysfs_move_dir_ns(kobj, new_parent, kobject_namespace(kobj)); if (error) goto out; old_parent = kobj->parent; kobj->parent = new_parent; new_parent = NULL; kobject_put(old_parent); kobject_uevent_env(kobj, KOBJ_MOVE, envp); out: kobject_put(new_parent); kobject_put(kobj); kfree(devpath_string); kfree(devpath); return error; } EXPORT_SYMBOL_GPL(kobject_move); static void __kobject_del(struct kobject *kobj) { struct kernfs_node *sd; const struct kobj_type *ktype; sd = kobj->sd; ktype = get_ktype(kobj); if (ktype) sysfs_remove_groups(kobj, ktype->default_groups); /* send "remove" if the caller did not do it but sent "add" */ if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) { pr_debug("'%s' (%p): auto cleanup 'remove' event\n", kobject_name(kobj), kobj); kobject_uevent(kobj, KOBJ_REMOVE); } sysfs_remove_dir(kobj); sysfs_put(sd); kobj->state_in_sysfs = 0; kobj_kset_leave(kobj); kobj->parent = NULL; } /** * kobject_del() - Unlink kobject from hierarchy. * @kobj: object. * * This is the function that should be called to delete an object * successfully added via kobject_add(). */ void kobject_del(struct kobject *kobj) { struct kobject *parent; if (!kobj) return; parent = kobj->parent; __kobject_del(kobj); kobject_put(parent); } EXPORT_SYMBOL(kobject_del); /** * kobject_get() - Increment refcount for object. * @kobj: object. */ struct kobject *kobject_get(struct kobject *kobj) { if (kobj) { if (!kobj->state_initialized) WARN(1, KERN_WARNING "kobject: '%s' (%p): is not initialized, yet kobject_get() is being called.\n", kobject_name(kobj), kobj); kref_get(&kobj->kref); } return kobj; } EXPORT_SYMBOL(kobject_get); struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj) { if (!kobj) return NULL; if (!kref_get_unless_zero(&kobj->kref)) kobj = NULL; return kobj; } EXPORT_SYMBOL(kobject_get_unless_zero); /* * kobject_cleanup - free kobject resources. * @kobj: object to cleanup */ static void kobject_cleanup(struct kobject *kobj) { struct kobject *parent = kobj->parent; const struct kobj_type *t = get_ktype(kobj); const char *name = kobj->name; pr_debug("'%s' (%p): %s, parent %p\n", kobject_name(kobj), kobj, __func__, kobj->parent); if (t && !t->release) pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", kobject_name(kobj), kobj); /* remove from sysfs if the caller did not do it */ if (kobj->state_in_sysfs) { pr_debug("'%s' (%p): auto cleanup kobject_del\n", kobject_name(kobj), kobj); __kobject_del(kobj); } else { /* avoid dropping the parent reference unnecessarily */ parent = NULL; } if (t && t->release) { pr_debug("'%s' (%p): calling ktype release\n", kobject_name(kobj), kobj); t->release(kobj); } /* free name if we allocated it */ if (name) { pr_debug("'%s': free name\n", name); kfree_const(name); } kobject_put(parent); } #ifdef CONFIG_DEBUG_KOBJECT_RELEASE static void kobject_delayed_cleanup(struct work_struct *work) { kobject_cleanup(container_of(to_delayed_work(work), struct kobject, release)); } #endif static void kobject_release(struct kref *kref) { struct kobject *kobj = container_of(kref, struct kobject, kref); #ifdef CONFIG_DEBUG_KOBJECT_RELEASE unsigned long delay = HZ + HZ * get_random_u32_below(4); pr_info("'%s' (%p): %s, parent %p (delayed %ld)\n", kobject_name(kobj), kobj, __func__, kobj->parent, delay); INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); schedule_delayed_work(&kobj->release, delay); #else kobject_cleanup(kobj); #endif } /** * kobject_put() - Decrement refcount for object. * @kobj: object. * * Decrement the refcount, and if 0, call kobject_cleanup(). */ void kobject_put(struct kobject *kobj) { if (kobj) { if (!kobj->state_initialized) WARN(1, KERN_WARNING "kobject: '%s' (%p): is not initialized, yet kobject_put() is being called.\n", kobject_name(kobj), kobj); kref_put(&kobj->kref, kobject_release); } } EXPORT_SYMBOL(kobject_put); static void dynamic_kobj_release(struct kobject *kobj) { pr_debug("(%p): %s\n", kobj, __func__); kfree(kobj); } static const struct kobj_type dynamic_kobj_ktype = { .release = dynamic_kobj_release, .sysfs_ops = &kobj_sysfs_ops, }; /** * kobject_create() - Create a struct kobject dynamically. * * This function creates a kobject structure dynamically and sets it up * to be a "dynamic" kobject with a default release function set up. * * If the kobject was not able to be created, NULL will be returned. * The kobject structure returned from here must be cleaned up with a * call to kobject_put() and not kfree(), as kobject_init() has * already been called on this structure. */ static struct kobject *kobject_create(void) { struct kobject *kobj; kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); if (!kobj) return NULL; kobject_init(kobj, &dynamic_kobj_ktype); return kobj; } /** * kobject_create_and_add() - Create a struct kobject dynamically and * register it with sysfs. * @name: the name for the kobject * @parent: the parent kobject of this kobject, if any. * * This function creates a kobject structure dynamically and registers it * with sysfs. When you are finished with this structure, call * kobject_put() and the structure will be dynamically freed when * it is no longer being used. * * If the kobject was not able to be created, NULL will be returned. */ struct kobject *kobject_create_and_add(const char *name, struct kobject *parent) { struct kobject *kobj; int retval; kobj = kobject_create(); if (!kobj) return NULL; retval = kobject_add(kobj, parent, "%s", name); if (retval) { pr_warn("%s: kobject_add error: %d\n", __func__, retval); kobject_put(kobj); kobj = NULL; } return kobj; } EXPORT_SYMBOL_GPL(kobject_create_and_add); /** * kset_init() - Initialize a kset for use. * @k: kset */ void kset_init(struct kset *k) { kobject_init_internal(&k->kobj); INIT_LIST_HEAD(&k->list); spin_lock_init(&k->list_lock); } /* default kobject attribute operations */ static ssize_t kobj_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct kobj_attribute *kattr; ssize_t ret = -EIO; kattr = container_of(attr, struct kobj_attribute, attr); if (kattr->show) ret = kattr->show(kobj, kattr, buf); return ret; } static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct kobj_attribute *kattr; ssize_t ret = -EIO; kattr = container_of(attr, struct kobj_attribute, attr); if (kattr->store) ret = kattr->store(kobj, kattr, buf, count); return ret; } const struct sysfs_ops kobj_sysfs_ops = { .show = kobj_attr_show, .store = kobj_attr_store, }; EXPORT_SYMBOL_GPL(kobj_sysfs_ops); /** * kset_register() - Initialize and add a kset. * @k: kset. * * NOTE: On error, the kset.kobj.name allocated by() kobj_set_name() * is freed, it can not be used any more. */ int kset_register(struct kset *k) { int err; if (!k) return -EINVAL; if (!k->kobj.ktype) { pr_err("must have a ktype to be initialized properly!\n"); return -EINVAL; } kset_init(k); err = kobject_add_internal(&k->kobj); if (err) { kfree_const(k->kobj.name); /* Set it to NULL to avoid accessing bad pointer in callers. */ k->kobj.name = NULL; return err; } kobject_uevent(&k->kobj, KOBJ_ADD); return 0; } EXPORT_SYMBOL(kset_register); /** * kset_unregister() - Remove a kset. * @k: kset. */ void kset_unregister(struct kset *k) { if (!k) return; kobject_del(&k->kobj); kobject_put(&k->kobj); } EXPORT_SYMBOL(kset_unregister); /** * kset_find_obj() - Search for object in kset. * @kset: kset we're looking in. * @name: object's name. * * Lock kset via @kset->subsys, and iterate over @kset->list, * looking for a matching kobject. If matching object is found * take a reference and return the object. */ struct kobject *kset_find_obj(struct kset *kset, const char *name) { struct kobject *k; struct kobject *ret = NULL; spin_lock(&kset->list_lock); list_for_each_entry(k, &kset->list, entry) { if (kobject_name(k) && !strcmp(kobject_name(k), name)) { ret = kobject_get_unless_zero(k); break; } } spin_unlock(&kset->list_lock); return ret; } EXPORT_SYMBOL_GPL(kset_find_obj); static void kset_release(struct kobject *kobj) { struct kset *kset = container_of(kobj, struct kset, kobj); pr_debug("'%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); kfree(kset); } static void kset_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { if (kobj->parent) kobject_get_ownership(kobj->parent, uid, gid); } static const struct kobj_type kset_ktype = { .sysfs_ops = &kobj_sysfs_ops, .release = kset_release, .get_ownership = kset_get_ownership, }; /** * kset_create() - Create a struct kset dynamically. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset * @parent_kobj: the parent kobject of this kset, if any. * * This function creates a kset structure dynamically. This structure can * then be registered with the system and show up in sysfs with a call to * kset_register(). When you are finished with this structure, if * kset_register() has been called, call kset_unregister() and the * structure will be dynamically freed when it is no longer being used. * * If the kset was not able to be created, NULL will be returned. */ static struct kset *kset_create(const char *name, const struct kset_uevent_ops *uevent_ops, struct kobject *parent_kobj) { struct kset *kset; int retval; kset = kzalloc(sizeof(*kset), GFP_KERNEL); if (!kset) return NULL; retval = kobject_set_name(&kset->kobj, "%s", name); if (retval) { kfree(kset); return NULL; } kset->uevent_ops = uevent_ops; kset->kobj.parent = parent_kobj; /* * The kobject of this kset will have a type of kset_ktype and belong to * no kset itself. That way we can properly free it when it is * finished being used. */ kset->kobj.ktype = &kset_ktype; kset->kobj.kset = NULL; return kset; } /** * kset_create_and_add() - Create a struct kset dynamically and add it to sysfs. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset * @parent_kobj: the parent kobject of this kset, if any. * * This function creates a kset structure dynamically and registers it * with sysfs. When you are finished with this structure, call * kset_unregister() and the structure will be dynamically freed when it * is no longer being used. * * If the kset was not able to be created, NULL will be returned. */ struct kset *kset_create_and_add(const char *name, const struct kset_uevent_ops *uevent_ops, struct kobject *parent_kobj) { struct kset *kset; int error; kset = kset_create(name, uevent_ops, parent_kobj); if (!kset) return NULL; error = kset_register(kset); if (error) { kfree(kset); return NULL; } return kset; } EXPORT_SYMBOL_GPL(kset_create_and_add); static DEFINE_SPINLOCK(kobj_ns_type_lock); static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) { enum kobj_ns_type type = ops->type; int error; spin_lock(&kobj_ns_type_lock); error = -EINVAL; if (!kobj_ns_type_is_valid(type)) goto out; error = -EBUSY; if (kobj_ns_ops_tbl[type]) goto out; error = 0; kobj_ns_ops_tbl[type] = ops; out: spin_unlock(&kobj_ns_type_lock); return error; } int kobj_ns_type_registered(enum kobj_ns_type type) { int registered = 0; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type)) registered = kobj_ns_ops_tbl[type] != NULL; spin_unlock(&kobj_ns_type_lock); return registered; } const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *parent) { const struct kobj_ns_type_operations *ops = NULL; if (parent && parent->ktype && parent->ktype->child_ns_type) ops = parent->ktype->child_ns_type(parent); return ops; } const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj) { return kobj_child_ns_ops(kobj->parent); } bool kobj_ns_current_may_mount(enum kobj_ns_type type) { bool may_mount = true; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) may_mount = kobj_ns_ops_tbl[type]->current_may_mount(); spin_unlock(&kobj_ns_type_lock); return may_mount; } void *kobj_ns_grab_current(enum kobj_ns_type type) { void *ns = NULL; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) ns = kobj_ns_ops_tbl[type]->grab_current_ns(); spin_unlock(&kobj_ns_type_lock); return ns; } EXPORT_SYMBOL_GPL(kobj_ns_grab_current); const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) { const void *ns = NULL; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) ns = kobj_ns_ops_tbl[type]->netlink_ns(sk); spin_unlock(&kobj_ns_type_lock); return ns; } const void *kobj_ns_initial(enum kobj_ns_type type) { const void *ns = NULL; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) ns = kobj_ns_ops_tbl[type]->initial_ns(); spin_unlock(&kobj_ns_type_lock); return ns; } void kobj_ns_drop(enum kobj_ns_type type, void *ns) { spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type] && kobj_ns_ops_tbl[type]->drop_ns) kobj_ns_ops_tbl[type]->drop_ns(ns); spin_unlock(&kobj_ns_type_lock); } EXPORT_SYMBOL_GPL(kobj_ns_drop); |
2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 | // SPDX-License-Identifier: GPL-2.0-or-later /* * IPVS: Source Hashing scheduling module * * Authors: Wensong Zhang <wensong@gnuchina.org> * * Changes: */ /* * The sh algorithm is to select server by the hash key of source IP * address. The pseudo code is as follows: * * n <- servernode[src_ip]; * if (n is dead) OR * (n is overloaded) or (n.weight <= 0) then * return NULL; * * return n; * * Notes that servernode is a 256-bucket hash table that maps the hash * index derived from packet source IP address to the current server * array. If the sh scheduler is used in cache cluster, it is good to * combine it with cache_bypass feature. When the statically assigned * server is dead or overloaded, the load balancer can bypass the cache * server and send requests to the original server directly. * * The weight destination attribute can be used to control the * distribution of connections to the destinations in servernode. The * greater the weight, the more connections the destination * will receive. * */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/ip.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <net/ip_vs.h> #include <net/tcp.h> #include <linux/udp.h> #include <linux/sctp.h> /* * IPVS SH bucket */ struct ip_vs_sh_bucket { struct ip_vs_dest __rcu *dest; /* real server (cache) */ }; /* * for IPVS SH entry hash table */ #ifndef CONFIG_IP_VS_SH_TAB_BITS #define CONFIG_IP_VS_SH_TAB_BITS 8 #endif #define IP_VS_SH_TAB_BITS CONFIG_IP_VS_SH_TAB_BITS #define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS) #define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1) struct ip_vs_sh_state { struct rcu_head rcu_head; struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; }; /* Helper function to determine if server is unavailable */ static inline bool is_unavailable(struct ip_vs_dest *dest) { return atomic_read(&dest->weight) <= 0 || dest->flags & IP_VS_DEST_F_OVERLOAD; } /* * Returns hash value for IPVS SH entry */ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr, __be16 port, unsigned int offset) { __be32 addr_fold = addr->ip; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) addr_fold = addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif return (offset + hash_32(ntohs(port) + ntohl(addr_fold), IP_VS_SH_TAB_BITS)) & IP_VS_SH_TAB_MASK; } /* * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s, const union nf_inet_addr *addr, __be16 port) { unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0); struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest); return (!dest || is_unavailable(dest)) ? NULL : dest; } /* As ip_vs_sh_get, but with fallback if selected server is unavailable * * The fallback strategy loops around the table starting from a "random" * point (in fact, it is chosen to be the original hash value to make the * algorithm deterministic) to find a new server. */ static inline struct ip_vs_dest * ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, const union nf_inet_addr *addr, __be16 port) { unsigned int offset, roffset; unsigned int hash, ihash; struct ip_vs_dest *dest; /* first try the dest it's supposed to go to */ ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0); dest = rcu_dereference(s->buckets[ihash].dest); if (!dest) return NULL; if (!is_unavailable(dest)) return dest; IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); /* if the original dest is unavailable, loop around the table * starting from ihash to find a new dest */ for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) { roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE; hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset); dest = rcu_dereference(s->buckets[hash].dest); if (!dest) break; if (!is_unavailable(dest)) return dest; IP_VS_DBG_BUF(6, "SH: selected unavailable " "server %s:%d (offset %d), reselecting", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), roffset); } return NULL; } /* * Assign all the hash buckets of the specified table with the service. */ static int ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc) { int i; struct ip_vs_sh_bucket *b; struct list_head *p; struct ip_vs_dest *dest; int d_count; bool empty; b = &s->buckets[0]; p = &svc->destinations; empty = list_empty(p); d_count = 0; for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { dest = rcu_dereference_protected(b->dest, 1); if (dest) ip_vs_dest_put(dest); if (empty) RCU_INIT_POINTER(b->dest, NULL); else { if (p == &svc->destinations) p = p->next; dest = list_entry(p, struct ip_vs_dest, n_list); ip_vs_dest_hold(dest); RCU_INIT_POINTER(b->dest, dest); IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n", i, IP_VS_DBG_ADDR(dest->af, &dest->addr), atomic_read(&dest->weight)); /* Don't move to next dest until filling weight */ if (++d_count >= atomic_read(&dest->weight)) { p = p->next; d_count = 0; } } b++; } return 0; } /* * Flush all the hash buckets of the specified table. */ static void ip_vs_sh_flush(struct ip_vs_sh_state *s) { int i; struct ip_vs_sh_bucket *b; struct ip_vs_dest *dest; b = &s->buckets[0]; for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { dest = rcu_dereference_protected(b->dest, 1); if (dest) { ip_vs_dest_put(dest); RCU_INIT_POINTER(b->dest, NULL); } b++; } } static int ip_vs_sh_init_svc(struct ip_vs_service *svc) { struct ip_vs_sh_state *s; /* allocate the SH table for this service */ s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL); if (s == NULL) return -ENOMEM; svc->sched_data = s; IP_VS_DBG(6, "SH hash table (memory=%zdbytes) allocated for " "current service\n", sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); /* assign the hash buckets with current dests */ ip_vs_sh_reassign(s, svc); return 0; } static void ip_vs_sh_done_svc(struct ip_vs_service *svc) { struct ip_vs_sh_state *s = svc->sched_data; /* got to clean up hash buckets here */ ip_vs_sh_flush(s); /* release the table itself */ kfree_rcu(s, rcu_head); IP_VS_DBG(6, "SH hash table (memory=%zdbytes) released\n", sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); } static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, struct ip_vs_dest *dest) { struct ip_vs_sh_state *s = svc->sched_data; /* assign the hash buckets with the updated service */ ip_vs_sh_reassign(s, svc); return 0; } /* Helper function to get port number */ static inline __be16 ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) { __be16 _ports[2], *ports; /* At this point we know that we have a valid packet of some kind. * Because ICMP packets are only guaranteed to have the first 8 * bytes, let's just grab the ports. Fortunately they're in the * same position for all three of the protocols we care about. */ switch (iph->protocol) { case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_SCTP: ports = skb_header_pointer(skb, iph->len, sizeof(_ports), &_ports); if (unlikely(!ports)) return 0; if (likely(!ip_vs_iph_inverse(iph))) return ports[0]; else return ports[1]; default: return 0; } } /* * Source Hashing scheduling */ static struct ip_vs_dest * ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_sh_state *s; __be16 port = 0; const union nf_inet_addr *hash_addr; hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr; IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT) port = ip_vs_sh_get_port(skb, iph); s = (struct ip_vs_sh_state *) svc->sched_data; if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port); else dest = ip_vs_sh_get(svc, s, hash_addr, port); if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); return NULL; } IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", IP_VS_DBG_ADDR(svc->af, hash_addr), IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); return dest; } /* * IPVS SH Scheduler structure */ static struct ip_vs_scheduler ip_vs_sh_scheduler = { .name = "sh", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), .init_service = ip_vs_sh_init_svc, .done_service = ip_vs_sh_done_svc, .add_dest = ip_vs_sh_dest_changed, .del_dest = ip_vs_sh_dest_changed, .upd_dest = ip_vs_sh_dest_changed, .schedule = ip_vs_sh_schedule, }; static int __init ip_vs_sh_init(void) { return register_ip_vs_scheduler(&ip_vs_sh_scheduler); } static void __exit ip_vs_sh_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_sh_scheduler); synchronize_rcu(); } module_init(ip_vs_sh_init); module_exit(ip_vs_sh_cleanup); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ipvs source hashing scheduler"); |
81 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NSPROXY_H #define _LINUX_NSPROXY_H #include <linux/refcount.h> #include <linux/spinlock.h> #include <linux/sched.h> struct mnt_namespace; struct uts_namespace; struct ipc_namespace; struct pid_namespace; struct cgroup_namespace; struct fs_struct; /* * A structure to contain pointers to all per-process * namespaces - fs (mount), uts, network, sysvipc, etc. * * The pid namespace is an exception -- it's accessed using * task_active_pid_ns. The pid namespace here is the * namespace that children will use. * * 'count' is the number of tasks holding a reference. * The count for each namespace, then, will be the number * of nsproxies pointing to it, not the number of tasks. * * The nsproxy is shared by tasks which share all namespaces. * As soon as a single namespace is cloned or unshared, the * nsproxy is copied. */ struct nsproxy { refcount_t count; struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns_for_children; struct net *net_ns; struct time_namespace *time_ns; struct time_namespace *time_ns_for_children; struct cgroup_namespace *cgroup_ns; }; extern struct nsproxy init_nsproxy; #define to_ns_common(__ns) \ _Generic((__ns), \ struct cgroup_namespace *: &(__ns->ns), \ struct ipc_namespace *: &(__ns->ns), \ struct net *: &(__ns->ns), \ struct pid_namespace *: &(__ns->ns), \ struct mnt_namespace *: &(__ns->ns), \ struct time_namespace *: &(__ns->ns), \ struct user_namespace *: &(__ns->ns), \ struct uts_namespace *: &(__ns->ns)) /* * A structure to encompass all bits needed to install * a partial or complete new set of namespaces. * * If a new user namespace is requested cred will * point to a modifiable set of credentials. If a pointer * to a modifiable set is needed nsset_cred() must be * used and tested. */ struct nsset { unsigned flags; struct nsproxy *nsproxy; struct fs_struct *fs; const struct cred *cred; }; static inline struct cred *nsset_cred(struct nsset *set) { if (set->flags & CLONE_NEWUSER) return (struct cred *)set->cred; return NULL; } /* * the namespaces access rules are: * * 1. only current task is allowed to change tsk->nsproxy pointer or * any pointer on the nsproxy itself. Current must hold the task_lock * when changing tsk->nsproxy. * * 2. when accessing (i.e. reading) current task's namespaces - no * precautions should be taken - just dereference the pointers * * 3. the access to other task namespaces is performed like this * task_lock(task); * nsproxy = task->nsproxy; * if (nsproxy != NULL) { * / * * * work with the namespaces here * * e.g. get the reference on one of them * * / * } / * * * NULL task->nsproxy means that this task is * * almost dead (zombie) * * / * task_unlock(task); * */ int copy_namespaces(unsigned long flags, struct task_struct *tsk); void exit_task_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); int exec_task_namespaces(void); void free_nsproxy(struct nsproxy *ns); int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, struct cred *, struct fs_struct *); int __init nsproxy_cache_init(void); static inline void put_nsproxy(struct nsproxy *ns) { if (refcount_dec_and_test(&ns->count)) free_nsproxy(ns); } static inline void get_nsproxy(struct nsproxy *ns) { refcount_inc(&ns->count); } DEFINE_FREE(put_nsproxy, struct nsproxy *, if (_T) put_nsproxy(_T)) #endif |
79 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __IPC_NAMESPACE_H__ #define __IPC_NAMESPACE_H__ #include <linux/err.h> #include <linux/idr.h> #include <linux/rwsem.h> #include <linux/notifier.h> #include <linux/nsproxy.h> #include <linux/ns_common.h> #include <linux/refcount.h> #include <linux/rhashtable-types.h> #include <linux/sysctl.h> #include <linux/percpu_counter.h> struct user_namespace; struct ipc_ids { int in_use; unsigned short seq; struct rw_semaphore rwsem; struct idr ipcs_idr; int max_idx; int last_idx; /* For wrap around detection */ #ifdef CONFIG_CHECKPOINT_RESTORE int next_id; #endif struct rhashtable key_ht; }; struct ipc_namespace { struct ipc_ids ids[3]; int sem_ctls[4]; int used_sems; unsigned int msg_ctlmax; unsigned int msg_ctlmnb; unsigned int msg_ctlmni; struct percpu_counter percpu_msg_bytes; struct percpu_counter percpu_msg_hdrs; size_t shm_ctlmax; size_t shm_ctlall; unsigned long shm_tot; int shm_ctlmni; /* * Defines whether IPC_RMID is forced for _all_ shm segments regardless * of shmctl() */ int shm_rmid_forced; struct notifier_block ipcns_nb; /* The kern_mount of the mqueuefs sb. We take a ref on it */ struct vfsmount *mq_mnt; /* # queues in this ns, protected by mq_lock */ unsigned int mq_queues_count; /* next fields are set through sysctl */ unsigned int mq_queues_max; /* initialized to DFLT_QUEUESMAX */ unsigned int mq_msg_max; /* initialized to DFLT_MSGMAX */ unsigned int mq_msgsize_max; /* initialized to DFLT_MSGSIZEMAX */ unsigned int mq_msg_default; unsigned int mq_msgsize_default; struct ctl_table_set mq_set; struct ctl_table_header *mq_sysctls; struct ctl_table_set ipc_set; struct ctl_table_header *ipc_sysctls; /* user_ns which owns the ipc ns */ struct user_namespace *user_ns; struct ucounts *ucounts; struct llist_node mnt_llist; struct ns_common ns; } __randomize_layout; extern struct ipc_namespace init_ipc_ns; extern spinlock_t mq_lock; #ifdef CONFIG_SYSVIPC extern void shm_destroy_orphaned(struct ipc_namespace *ns); #else /* CONFIG_SYSVIPC */ static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {} #endif /* CONFIG_SYSVIPC */ #ifdef CONFIG_POSIX_MQUEUE extern int mq_init_ns(struct ipc_namespace *ns); /* * POSIX Message Queue default values: * * MIN_*: Lowest value an admin can set the maximum unprivileged limit to * DFLT_*MAX: Default values for the maximum unprivileged limits * DFLT_{MSG,MSGSIZE}: Default values used when the user doesn't supply * an attribute to the open call and the queue must be created * HARD_*: Highest value the maximums can be set to. These are enforced * on CAP_SYS_RESOURCE apps as well making them inviolate (so make them * suitably high) * * POSIX Requirements: * Per app minimum openable message queues - 8. This does not map well * to the fact that we limit the number of queues on a per namespace * basis instead of a per app basis. So, make the default high enough * that no given app should have a hard time opening 8 queues. * Minimum maximum for HARD_MSGMAX - 32767. I bumped this to 65536. * Minimum maximum for HARD_MSGSIZEMAX - POSIX is silent on this. However, * we have run into a situation where running applications in the wild * require this to be at least 5MB, and preferably 10MB, so I set the * value to 16MB in hopes that this user is the worst of the bunch and * the new maximum will handle anyone else. I may have to revisit this * in the future. */ #define DFLT_QUEUESMAX 256 #define MIN_MSGMAX 1 #define DFLT_MSG 10U #define DFLT_MSGMAX 10 #define HARD_MSGMAX 65536 #define MIN_MSGSIZEMAX 128 #define DFLT_MSGSIZE 8192U #define DFLT_MSGSIZEMAX 8192 #define HARD_MSGSIZEMAX (16*1024*1024) #else static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } #endif #if defined(CONFIG_IPC_NS) extern struct ipc_namespace *copy_ipcs(unsigned long flags, struct user_namespace *user_ns, struct ipc_namespace *ns); static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { if (ns) refcount_inc(&ns->ns.count); return ns; } static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) { if (ns) { if (refcount_inc_not_zero(&ns->ns.count)) return ns; } return NULL; } extern void put_ipc_ns(struct ipc_namespace *ns); #else static inline struct ipc_namespace *copy_ipcs(unsigned long flags, struct user_namespace *user_ns, struct ipc_namespace *ns) { if (flags & CLONE_NEWIPC) return ERR_PTR(-EINVAL); return ns; } static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { return ns; } static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) { return ns; } static inline void put_ipc_ns(struct ipc_namespace *ns) { } #endif #ifdef CONFIG_POSIX_MQUEUE_SYSCTL void retire_mq_sysctls(struct ipc_namespace *ns); bool setup_mq_sysctls(struct ipc_namespace *ns); #else /* CONFIG_POSIX_MQUEUE_SYSCTL */ static inline void retire_mq_sysctls(struct ipc_namespace *ns) { } static inline bool setup_mq_sysctls(struct ipc_namespace *ns) { return true; } #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ #ifdef CONFIG_SYSVIPC_SYSCTL bool setup_ipc_sysctls(struct ipc_namespace *ns); void retire_ipc_sysctls(struct ipc_namespace *ns); #else /* CONFIG_SYSVIPC_SYSCTL */ static inline void retire_ipc_sysctls(struct ipc_namespace *ns) { } static inline bool setup_ipc_sysctls(struct ipc_namespace *ns) { return true; } #endif /* CONFIG_SYSVIPC_SYSCTL */ #endif |
13 13 7 5 4 7 5 2 4 2 2 2 3 1 2 7 1 1 4 3 1 2 2 5 4 1 1 1 2 1 2 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> * Copyright (c) 2012-2014 Pablo Neira Ayuso <pablo@netfilter.org> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/audit.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/ipv6.h> #include <net/ip.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_log.h> #include <linux/netdevice.h> static const char *nft_log_null_prefix = ""; struct nft_log { struct nf_loginfo loginfo; char *prefix; }; static bool audit_ip4(struct audit_buffer *ab, struct sk_buff *skb) { struct iphdr _iph; const struct iphdr *ih; ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_iph), &_iph); if (!ih) return false; audit_log_format(ab, " saddr=%pI4 daddr=%pI4 proto=%hhu", &ih->saddr, &ih->daddr, ih->protocol); return true; } static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb) { struct ipv6hdr _ip6h; const struct ipv6hdr *ih; u8 nexthdr; __be16 frag_off; ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h); if (!ih) return false; nexthdr = ih->nexthdr; ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), &nexthdr, &frag_off); audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu", &ih->saddr, &ih->daddr, nexthdr); return true; } static void nft_log_eval_audit(const struct nft_pktinfo *pkt) { struct sk_buff *skb = pkt->skb; struct audit_buffer *ab; int fam = -1; if (!audit_enabled) return; ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT); if (!ab) return; audit_log_format(ab, "mark=%#x", skb->mark); switch (nft_pf(pkt)) { case NFPROTO_BRIDGE: switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1; break; case htons(ETH_P_IPV6): fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1; break; } break; case NFPROTO_IPV4: fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1; break; case NFPROTO_IPV6: fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1; break; } if (fam == -1) audit_log_format(ab, " saddr=? daddr=? proto=-1"); audit_log_end(ab); } static void nft_log_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_log *priv = nft_expr_priv(expr); if (priv->loginfo.type == NF_LOG_TYPE_LOG && priv->loginfo.u.log.level == NFT_LOGLEVEL_AUDIT) { nft_log_eval_audit(pkt); return; } nf_log_packet(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb, nft_in(pkt), nft_out(pkt), &priv->loginfo, "%s", priv->prefix); } static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { [NFTA_LOG_GROUP] = { .type = NLA_U16 }, [NFTA_LOG_PREFIX] = { .type = NLA_STRING, .len = NF_LOG_PREFIXLEN - 1 }, [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 }, [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 }, [NFTA_LOG_LEVEL] = { .type = NLA_U32 }, [NFTA_LOG_FLAGS] = { .type = NLA_U32 }, }; static int nft_log_modprobe(struct net *net, enum nf_log_type t) { switch (t) { case NF_LOG_TYPE_LOG: return nft_request_module(net, "%s", "nf_log_syslog"); case NF_LOG_TYPE_ULOG: return nft_request_module(net, "%s", "nfnetlink_log"); case NF_LOG_TYPE_MAX: break; } return -ENOENT; } static int nft_log_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_log *priv = nft_expr_priv(expr); struct nf_loginfo *li = &priv->loginfo; const struct nlattr *nla; int err; li->type = NF_LOG_TYPE_LOG; if (tb[NFTA_LOG_LEVEL] != NULL && tb[NFTA_LOG_GROUP] != NULL) return -EINVAL; if (tb[NFTA_LOG_GROUP] != NULL) { li->type = NF_LOG_TYPE_ULOG; if (tb[NFTA_LOG_FLAGS] != NULL) return -EINVAL; } nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL_ACCOUNT); if (priv->prefix == NULL) return -ENOMEM; nla_strscpy(priv->prefix, nla, nla_len(nla) + 1); } else { priv->prefix = (char *)nft_log_null_prefix; } switch (li->type) { case NF_LOG_TYPE_LOG: if (tb[NFTA_LOG_LEVEL] != NULL) { li->u.log.level = ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL])); } else { li->u.log.level = NFT_LOGLEVEL_WARNING; } if (li->u.log.level > NFT_LOGLEVEL_AUDIT) { err = -EINVAL; goto err1; } if (tb[NFTA_LOG_FLAGS] != NULL) { li->u.log.logflags = ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS])); if (li->u.log.logflags & ~NF_LOG_MASK) { err = -EINVAL; goto err1; } } break; case NF_LOG_TYPE_ULOG: li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP])); if (tb[NFTA_LOG_SNAPLEN] != NULL) { li->u.ulog.flags |= NF_LOG_F_COPY_LEN; li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); } if (tb[NFTA_LOG_QTHRESHOLD] != NULL) { li->u.ulog.qthreshold = ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD])); } break; } if (li->u.log.level == NFT_LOGLEVEL_AUDIT) return 0; err = nf_logger_find_get(ctx->family, li->type); if (err < 0) { if (nft_log_modprobe(ctx->net, li->type) == -EAGAIN) err = -EAGAIN; goto err1; } return 0; err1: if (priv->prefix != nft_log_null_prefix) kfree(priv->prefix); return err; } static void nft_log_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_log *priv = nft_expr_priv(expr); struct nf_loginfo *li = &priv->loginfo; if (priv->prefix != nft_log_null_prefix) kfree(priv->prefix); if (li->u.log.level == NFT_LOGLEVEL_AUDIT) return; nf_logger_put(ctx->family, li->type); } static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_log *priv = nft_expr_priv(expr); const struct nf_loginfo *li = &priv->loginfo; if (priv->prefix != nft_log_null_prefix) if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix)) goto nla_put_failure; switch (li->type) { case NF_LOG_TYPE_LOG: if (nla_put_be32(skb, NFTA_LOG_LEVEL, htonl(li->u.log.level))) goto nla_put_failure; if (li->u.log.logflags) { if (nla_put_be32(skb, NFTA_LOG_FLAGS, htonl(li->u.log.logflags))) goto nla_put_failure; } break; case NF_LOG_TYPE_ULOG: if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) goto nla_put_failure; if (li->u.ulog.flags & NF_LOG_F_COPY_LEN) { if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, htonl(li->u.ulog.copy_len))) goto nla_put_failure; } if (li->u.ulog.qthreshold) { if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD, htons(li->u.ulog.qthreshold))) goto nla_put_failure; } break; } return 0; nla_put_failure: return -1; } static struct nft_expr_type nft_log_type; static const struct nft_expr_ops nft_log_ops = { .type = &nft_log_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_log)), .eval = nft_log_eval, .init = nft_log_init, .destroy = nft_log_destroy, .dump = nft_log_dump, .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_log_type __read_mostly = { .name = "log", .ops = &nft_log_ops, .policy = nft_log_policy, .maxattr = NFTA_LOG_MAX, .owner = THIS_MODULE, }; static int __init nft_log_module_init(void) { return nft_register_expr(&nft_log_type); } static void __exit nft_log_module_exit(void) { nft_unregister_expr(&nft_log_type); } module_init(nft_log_module_init); module_exit(nft_log_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_EXPR("log"); MODULE_DESCRIPTION("Netfilter nf_tables log module"); |
8 4 2 30 3 1 26 29 29 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Authenc: Simple AEAD wrapper for IPsec * * Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/internal/aead.h> #include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> #include <crypto/authenc.h> #include <crypto/null.h> #include <crypto/scatterwalk.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/spinlock.h> struct authenc_instance_ctx { struct crypto_ahash_spawn auth; struct crypto_skcipher_spawn enc; unsigned int reqoff; }; struct crypto_authenc_ctx { struct crypto_ahash *auth; struct crypto_skcipher *enc; struct crypto_sync_skcipher *null; }; struct authenc_request_ctx { struct scatterlist src[2]; struct scatterlist dst[2]; char tail[]; }; static void authenc_request_complete(struct aead_request *req, int err) { if (err != -EINPROGRESS) aead_request_complete(req, err); } int crypto_authenc_extractkeys(struct crypto_authenc_keys *keys, const u8 *key, unsigned int keylen) { struct rtattr *rta = (struct rtattr *)key; struct crypto_authenc_key_param *param; if (!RTA_OK(rta, keylen)) return -EINVAL; if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) return -EINVAL; /* * RTA_OK() didn't align the rtattr's payload when validating that it * fits in the buffer. Yet, the keys should start on the next 4-byte * aligned boundary. To avoid confusion, require that the rtattr * payload be exactly the param struct, which has a 4-byte aligned size. */ if (RTA_PAYLOAD(rta) != sizeof(*param)) return -EINVAL; BUILD_BUG_ON(sizeof(*param) % RTA_ALIGNTO); param = RTA_DATA(rta); keys->enckeylen = be32_to_cpu(param->enckeylen); key += rta->rta_len; keylen -= rta->rta_len; if (keylen < keys->enckeylen) return -EINVAL; keys->authkeylen = keylen - keys->enckeylen; keys->authkey = key; keys->enckey = key + keys->authkeylen; return 0; } EXPORT_SYMBOL_GPL(crypto_authenc_extractkeys); static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key, unsigned int keylen) { struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct crypto_ahash *auth = ctx->auth; struct crypto_skcipher *enc = ctx->enc; struct crypto_authenc_keys keys; int err = -EINVAL; if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) goto out; crypto_ahash_clear_flags(auth, CRYPTO_TFM_REQ_MASK); crypto_ahash_set_flags(auth, crypto_aead_get_flags(authenc) & CRYPTO_TFM_REQ_MASK); err = crypto_ahash_setkey(auth, keys.authkey, keys.authkeylen); if (err) goto out; crypto_skcipher_clear_flags(enc, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(enc, crypto_aead_get_flags(authenc) & CRYPTO_TFM_REQ_MASK); err = crypto_skcipher_setkey(enc, keys.enckey, keys.enckeylen); out: memzero_explicit(&keys, sizeof(keys)); return err; } static void authenc_geniv_ahash_done(void *data, int err) { struct aead_request *req = data; struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct aead_instance *inst = aead_alg_instance(authenc); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ictx->reqoff); if (err) goto out; scatterwalk_map_and_copy(ahreq->result, req->dst, req->assoclen + req->cryptlen, crypto_aead_authsize(authenc), 1); out: aead_request_complete(req, err); } static int crypto_authenc_genicv(struct aead_request *req, unsigned int flags) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct aead_instance *inst = aead_alg_instance(authenc); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_ahash *auth = ctx->auth; struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ictx->reqoff); u8 *hash = areq_ctx->tail; int err; ahash_request_set_tfm(ahreq, auth); ahash_request_set_crypt(ahreq, req->dst, hash, req->assoclen + req->cryptlen); ahash_request_set_callback(ahreq, flags, authenc_geniv_ahash_done, req); err = crypto_ahash_digest(ahreq); if (err) return err; scatterwalk_map_and_copy(hash, req->dst, req->assoclen + req->cryptlen, crypto_aead_authsize(authenc), 1); return 0; } static void crypto_authenc_encrypt_done(void *data, int err) { struct aead_request *areq = data; if (err) goto out; err = crypto_authenc_genicv(areq, 0); out: authenc_request_complete(areq, err); } static int crypto_authenc_copy_assoc(struct aead_request *req) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null); skcipher_request_set_sync_tfm(skreq, ctx->null); skcipher_request_set_callback(skreq, aead_request_flags(req), NULL, NULL); skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen, NULL); return crypto_skcipher_encrypt(skreq); } static int crypto_authenc_encrypt(struct aead_request *req) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct aead_instance *inst = aead_alg_instance(authenc); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct crypto_skcipher *enc = ctx->enc; unsigned int cryptlen = req->cryptlen; struct skcipher_request *skreq = (void *)(areq_ctx->tail + ictx->reqoff); struct scatterlist *src, *dst; int err; src = scatterwalk_ffwd(areq_ctx->src, req->src, req->assoclen); dst = src; if (req->src != req->dst) { err = crypto_authenc_copy_assoc(req); if (err) return err; dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); } skcipher_request_set_tfm(skreq, enc); skcipher_request_set_callback(skreq, aead_request_flags(req), crypto_authenc_encrypt_done, req); skcipher_request_set_crypt(skreq, src, dst, cryptlen, req->iv); err = crypto_skcipher_encrypt(skreq); if (err) return err; return crypto_authenc_genicv(req, aead_request_flags(req)); } static int crypto_authenc_decrypt_tail(struct aead_request *req, unsigned int flags) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct aead_instance *inst = aead_alg_instance(authenc); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ictx->reqoff); struct skcipher_request *skreq = (void *)(areq_ctx->tail + ictx->reqoff); unsigned int authsize = crypto_aead_authsize(authenc); u8 *ihash = ahreq->result + authsize; struct scatterlist *src, *dst; scatterwalk_map_and_copy(ihash, req->src, ahreq->nbytes, authsize, 0); if (crypto_memneq(ihash, ahreq->result, authsize)) return -EBADMSG; src = scatterwalk_ffwd(areq_ctx->src, req->src, req->assoclen); dst = src; if (req->src != req->dst) dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); skcipher_request_set_tfm(skreq, ctx->enc); skcipher_request_set_callback(skreq, flags, req->base.complete, req->base.data); skcipher_request_set_crypt(skreq, src, dst, req->cryptlen - authsize, req->iv); return crypto_skcipher_decrypt(skreq); } static void authenc_verify_ahash_done(void *data, int err) { struct aead_request *req = data; if (err) goto out; err = crypto_authenc_decrypt_tail(req, 0); out: authenc_request_complete(req, err); } static int crypto_authenc_decrypt(struct aead_request *req) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); unsigned int authsize = crypto_aead_authsize(authenc); struct aead_instance *inst = aead_alg_instance(authenc); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_ahash *auth = ctx->auth; struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ictx->reqoff); u8 *hash = areq_ctx->tail; int err; ahash_request_set_tfm(ahreq, auth); ahash_request_set_crypt(ahreq, req->src, hash, req->assoclen + req->cryptlen - authsize); ahash_request_set_callback(ahreq, aead_request_flags(req), authenc_verify_ahash_done, req); err = crypto_ahash_digest(ahreq); if (err) return err; return crypto_authenc_decrypt_tail(req, aead_request_flags(req)); } static int crypto_authenc_init_tfm(struct crypto_aead *tfm) { struct aead_instance *inst = aead_alg_instance(tfm); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_ahash *auth; struct crypto_skcipher *enc; struct crypto_sync_skcipher *null; int err; auth = crypto_spawn_ahash(&ictx->auth); if (IS_ERR(auth)) return PTR_ERR(auth); enc = crypto_spawn_skcipher(&ictx->enc); err = PTR_ERR(enc); if (IS_ERR(enc)) goto err_free_ahash; null = crypto_get_default_null_skcipher(); err = PTR_ERR(null); if (IS_ERR(null)) goto err_free_skcipher; ctx->auth = auth; ctx->enc = enc; ctx->null = null; crypto_aead_set_reqsize( tfm, sizeof(struct authenc_request_ctx) + ictx->reqoff + max_t(unsigned int, crypto_ahash_reqsize(auth) + sizeof(struct ahash_request), sizeof(struct skcipher_request) + crypto_skcipher_reqsize(enc))); return 0; err_free_skcipher: crypto_free_skcipher(enc); err_free_ahash: crypto_free_ahash(auth); return err; } static void crypto_authenc_exit_tfm(struct crypto_aead *tfm) { struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_ahash(ctx->auth); crypto_free_skcipher(ctx->enc); crypto_put_default_null_skcipher(); } static void crypto_authenc_free(struct aead_instance *inst) { struct authenc_instance_ctx *ctx = aead_instance_ctx(inst); crypto_drop_skcipher(&ctx->enc); crypto_drop_ahash(&ctx->auth); kfree(inst); } static int crypto_authenc_create(struct crypto_template *tmpl, struct rtattr **tb) { u32 mask; struct aead_instance *inst; struct authenc_instance_ctx *ctx; struct skcipher_alg_common *enc; struct hash_alg_common *auth; struct crypto_alg *auth_base; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask); if (err) return err; inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; ctx = aead_instance_ctx(inst); err = crypto_grab_ahash(&ctx->auth, aead_crypto_instance(inst), crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; auth = crypto_spawn_ahash_alg(&ctx->auth); auth_base = &auth->base; err = crypto_grab_skcipher(&ctx->enc, aead_crypto_instance(inst), crypto_attr_alg_name(tb[2]), 0, mask); if (err) goto err_free_inst; enc = crypto_spawn_skcipher_alg_common(&ctx->enc); ctx->reqoff = 2 * auth->digestsize; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", auth_base->cra_name, enc->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", auth_base->cra_driver_name, enc->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; inst->alg.base.cra_priority = enc->base.cra_priority * 10 + auth_base->cra_priority; inst->alg.base.cra_blocksize = enc->base.cra_blocksize; inst->alg.base.cra_alignmask = enc->base.cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct crypto_authenc_ctx); inst->alg.ivsize = enc->ivsize; inst->alg.chunksize = enc->chunksize; inst->alg.maxauthsize = auth->digestsize; inst->alg.init = crypto_authenc_init_tfm; inst->alg.exit = crypto_authenc_exit_tfm; inst->alg.setkey = crypto_authenc_setkey; inst->alg.encrypt = crypto_authenc_encrypt; inst->alg.decrypt = crypto_authenc_decrypt; inst->free = crypto_authenc_free; err = aead_register_instance(tmpl, inst); if (err) { err_free_inst: crypto_authenc_free(inst); } return err; } static struct crypto_template crypto_authenc_tmpl = { .name = "authenc", .create = crypto_authenc_create, .module = THIS_MODULE, }; static int __init crypto_authenc_module_init(void) { return crypto_register_template(&crypto_authenc_tmpl); } static void __exit crypto_authenc_module_exit(void) { crypto_unregister_template(&crypto_authenc_tmpl); } subsys_initcall(crypto_authenc_module_init); module_exit(crypto_authenc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Simple AEAD wrapper for IPsec"); MODULE_ALIAS_CRYPTO("authenc"); |
19890 20103 3446 3447 65 1 2 8 54 2 1 20 33 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | // SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/fault-inject-usercopy.h> #include <linux/instrumented.h> #include <linux/kernel.h> #include <linux/nospec.h> #include <linux/string.h> #include <linux/uaccess.h> #include <linux/wordpart.h> /* out-of-line parts */ #if !defined(INLINE_COPY_FROM_USER) || defined(CONFIG_RUST) unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { return _inline_copy_from_user(to, from, n); } EXPORT_SYMBOL(_copy_from_user); #endif #if !defined(INLINE_COPY_TO_USER) || defined(CONFIG_RUST) unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { return _inline_copy_to_user(to, from, n); } EXPORT_SYMBOL(_copy_to_user); #endif /** * check_zeroed_user: check if a userspace buffer only contains zero bytes * @from: Source address, in userspace. * @size: Size of buffer. * * This is effectively shorthand for "memchr_inv(from, 0, size) == NULL" for * userspace addresses (and is more efficient because we don't care where the * first non-zero byte is). * * Returns: * * 0: There were non-zero bytes present in the buffer. * * 1: The buffer was full of zero bytes. * * -EFAULT: access to userspace failed. */ int check_zeroed_user(const void __user *from, size_t size) { unsigned long val; uintptr_t align = (uintptr_t) from % sizeof(unsigned long); if (unlikely(size == 0)) return 1; from -= align; size += align; if (!user_read_access_begin(from, size)) return -EFAULT; unsafe_get_user(val, (unsigned long __user *) from, err_fault); if (align) val &= ~aligned_byte_mask(align); while (size > sizeof(unsigned long)) { if (unlikely(val)) goto done; from += sizeof(unsigned long); size -= sizeof(unsigned long); unsafe_get_user(val, (unsigned long __user *) from, err_fault); } if (size < sizeof(unsigned long)) val &= aligned_byte_mask(size); done: user_read_access_end(); return (val == 0); err_fault: user_read_access_end(); return -EFAULT; } EXPORT_SYMBOL(check_zeroed_user); |
5 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 | // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/inode.c * * Copyright (C) 1992 Rick Sladkey * * nfs inode and superblock handling functions * * Modularised by Alan Cox <alan@lxorguk.ukuu.org.uk>, while hacking some * experimental NFS changes. Modularisation taken straight from SYS5 fs. * * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. * J.S.Peatfield@damtp.cam.ac.uk * */ #include <linux/module.h> #include <linux/init.h> #include <linux/sched/signal.h> #include <linux/time.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/stat.h> #include <linux/errno.h> #include <linux/unistd.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/metrics.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> #include <linux/lockd/bind.h> #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/vfs.h> #include <linux/inet.h> #include <linux/nfs_xdr.h> #include <linux/slab.h> #include <linux/compat.h> #include <linux/freezer.h> #include <linux/uaccess.h> #include <linux/iversion.h> #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" #include "iostat.h" #include "internal.h" #include "fscache.h" #include "pnfs.h" #include "nfs.h" #include "netns.h" #include "sysfs.h" #include "nfstrace.h" #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_64_BIT_INODE_NUMBERS_ENABLED 1 /* Default is to see 64-bit inode numbers */ static bool enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; static int nfs_update_inode(struct inode *, struct nfs_fattr *); static struct kmem_cache * nfs_inode_cachep; static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) { return nfs_fileid_to_ino_t(fattr->fileid); } int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) { schedule(); if (signal_pending_state(mode, current)) return -ERESTARTSYS; return 0; } EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); /** * nfs_compat_user_ino64 - returns the user-visible inode number * @fileid: 64-bit fileid * * This function returns a 32-bit inode number if the boot parameter * nfs.enable_ino64 is zero. */ u64 nfs_compat_user_ino64(u64 fileid) { #ifdef CONFIG_COMPAT compat_ulong_t ino; #else unsigned long ino; #endif if (enable_ino64) return fileid; ino = fileid; if (sizeof(ino) < sizeof(fileid)) ino ^= fileid >> (sizeof(fileid)-sizeof(ino)) * 8; return ino; } int nfs_drop_inode(struct inode *inode) { return NFS_STALE(inode) || generic_drop_inode(inode); } EXPORT_SYMBOL_GPL(nfs_drop_inode); void nfs_clear_inode(struct inode *inode) { /* * The following should never happen... */ WARN_ON_ONCE(nfs_have_writebacks(inode)); WARN_ON_ONCE(!list_empty(&NFS_I(inode)->open_files)); nfs_zap_acl_cache(inode); nfs_access_zap_cache(inode); nfs_fscache_clear_inode(inode); } EXPORT_SYMBOL_GPL(nfs_clear_inode); void nfs_evict_inode(struct inode *inode) { truncate_inode_pages_final(&inode->i_data); clear_inode(inode); nfs_clear_inode(inode); } int nfs_sync_inode(struct inode *inode) { inode_dio_wait(inode); return nfs_wb_all(inode); } EXPORT_SYMBOL_GPL(nfs_sync_inode); /** * nfs_sync_mapping - helper to flush all mmapped dirty data to disk * @mapping: pointer to struct address_space */ int nfs_sync_mapping(struct address_space *mapping) { int ret = 0; if (mapping->nrpages != 0) { unmap_mapping_range(mapping, 0, 0, 0); ret = nfs_wb_all(mapping->host); } return ret; } static int nfs_attribute_timeout(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); } static bool nfs_check_cache_flags_invalid(struct inode *inode, unsigned long flags) { unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); return (cache_validity & flags) != 0; } bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) { if (nfs_check_cache_flags_invalid(inode, flags)) return true; return nfs_attribute_cache_expired(inode); } EXPORT_SYMBOL_GPL(nfs_check_cache_invalid); #ifdef CONFIG_NFS_V4_2 static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi) { return nfsi->xattr_cache != NULL; } #else static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi) { return false; } #endif void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); if (nfs_have_delegated_attributes(inode)) { if (!(flags & NFS_INO_REVAL_FORCED)) flags &= ~(NFS_INO_INVALID_MODE | NFS_INO_INVALID_OTHER | NFS_INO_INVALID_XATTR); flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE); } if (!nfs_has_xattr_cache(nfsi)) flags &= ~NFS_INO_INVALID_XATTR; if (flags & NFS_INO_INVALID_DATA) nfs_fscache_invalidate(inode, 0); flags &= ~NFS_INO_REVAL_FORCED; nfsi->cache_validity |= flags; if (inode->i_mapping->nrpages == 0) { nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; nfs_ooo_clear(nfsi); } else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { nfs_ooo_clear(nfsi); } trace_nfs_set_cache_invalid(inode, 0); } EXPORT_SYMBOL_GPL(nfs_set_cache_invalid); /* * Invalidate the local caches */ static void nfs_zap_caches_locked(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); int mode = inode->i_mode; nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_INVALID_XATTR); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_INVALID_XATTR); nfs_zap_label_cache_locked(nfsi); } void nfs_zap_caches(struct inode *inode) { spin_lock(&inode->i_lock); nfs_zap_caches_locked(inode); spin_unlock(&inode->i_lock); } void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) { if (mapping->nrpages != 0) { spin_lock(&inode->i_lock); nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); spin_unlock(&inode->i_lock); } } void nfs_zap_acl_cache(struct inode *inode) { void (*clear_acl_cache)(struct inode *); clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; if (clear_acl_cache != NULL) clear_acl_cache(inode); spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_zap_acl_cache); void nfs_invalidate_atime(struct inode *inode) { if (nfs_have_delegated_atime(inode)) return; spin_lock(&inode->i_lock); nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_invalidate_atime); /* * Invalidate, but do not unhash, the inode. * NB: must be called with inode->i_lock held! */ static void nfs_set_inode_stale_locked(struct inode *inode) { set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); nfs_zap_caches_locked(inode); trace_nfs_set_inode_stale(inode); } void nfs_set_inode_stale(struct inode *inode) { spin_lock(&inode->i_lock); nfs_set_inode_stale_locked(inode); spin_unlock(&inode->i_lock); } struct nfs_find_desc { struct nfs_fh *fh; struct nfs_fattr *fattr; }; /* * In NFSv3 we can have 64bit inode numbers. In order to support * this, and re-exported directories (also seen in NFSv2) * we are forced to allow 2 different inodes to have the same * i_ino. */ static int nfs_find_actor(struct inode *inode, void *opaque) { struct nfs_find_desc *desc = opaque; struct nfs_fh *fh = desc->fh; struct nfs_fattr *fattr = desc->fattr; if (NFS_FILEID(inode) != fattr->fileid) return 0; if (inode_wrong_type(inode, fattr->mode)) return 0; if (nfs_compare_fh(NFS_FH(inode), fh)) return 0; if (is_bad_inode(inode) || NFS_STALE(inode)) return 0; return 1; } static int nfs_init_locked(struct inode *inode, void *opaque) { struct nfs_find_desc *desc = opaque; struct nfs_fattr *fattr = desc->fattr; set_nfs_fileid(inode, fattr->fileid); inode->i_mode = fattr->mode; nfs_copy_fh(NFS_FH(inode), desc->fh); return 0; } #ifdef CONFIG_NFS_V4_SECURITY_LABEL static void nfs_clear_label_invalid(struct inode *inode) { spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_LABEL; spin_unlock(&inode->i_lock); } void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr) { int error; if (fattr->label == NULL) return; if ((fattr->valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) && inode->i_security) { error = security_inode_notifysecctx(inode, fattr->label->label, fattr->label->len); if (error) printk(KERN_ERR "%s() %s %d " "security_inode_notifysecctx() %d\n", __func__, (char *)fattr->label->label, fattr->label->len, error); nfs_clear_label_invalid(inode); } } struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { struct nfs4_label *label; if (!(server->caps & NFS_CAP_SECURITY_LABEL)) return NULL; label = kzalloc(sizeof(struct nfs4_label), flags); if (label == NULL) return ERR_PTR(-ENOMEM); label->label = kzalloc(NFS4_MAXLABELLEN, flags); if (label->label == NULL) { kfree(label); return ERR_PTR(-ENOMEM); } label->len = NFS4_MAXLABELLEN; return label; } EXPORT_SYMBOL_GPL(nfs4_label_alloc); #else void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr) { } #endif EXPORT_SYMBOL_GPL(nfs_setsecurity); /* Search for inode identified by fh, fileid and i_mode in inode cache. */ struct inode * nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) { struct nfs_find_desc desc = { .fh = fh, .fattr = fattr, }; struct inode *inode; unsigned long hash; if (!(fattr->valid & NFS_ATTR_FATTR_FILEID) || !(fattr->valid & NFS_ATTR_FATTR_TYPE)) return NULL; hash = nfs_fattr_to_ino_t(fattr); inode = ilookup5(sb, hash, nfs_find_actor, &desc); dprintk("%s: returning %p\n", __func__, inode); return inode; } static void nfs_inode_init_regular(struct nfs_inode *nfsi) { atomic_long_set(&nfsi->nrequests, 0); atomic_long_set(&nfsi->redirtied_pages, 0); INIT_LIST_HEAD(&nfsi->commit_info.list); atomic_long_set(&nfsi->commit_info.ncommit, 0); atomic_set(&nfsi->commit_info.rpcs_out, 0); mutex_init(&nfsi->commit_mutex); } static void nfs_inode_init_dir(struct nfs_inode *nfsi) { nfsi->cache_change_attribute = 0; memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); init_rwsem(&nfsi->rmdir_sem); } /* * This is our front-end to iget that looks up inodes by file handle * instead of inode number. */ struct inode * nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) { struct nfs_find_desc desc = { .fh = fh, .fattr = fattr }; struct inode *inode = ERR_PTR(-ENOENT); u64 fattr_supported = NFS_SB(sb)->fattr_valid; unsigned long hash; nfs_attr_check_mountpoint(sb, fattr); if (nfs_attr_use_mounted_on_fileid(fattr)) fattr->fileid = fattr->mounted_on_fileid; else if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) goto out_no_inode; if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) goto out_no_inode; hash = nfs_fattr_to_ino_t(fattr); inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc); if (inode == NULL) { inode = ERR_PTR(-ENOMEM); goto out_no_inode; } if (inode->i_state & I_NEW) { struct nfs_inode *nfsi = NFS_I(inode); unsigned long now = jiffies; /* We set i_ino for the few things that still rely on it, * such as stat(2) */ inode->i_ino = hash; /* We can't support update_atime(), since the server will reset it */ inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_mode = fattr->mode; nfsi->cache_validity = 0; if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 && (fattr_supported & NFS_ATTR_FATTR_MODE)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; inode->i_data.a_ops = &nfs_file_aops; nfs_inode_init_regular(nfsi); mapping_set_large_folios(inode->i_mapping); } else if (S_ISDIR(inode->i_mode)) { inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; inode->i_data.a_ops = &nfs_dir_aops; nfs_inode_init_dir(nfsi); /* Deal with crossing mountpoints */ if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) inode->i_op = &nfs_referral_inode_operations; else inode->i_op = &nfs_mountpoint_inode_operations; inode->i_fop = NULL; inode->i_flags |= S_AUTOMOUNT; } } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &nfs_symlink_inode_operations; inode_nohighmem(inode); } else init_special_inode(inode, inode->i_mode, fattr->rdev); inode_set_atime(inode, 0, 0); inode_set_mtime(inode, 0, 0); inode_set_ctime(inode, 0, 0); inode_set_iversion_raw(inode, 0); inode->i_size = 0; clear_nlink(inode); inode->i_uid = make_kuid(&init_user_ns, -2); inode->i_gid = make_kgid(&init_user_ns, -2); inode->i_blocks = 0; nfsi->write_io = 0; nfsi->read_io = 0; nfsi->read_cache_jiffies = fattr->time_start; nfsi->attr_gencount = fattr->gencount; if (fattr->valid & NFS_ATTR_FATTR_ATIME) inode_set_atime_to_ts(inode, fattr->atime); else if (fattr_supported & NFS_ATTR_FATTR_ATIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode_set_mtime_to_ts(inode, fattr->mtime); else if (fattr_supported & NFS_ATTR_FATTR_MTIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode_set_ctime_to_ts(inode, fattr->ctime); else if (fattr_supported & NFS_ATTR_FATTR_CTIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode_set_iversion_raw(inode, fattr->change_attr); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE); if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_SIZE); if (fattr->valid & NFS_ATTR_FATTR_NLINK) set_nlink(inode, fattr->nlink); else if (fattr_supported & NFS_ATTR_FATTR_NLINK) nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK); if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; else if (fattr_supported & NFS_ATTR_FATTR_OWNER) nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); if (fattr->valid & NFS_ATTR_FATTR_GROUP) inode->i_gid = fattr->gid; else if (fattr_supported & NFS_ATTR_FATTR_GROUP) nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED && fattr->size != 0) nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED && fattr->size != 0) nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); nfs_setsecurity(inode, fattr); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; nfsi->access_cache = RB_ROOT; nfs_fscache_init_inode(inode); unlock_new_inode(inode); } else { int err = nfs_refresh_inode(inode, fattr); if (err < 0) { iput(inode); inode = ERR_PTR(err); goto out_no_inode; } } dprintk("NFS: nfs_fhget(%s/%Lu fh_crc=0x%08x ct=%d)\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), nfs_display_fhandle_hash(fh), atomic_read(&inode->i_count)); out: return inode; out_no_inode: dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode)); goto out; } EXPORT_SYMBOL_GPL(nfs_fhget); static void nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr) { unsigned long cache_validity = NFS_I(inode)->cache_validity; if (nfs_have_delegated_mtime(inode)) { if (!(cache_validity & NFS_INO_INVALID_CTIME)) fattr->valid &= ~(NFS_ATTR_FATTR_PRECTIME | NFS_ATTR_FATTR_CTIME); if (!(cache_validity & NFS_INO_INVALID_MTIME)) fattr->valid &= ~(NFS_ATTR_FATTR_PREMTIME | NFS_ATTR_FATTR_MTIME); if (!(cache_validity & NFS_INO_INVALID_ATIME)) fattr->valid &= ~NFS_ATTR_FATTR_ATIME; } else if (nfs_have_delegated_atime(inode)) { if (!(cache_validity & NFS_INO_INVALID_ATIME)) fattr->valid &= ~NFS_ATTR_FATTR_ATIME; } } void nfs_update_delegated_atime(struct inode *inode) { spin_lock(&inode->i_lock); if (nfs_have_delegated_atime(inode)) { inode_update_timestamps(inode, S_ATIME); NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ATIME; } spin_unlock(&inode->i_lock); } void nfs_update_delegated_mtime_locked(struct inode *inode) { if (nfs_have_delegated_mtime(inode)) { inode_update_timestamps(inode, S_CTIME | S_MTIME); NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME); } } void nfs_update_delegated_mtime(struct inode *inode) { spin_lock(&inode->i_lock); nfs_update_delegated_mtime_locked(inode); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_update_delegated_mtime); #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) int nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); struct nfs_fattr *fattr; int error = 0; nfs_inc_stats(inode, NFSIOS_VFSSETATTR); /* skip mode change if it's just for clearing setuid/setgid */ if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) attr->ia_valid &= ~ATTR_MODE; if (attr->ia_valid & ATTR_SIZE) { BUG_ON(!S_ISREG(inode->i_mode)); error = inode_newsize_ok(inode, attr->ia_size); if (error) return error; if (attr->ia_size == i_size_read(inode)) attr->ia_valid &= ~ATTR_SIZE; } if (nfs_have_delegated_mtime(inode)) { if (attr->ia_valid & ATTR_MTIME) { nfs_update_delegated_mtime(inode); attr->ia_valid &= ~ATTR_MTIME; } if (attr->ia_valid & ATTR_ATIME) { nfs_update_delegated_atime(inode); attr->ia_valid &= ~ATTR_ATIME; } } /* Optimization: if the end result is no change, don't RPC */ if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; trace_nfs_setattr_enter(inode); /* Write all dirty data */ if (S_ISREG(inode->i_mode)) nfs_sync_inode(inode); fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); if (fattr == NULL) { error = -ENOMEM; goto out; } error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); if (error == 0) error = nfs_refresh_inode(inode, fattr); nfs_free_fattr(fattr); out: trace_nfs_setattr_exit(inode, error); return error; } EXPORT_SYMBOL_GPL(nfs_setattr); /** * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall * @inode: inode of the file used * @offset: file offset to start truncating * * This is a copy of the common vmtruncate, but with the locking * corrected to take into account the fact that NFS requires * inode->i_size to be updated under the inode->i_lock. * Note: must be called with inode->i_lock held! */ static int nfs_vmtruncate(struct inode * inode, loff_t offset) { int err; err = inode_newsize_ok(inode, offset); if (err) goto out; trace_nfs_size_truncate(inode, offset); i_size_write(inode, offset); /* Optimisation */ if (offset == 0) { NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; nfs_ooo_clear(NFS_I(inode)); } NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; spin_unlock(&inode->i_lock); truncate_pagecache(inode, offset); nfs_update_delegated_mtime_locked(inode); spin_lock(&inode->i_lock); out: return err; } /** * nfs_setattr_update_inode - Update inode metadata after a setattr call. * @inode: pointer to struct inode * @attr: pointer to struct iattr * @fattr: pointer to struct nfs_fattr * * Note: we do this in the *proc.c in order to ensure that * it works for things like exclusive creates too. */ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *fattr) { /* Barrier: bump the attribute generation count. */ nfs_fattr_set_barrier(fattr); spin_lock(&inode->i_lock); NFS_I(inode)->attr_gencount = fattr->gencount; if ((attr->ia_valid & ATTR_SIZE) != 0) { if (!nfs_have_delegated_mtime(inode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); nfs_vmtruncate(inode, attr->ia_size); } if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_CTIME; if ((attr->ia_valid & ATTR_KILL_SUID) != 0 && inode->i_mode & S_ISUID) inode->i_mode &= ~S_ISUID; if (setattr_should_drop_sgid(&nop_mnt_idmap, inode)) inode->i_mode &= ~S_ISGID; if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; } if ((attr->ia_valid & ATTR_UID) != 0) inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode_set_ctime_to_ts(inode, fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL); } if (attr->ia_valid & (ATTR_ATIME_SET|ATTR_ATIME)) { NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_ATIME) inode_set_atime_to_ts(inode, fattr->atime); else if (attr->ia_valid & ATTR_ATIME_SET) inode_set_atime_to_ts(inode, attr->ia_atime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode_set_ctime_to_ts(inode, fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); } if (attr->ia_valid & (ATTR_MTIME_SET|ATTR_MTIME)) { NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode_set_mtime_to_ts(inode, fattr->mtime); else if (attr->ia_valid & ATTR_MTIME_SET) inode_set_mtime_to_ts(inode, attr->ia_mtime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode_set_ctime_to_ts(inode, fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); } if (fattr->valid) nfs_update_inode(inode, fattr); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); /* * Don't request help from readdirplus if the file is being written to, * or if attribute caching is turned off */ static bool nfs_getattr_readdirplus_enable(const struct inode *inode) { return nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && !nfs_have_writebacks(inode) && NFS_MAXATTRTIMEO(inode) > 5 * HZ; } static void nfs_readdirplus_parent_cache_miss(struct dentry *dentry) { if (!IS_ROOT(dentry)) { struct dentry *parent = dget_parent(dentry); nfs_readdir_record_entry_cache_miss(d_inode(parent)); dput(parent); } } static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry) { if (!IS_ROOT(dentry)) { struct dentry *parent = dget_parent(dentry); nfs_readdir_record_entry_cache_hit(d_inode(parent)); dput(parent); } } static u32 nfs_get_valid_attrmask(struct inode *inode) { unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); u32 reply_mask = STATX_INO | STATX_TYPE; if (!(cache_validity & NFS_INO_INVALID_ATIME)) reply_mask |= STATX_ATIME; if (!(cache_validity & NFS_INO_INVALID_CTIME)) reply_mask |= STATX_CTIME; if (!(cache_validity & NFS_INO_INVALID_MTIME)) reply_mask |= STATX_MTIME; if (!(cache_validity & NFS_INO_INVALID_SIZE)) reply_mask |= STATX_SIZE; if (!(cache_validity & NFS_INO_INVALID_NLINK)) reply_mask |= STATX_NLINK; if (!(cache_validity & NFS_INO_INVALID_MODE)) reply_mask |= STATX_MODE; if (!(cache_validity & NFS_INO_INVALID_OTHER)) reply_mask |= STATX_UID | STATX_GID; if (!(cache_validity & NFS_INO_INVALID_BLOCKS)) reply_mask |= STATX_BLOCKS; if (!(cache_validity & NFS_INO_INVALID_CHANGE)) reply_mask |= STATX_CHANGE_COOKIE; return reply_mask; } int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct nfs_server *server = NFS_SERVER(inode); unsigned long cache_validity; int err = 0; bool force_sync = query_flags & AT_STATX_FORCE_SYNC; bool do_update = false; bool readdirplus_enabled = nfs_getattr_readdirplus_enable(inode); trace_nfs_getattr_enter(inode); request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID | STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME | STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_CHANGE_COOKIE; if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) { if (readdirplus_enabled) nfs_readdirplus_parent_cache_hit(path->dentry); goto out_no_revalidate; } /* Flush out writes to the server in order to update c/mtime/version. */ if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) && S_ISREG(inode->i_mode)) { if (nfs_have_delegated_mtime(inode)) filemap_fdatawrite(inode->i_mapping); else filemap_write_and_wait(inode->i_mapping); } /* * We may force a getattr if the user cares about atime. * * Note that we only have to check the vfsmount flags here: * - NFS always sets S_NOATIME by so checking it would give a * bogus result * - NFS never sets SB_NOATIME or SB_NODIRATIME so there is * no point in checking those. */ if ((path->mnt->mnt_flags & MNT_NOATIME) || ((path->mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) request_mask &= ~STATX_ATIME; /* Is the user requesting attributes that might need revalidation? */ if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME| STATX_MTIME|STATX_UID|STATX_GID| STATX_SIZE|STATX_BLOCKS| STATX_CHANGE_COOKIE))) goto out_no_revalidate; /* Check whether the cached attributes are stale */ do_update |= force_sync || nfs_attribute_cache_expired(inode); cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); do_update |= cache_validity & NFS_INO_INVALID_CHANGE; if (request_mask & STATX_ATIME) do_update |= cache_validity & NFS_INO_INVALID_ATIME; if (request_mask & STATX_CTIME) do_update |= cache_validity & NFS_INO_INVALID_CTIME; if (request_mask & STATX_MTIME) do_update |= cache_validity & NFS_INO_INVALID_MTIME; if (request_mask & STATX_SIZE) do_update |= cache_validity & NFS_INO_INVALID_SIZE; if (request_mask & STATX_NLINK) do_update |= cache_validity & NFS_INO_INVALID_NLINK; if (request_mask & STATX_MODE) do_update |= cache_validity & NFS_INO_INVALID_MODE; if (request_mask & (STATX_UID | STATX_GID)) do_update |= cache_validity & NFS_INO_INVALID_OTHER; if (request_mask & STATX_BLOCKS) do_update |= cache_validity & NFS_INO_INVALID_BLOCKS; if (do_update) { if (readdirplus_enabled) nfs_readdirplus_parent_cache_miss(path->dentry); err = __nfs_revalidate_inode(server, inode); if (err) goto out; } else if (readdirplus_enabled) nfs_readdirplus_parent_cache_hit(path->dentry); out_no_revalidate: /* Only return attributes that were revalidated. */ stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask; generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); stat->change_cookie = inode_peek_iversion_raw(inode); stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC; if (server->change_attr_type != NFS4_CHANGE_TYPE_IS_UNDEFINED) stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC; if (S_ISDIR(inode->i_mode)) stat->blksize = NFS_SERVER(inode)->dtsize; out: trace_nfs_getattr_exit(inode, err); return err; } EXPORT_SYMBOL_GPL(nfs_getattr); static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) { refcount_set(&l_ctx->count, 1); l_ctx->lockowner = current->files; INIT_LIST_HEAD(&l_ctx->list); atomic_set(&l_ctx->io_count, 0); } static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) { struct nfs_lock_context *pos; list_for_each_entry_rcu(pos, &ctx->lock_context.list, list) { if (pos->lockowner != current->files) continue; if (refcount_inc_not_zero(&pos->count)) return pos; } return NULL; } struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) { struct nfs_lock_context *res, *new = NULL; struct inode *inode = d_inode(ctx->dentry); rcu_read_lock(); res = __nfs_find_lock_context(ctx); rcu_read_unlock(); if (res == NULL) { new = kmalloc(sizeof(*new), GFP_KERNEL_ACCOUNT); if (new == NULL) return ERR_PTR(-ENOMEM); nfs_init_lock_context(new); spin_lock(&inode->i_lock); res = __nfs_find_lock_context(ctx); if (res == NULL) { new->open_context = get_nfs_open_context(ctx); if (new->open_context) { list_add_tail_rcu(&new->list, &ctx->lock_context.list); res = new; new = NULL; } else res = ERR_PTR(-EBADF); } spin_unlock(&inode->i_lock); kfree(new); } return res; } EXPORT_SYMBOL_GPL(nfs_get_lock_context); void nfs_put_lock_context(struct nfs_lock_context *l_ctx) { struct nfs_open_context *ctx = l_ctx->open_context; struct inode *inode = d_inode(ctx->dentry); if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock)) return; list_del_rcu(&l_ctx->list); spin_unlock(&inode->i_lock); put_nfs_open_context(ctx); kfree_rcu(l_ctx, rcu_head); } EXPORT_SYMBOL_GPL(nfs_put_lock_context); /** * nfs_close_context - Common close_context() routine NFSv2/v3 * @ctx: pointer to context * @is_sync: is this a synchronous close * * Ensure that the attributes are up to date if we're mounted * with close-to-open semantics and we have cached data that will * need to be revalidated on open. */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) { struct nfs_inode *nfsi; struct inode *inode; if (!(ctx->mode & FMODE_WRITE)) return; if (!is_sync) return; inode = d_inode(ctx->dentry); if (nfs_have_read_or_write_delegation(inode)) return; nfsi = NFS_I(inode); if (inode->i_mapping->nrpages == 0) return; if (nfsi->cache_validity & NFS_INO_INVALID_DATA) return; if (!list_empty(&nfsi->open_files)) return; if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO) return; nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE); } EXPORT_SYMBOL_GPL(nfs_close_context); struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode, struct file *filp) { struct nfs_open_context *ctx; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT); if (!ctx) return ERR_PTR(-ENOMEM); nfs_sb_active(dentry->d_sb); ctx->dentry = dget(dentry); if (filp) ctx->cred = get_cred(filp->f_cred); else ctx->cred = get_current_cred(); rcu_assign_pointer(ctx->ll_cred, NULL); ctx->state = NULL; ctx->mode = f_mode; ctx->flags = 0; ctx->error = 0; ctx->flock_owner = (fl_owner_t)filp; nfs_init_lock_context(&ctx->lock_context); ctx->lock_context.open_context = ctx; INIT_LIST_HEAD(&ctx->list); ctx->mdsthreshold = NULL; return ctx; } EXPORT_SYMBOL_GPL(alloc_nfs_open_context); struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { if (ctx != NULL && refcount_inc_not_zero(&ctx->lock_context.count)) return ctx; return NULL; } EXPORT_SYMBOL_GPL(get_nfs_open_context); static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) { struct inode *inode = d_inode(ctx->dentry); struct super_block *sb = ctx->dentry->d_sb; if (!refcount_dec_and_test(&ctx->lock_context.count)) return; if (!list_empty(&ctx->list)) { spin_lock(&inode->i_lock); list_del_rcu(&ctx->list); spin_unlock(&inode->i_lock); } if (inode != NULL) NFS_PROTO(inode)->close_context(ctx, is_sync); put_cred(ctx->cred); dput(ctx->dentry); nfs_sb_deactive(sb); put_rpccred(rcu_dereference_protected(ctx->ll_cred, 1)); kfree(ctx->mdsthreshold); kfree_rcu(ctx, rcu_head); } void put_nfs_open_context(struct nfs_open_context *ctx) { __put_nfs_open_context(ctx, 0); } EXPORT_SYMBOL_GPL(put_nfs_open_context); static void put_nfs_open_context_sync(struct nfs_open_context *ctx) { __put_nfs_open_context(ctx, 1); } /* * Ensure that mmap has a recent RPC credential for use when writing out * shared pages */ void nfs_inode_attach_open_context(struct nfs_open_context *ctx) { struct inode *inode = d_inode(ctx->dentry); struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); if (list_empty(&nfsi->open_files) && nfs_ooo_test(nfsi)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA | NFS_INO_REVAL_FORCED); list_add_tail_rcu(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context); void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) { filp->private_data = get_nfs_open_context(ctx); set_bit(NFS_CONTEXT_FILE_OPEN, &ctx->flags); if (list_empty(&ctx->list)) nfs_inode_attach_open_context(ctx); } EXPORT_SYMBOL_GPL(nfs_file_set_open_context); /* * Given an inode, search for an open context with the desired characteristics */ struct nfs_open_context *nfs_find_open_context(struct inode *inode, const struct cred *cred, fmode_t mode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *pos, *ctx = NULL; rcu_read_lock(); list_for_each_entry_rcu(pos, &nfsi->open_files, list) { if (cred != NULL && cred_fscmp(pos->cred, cred) != 0) continue; if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) continue; if (!test_bit(NFS_CONTEXT_FILE_OPEN, &pos->flags)) continue; ctx = get_nfs_open_context(pos); if (ctx) break; } rcu_read_unlock(); return ctx; } void nfs_file_clear_open_context(struct file *filp) { struct nfs_open_context *ctx = nfs_file_open_context(filp); if (ctx) { struct inode *inode = d_inode(ctx->dentry); clear_bit(NFS_CONTEXT_FILE_OPEN, &ctx->flags); /* * We fatal error on write before. Try to writeback * every page again. */ if (ctx->error < 0) invalidate_inode_pages2(inode->i_mapping); filp->private_data = NULL; put_nfs_open_context_sync(ctx); } } /* * These allocate and release file read/write context information. */ int nfs_open(struct inode *inode, struct file *filp) { struct nfs_open_context *ctx; ctx = alloc_nfs_open_context(file_dentry(filp), flags_to_mode(filp->f_flags), filp); if (IS_ERR(ctx)) return PTR_ERR(ctx); nfs_file_set_open_context(filp, ctx); put_nfs_open_context(ctx); nfs_fscache_open_file(inode, filp); return 0; } /* * This function is called whenever some part of NFS notices that * the cached attributes have to be refreshed. */ int __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { int status = -ESTALE; struct nfs_fattr *fattr = NULL; struct nfs_inode *nfsi = NFS_I(inode); dfprintk(PAGECACHE, "NFS: revalidating (%s/%Lu)\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode)); trace_nfs_revalidate_inode_enter(inode); if (is_bad_inode(inode)) goto out; if (NFS_STALE(inode)) goto out; /* pNFS: Attributes aren't updated until we layoutcommit */ if (S_ISREG(inode->i_mode)) { status = pnfs_sync_inode(inode, false); if (status) goto out; } status = -ENOMEM; fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); if (fattr == NULL) goto out; nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, inode); if (status != 0) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), status); switch (status) { case -ETIMEDOUT: /* A soft timeout occurred. Use cached information? */ if (server->flags & NFS_MOUNT_SOFTREVAL) status = 0; break; case -ESTALE: if (!S_ISDIR(inode->i_mode)) nfs_set_inode_stale(inode); else nfs_zap_caches(inode); } goto out; } status = nfs_refresh_inode(inode, fattr); if (status) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) refresh failed, error=%d\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), status); goto out; } if (nfsi->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); nfs_setsecurity(inode, fattr); dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode)); out: nfs_free_fattr(fattr); trace_nfs_revalidate_inode_exit(inode, status); return status; } int nfs_attribute_cache_expired(struct inode *inode) { if (nfs_have_delegated_attributes(inode)) return 0; return nfs_attribute_timeout(inode); } /** * nfs_revalidate_inode - Revalidate the inode attributes * @inode: pointer to inode struct * @flags: cache flags to check * * Updates inode attribute information by retrieving the data from the server. */ int nfs_revalidate_inode(struct inode *inode, unsigned long flags) { if (!nfs_check_cache_invalid(inode, flags)) return NFS_STALE(inode) ? -ESTALE : 0; return __nfs_revalidate_inode(NFS_SERVER(inode), inode); } EXPORT_SYMBOL_GPL(nfs_revalidate_inode); static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { int ret; nfs_fscache_invalidate(inode, 0); if (mapping->nrpages != 0) { if (S_ISREG(inode->i_mode)) { ret = nfs_sync_mapping(mapping); if (ret < 0) return ret; } ret = invalidate_inode_pages2(mapping); if (ret < 0) return ret; } nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); dfprintk(PAGECACHE, "NFS: (%s/%Lu) data cache invalidated\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode)); return 0; } /** * nfs_clear_invalid_mapping - Conditionally clear a mapping * @mapping: pointer to mapping * * If the NFS_INO_INVALID_DATA inode flag is set, clear the mapping. */ int nfs_clear_invalid_mapping(struct address_space *mapping) { struct inode *inode = mapping->host; struct nfs_inode *nfsi = NFS_I(inode); unsigned long *bitlock = &nfsi->flags; int ret = 0; /* * We must clear NFS_INO_INVALID_DATA first to ensure that * invalidations that come in while we're shooting down the mappings * are respected. But, that leaves a race window where one revalidator * can clear the flag, and then another checks it before the mapping * gets invalidated. Fix that by serializing access to this part of * the function. * * At the same time, we need to allow other tasks to see whether we * might be in the middle of invalidating the pages, so we only set * the bit lock here if it looks like we're going to be doing that. */ for (;;) { ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING, nfs_wait_bit_killable, TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); if (ret) goto out; spin_lock(&inode->i_lock); if (test_bit(NFS_INO_INVALIDATING, bitlock)) { spin_unlock(&inode->i_lock); continue; } if (nfsi->cache_validity & NFS_INO_INVALID_DATA) break; spin_unlock(&inode->i_lock); goto out; } set_bit(NFS_INO_INVALIDATING, bitlock); smp_wmb(); nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; nfs_ooo_clear(nfsi); spin_unlock(&inode->i_lock); trace_nfs_invalidate_mapping_enter(inode); ret = nfs_invalidate_mapping(inode, mapping); trace_nfs_invalidate_mapping_exit(inode, ret); clear_bit_unlock(NFS_INO_INVALIDATING, bitlock); smp_mb__after_atomic(); wake_up_bit(bitlock, NFS_INO_INVALIDATING); out: return ret; } bool nfs_mapping_need_revalidate_inode(struct inode *inode) { return nfs_check_cache_invalid(inode, NFS_INO_INVALID_CHANGE) || NFS_STALE(inode); } int nfs_revalidate_mapping_rcu(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); unsigned long *bitlock = &nfsi->flags; int ret = 0; if (IS_SWAPFILE(inode)) goto out; if (nfs_mapping_need_revalidate_inode(inode)) { ret = -ECHILD; goto out; } spin_lock(&inode->i_lock); if (test_bit(NFS_INO_INVALIDATING, bitlock) || (nfsi->cache_validity & NFS_INO_INVALID_DATA)) ret = -ECHILD; spin_unlock(&inode->i_lock); out: return ret; } /** * nfs_revalidate_mapping - Revalidate the pagecache * @inode: pointer to host inode * @mapping: pointer to mapping */ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { /* swapfiles are not supposed to be shared. */ if (IS_SWAPFILE(inode)) return 0; if (nfs_mapping_need_revalidate_inode(inode)) { int ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); if (ret < 0) return ret; } return nfs_clear_invalid_mapping(mapping); } static bool nfs_file_has_writers(struct nfs_inode *nfsi) { struct inode *inode = &nfsi->vfs_inode; if (!S_ISREG(inode->i_mode)) return false; if (list_empty(&nfsi->open_files)) return false; return inode_is_open_for_write(inode); } static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi) { return nfs_file_has_writers(nfsi) && nfs_file_io_is_buffered(nfsi); } static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct timespec64 ts; if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) && inode_eq_iversion_raw(inode, fattr->pre_change_attr)) { inode_set_iversion_raw(inode, fattr->change_attr); if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); else if (nfs_server_capable(inode, NFS_CAP_XATTR)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR); } /* If we have atomic WCC data, we may update some attributes */ ts = inode_get_ctime(inode); if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) && (fattr->valid & NFS_ATTR_FATTR_CTIME) && timespec64_equal(&ts, &fattr->pre_ctime)) { inode_set_ctime_to_ts(inode, fattr->ctime); } ts = inode_get_mtime(inode); if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) && (fattr->valid & NFS_ATTR_FATTR_MTIME) && timespec64_equal(&ts, &fattr->pre_mtime)) { inode_set_mtime_to_ts(inode, fattr->mtime); } if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) && !nfs_have_writebacks(inode)) { trace_nfs_size_wcc(inode, fattr->size); i_size_write(inode, nfs_size_to_loff_t(fattr->size)); } } /** * nfs_check_inode_attributes - verify consistency of the inode attribute cache * @inode: pointer to inode * @fattr: updated attributes * * Verifies the attribute cache. If we have just changed the attributes, * so that fattr carries weak cache consistency data, then it may * also update the ctime/mtime/change_attribute. */ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; unsigned long invalid = 0; struct timespec64 ts; if (nfs_have_delegated_attributes(inode)) return 0; if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) { /* Only a mounted-on-fileid? Just exit */ if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) return 0; /* Has the inode gone and changed behind our back? */ } else if (nfsi->fileid != fattr->fileid) { /* Is this perhaps the mounted-on fileid? */ if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) && nfsi->fileid == fattr->mounted_on_fileid) return 0; return -ESTALE; } if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && inode_wrong_type(inode, fattr->mode)) return -ESTALE; if (!nfs_file_has_buffered_writers(nfsi)) { /* Verify a few of the more important attributes */ if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr)) invalid |= NFS_INO_INVALID_CHANGE; ts = inode_get_mtime(inode); if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime)) invalid |= NFS_INO_INVALID_MTIME; ts = inode_get_ctime(inode); if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime)) invalid |= NFS_INO_INVALID_CTIME; if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); if (cur_size != new_isize) invalid |= NFS_INO_INVALID_SIZE; } } /* Have any file permissions changed? */ if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) invalid |= NFS_INO_INVALID_MODE; if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid)) invalid |= NFS_INO_INVALID_OTHER; if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid)) invalid |= NFS_INO_INVALID_OTHER; /* Has the link count changed? */ if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) invalid |= NFS_INO_INVALID_NLINK; ts = inode_get_atime(inode); if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime)) invalid |= NFS_INO_INVALID_ATIME; if (invalid != 0) nfs_set_cache_invalid(inode, invalid); nfsi->read_cache_jiffies = fattr->time_start; return 0; } static atomic_long_t nfs_attr_generation_counter; static unsigned long nfs_read_attr_generation_counter(void) { return atomic_long_read(&nfs_attr_generation_counter); } unsigned long nfs_inc_attr_generation_counter(void) { return atomic_long_inc_return(&nfs_attr_generation_counter); } EXPORT_SYMBOL_GPL(nfs_inc_attr_generation_counter); void nfs_fattr_init(struct nfs_fattr *fattr) { fattr->valid = 0; fattr->time_start = jiffies; fattr->gencount = nfs_inc_attr_generation_counter(); fattr->owner_name = NULL; fattr->group_name = NULL; } EXPORT_SYMBOL_GPL(nfs_fattr_init); /** * nfs_fattr_set_barrier * @fattr: attributes * * Used to set a barrier after an attribute was updated. This * barrier ensures that older attributes from RPC calls that may * have raced with our update cannot clobber these new values. * Note that you are still responsible for ensuring that other * operations which change the attribute on the server do not * collide. */ void nfs_fattr_set_barrier(struct nfs_fattr *fattr) { fattr->gencount = nfs_inc_attr_generation_counter(); } struct nfs_fattr *nfs_alloc_fattr(void) { struct nfs_fattr *fattr; fattr = kmalloc(sizeof(*fattr), GFP_KERNEL); if (fattr != NULL) { nfs_fattr_init(fattr); fattr->label = NULL; } return fattr; } EXPORT_SYMBOL_GPL(nfs_alloc_fattr); struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server) { struct nfs_fattr *fattr = nfs_alloc_fattr(); if (!fattr) return NULL; fattr->label = nfs4_label_alloc(server, GFP_KERNEL); if (IS_ERR(fattr->label)) { kfree(fattr); return NULL; } return fattr; } EXPORT_SYMBOL_GPL(nfs_alloc_fattr_with_label); struct nfs_fh *nfs_alloc_fhandle(void) { struct nfs_fh *fh; fh = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); if (fh != NULL) fh->size = 0; return fh; } EXPORT_SYMBOL_GPL(nfs_alloc_fhandle); #ifdef NFS_DEBUG /* * _nfs_display_fhandle_hash - calculate the crc32 hash for the filehandle * in the same way that wireshark does * * @fh: file handle * * For debugging only. */ u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh) { /* wireshark uses 32-bit AUTODIN crc and does a bitwise * not on the result */ return nfs_fhandle_hash(fh); } EXPORT_SYMBOL_GPL(_nfs_display_fhandle_hash); /* * _nfs_display_fhandle - display an NFS file handle on the console * * @fh: file handle to display * @caption: display caption * * For debugging only. */ void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) { unsigned short i; if (fh == NULL || fh->size == 0) { printk(KERN_DEFAULT "%s at %p is empty\n", caption, fh); return; } printk(KERN_DEFAULT "%s at %p is %u bytes, crc: 0x%08x:\n", caption, fh, fh->size, _nfs_display_fhandle_hash(fh)); for (i = 0; i < fh->size; i += 16) { __be32 *pos = (__be32 *)&fh->data[i]; switch ((fh->size - i - 1) >> 2) { case 0: printk(KERN_DEFAULT " %08x\n", be32_to_cpup(pos)); break; case 1: printk(KERN_DEFAULT " %08x %08x\n", be32_to_cpup(pos), be32_to_cpup(pos + 1)); break; case 2: printk(KERN_DEFAULT " %08x %08x %08x\n", be32_to_cpup(pos), be32_to_cpup(pos + 1), be32_to_cpup(pos + 2)); break; default: printk(KERN_DEFAULT " %08x %08x %08x %08x\n", be32_to_cpup(pos), be32_to_cpup(pos + 1), be32_to_cpup(pos + 2), be32_to_cpup(pos + 3)); } } } EXPORT_SYMBOL_GPL(_nfs_display_fhandle); #endif /** * nfs_inode_attrs_cmp_generic - compare attributes * @fattr: attributes * @inode: pointer to inode * * Attempt to divine whether or not an RPC call reply carrying stale * attributes got scheduled after another call carrying updated ones. * Note also the check for wraparound of 'attr_gencount' * * The function returns '1' if it thinks the attributes in @fattr are * more recent than the ones cached in @inode. Otherwise it returns * the value '0'. */ static int nfs_inode_attrs_cmp_generic(const struct nfs_fattr *fattr, const struct inode *inode) { unsigned long attr_gencount = NFS_I(inode)->attr_gencount; return (long)(fattr->gencount - attr_gencount) > 0 || (long)(attr_gencount - nfs_read_attr_generation_counter()) > 0; } /** * nfs_inode_attrs_cmp_monotonic - compare attributes * @fattr: attributes * @inode: pointer to inode * * Attempt to divine whether or not an RPC call reply carrying stale * attributes got scheduled after another call carrying updated ones. * * We assume that the server observes monotonic semantics for * the change attribute, so a larger value means that the attributes in * @fattr are more recent, in which case the function returns the * value '1'. * A return value of '0' indicates no measurable change * A return value of '-1' means that the attributes in @inode are * more recent. */ static int nfs_inode_attrs_cmp_monotonic(const struct nfs_fattr *fattr, const struct inode *inode) { s64 diff = fattr->change_attr - inode_peek_iversion_raw(inode); if (diff > 0) return 1; return diff == 0 ? 0 : -1; } /** * nfs_inode_attrs_cmp_strict_monotonic - compare attributes * @fattr: attributes * @inode: pointer to inode * * Attempt to divine whether or not an RPC call reply carrying stale * attributes got scheduled after another call carrying updated ones. * * We assume that the server observes strictly monotonic semantics for * the change attribute, so a larger value means that the attributes in * @fattr are more recent, in which case the function returns the * value '1'. * A return value of '-1' means that the attributes in @inode are * more recent or unchanged. */ static int nfs_inode_attrs_cmp_strict_monotonic(const struct nfs_fattr *fattr, const struct inode *inode) { return nfs_inode_attrs_cmp_monotonic(fattr, inode) > 0 ? 1 : -1; } /** * nfs_inode_attrs_cmp - compare attributes * @fattr: attributes * @inode: pointer to inode * * This function returns '1' if it thinks the attributes in @fattr are * more recent than the ones cached in @inode. It returns '-1' if * the attributes in @inode are more recent than the ones in @fattr, * and it returns 0 if not sure. */ static int nfs_inode_attrs_cmp(const struct nfs_fattr *fattr, const struct inode *inode) { if (nfs_inode_attrs_cmp_generic(fattr, inode) > 0) return 1; switch (NFS_SERVER(inode)->change_attr_type) { case NFS4_CHANGE_TYPE_IS_UNDEFINED: break; case NFS4_CHANGE_TYPE_IS_TIME_METADATA: if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE)) break; return nfs_inode_attrs_cmp_monotonic(fattr, inode); default: if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE)) break; return nfs_inode_attrs_cmp_strict_monotonic(fattr, inode); } return 0; } /** * nfs_inode_finish_partial_attr_update - complete a previous inode update * @fattr: attributes * @inode: pointer to inode * * Returns '1' if the last attribute update left the inode cached * attributes in a partially unrevalidated state, and @fattr * matches the change attribute of that partial update. * Otherwise returns '0'. */ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr, const struct inode *inode) { const unsigned long check_valid = NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER | NFS_INO_INVALID_NLINK; unsigned long cache_validity = NFS_I(inode)->cache_validity; enum nfs4_change_attr_type ctype = NFS_SERVER(inode)->change_attr_type; if (ctype != NFS4_CHANGE_TYPE_IS_UNDEFINED && !(cache_validity & NFS_INO_INVALID_CHANGE) && (cache_validity & check_valid) != 0 && (fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && nfs_inode_attrs_cmp_monotonic(fattr, inode) == 0) return 1; return 0; } static void nfs_ooo_merge(struct nfs_inode *nfsi, u64 start, u64 end) { int i, cnt; if (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER) /* No point merging anything */ return; if (!nfsi->ooo) { nfsi->ooo = kmalloc(sizeof(*nfsi->ooo), GFP_ATOMIC); if (!nfsi->ooo) { nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; return; } nfsi->ooo->cnt = 0; } /* add this range, merging if possible */ cnt = nfsi->ooo->cnt; for (i = 0; i < cnt; i++) { if (end == nfsi->ooo->gap[i].start) end = nfsi->ooo->gap[i].end; else if (start == nfsi->ooo->gap[i].end) start = nfsi->ooo->gap[i].start; else continue; /* Remove 'i' from table and loop to insert the new range */ cnt -= 1; nfsi->ooo->gap[i] = nfsi->ooo->gap[cnt]; i = -1; } if (start != end) { if (cnt >= ARRAY_SIZE(nfsi->ooo->gap)) { nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; kfree(nfsi->ooo); nfsi->ooo = NULL; return; } nfsi->ooo->gap[cnt].start = start; nfsi->ooo->gap[cnt].end = end; cnt += 1; } nfsi->ooo->cnt = cnt; } static void nfs_ooo_record(struct nfs_inode *nfsi, struct nfs_fattr *fattr) { /* This reply was out-of-order, so record in the * pre/post change id, possibly cancelling * gaps created when iversion was jumpped forward. */ if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) && (fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) nfs_ooo_merge(nfsi, fattr->change_attr, fattr->pre_change_attr); } static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) { int attr_cmp = nfs_inode_attrs_cmp(fattr, inode); int ret = 0; trace_nfs_refresh_inode_enter(inode); if (attr_cmp > 0 || nfs_inode_finish_partial_attr_update(fattr, inode)) ret = nfs_update_inode(inode, fattr); else { nfs_ooo_record(NFS_I(inode), fattr); if (attr_cmp == 0) ret = nfs_check_inode_attributes(inode, fattr); } trace_nfs_refresh_inode_exit(inode, ret); return ret; } /** * nfs_refresh_inode - try to update the inode attribute cache * @inode: pointer to inode * @fattr: updated attributes * * Check that an RPC call that returned attributes has not overlapped with * other recent updates of the inode metadata, then decide whether it is * safe to do a full update of the inode attributes, or whether just to * call nfs_check_inode_attributes. */ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) { int status; if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; spin_lock(&inode->i_lock); status = nfs_refresh_inode_locked(inode, fattr); spin_unlock(&inode->i_lock); return status; } EXPORT_SYMBOL_GPL(nfs_refresh_inode); static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr, unsigned int invalid) { if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; nfs_set_cache_invalid(inode, invalid); if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; return nfs_refresh_inode_locked(inode, fattr); } /** * nfs_post_op_update_inode - try to update the inode attribute cache * @inode: pointer to inode * @fattr: updated attributes * * After an operation that has changed the inode metadata, mark the * attribute cache as being invalid, then try to update it. * * NB: if the server didn't return any post op attributes, this * function will force the retrieval of attributes before the next * NFS request. Thus it should be used only for operations that * are expected to change one or more attributes, to avoid * unnecessary NFS requests and trips through nfs_update_inode(). */ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) { int status; spin_lock(&inode->i_lock); nfs_fattr_set_barrier(fattr); status = nfs_post_op_update_inode_locked(inode, fattr, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME | NFS_INO_REVAL_FORCED); spin_unlock(&inode->i_lock); return status; } EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); /** * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache * @inode: pointer to inode * @fattr: updated attributes * * After an operation that has changed the inode metadata, mark the * attribute cache as being invalid, then try to update it. Fake up * weak cache consistency data, if none exist. * * This function is mainly designed to be used by the ->write_done() functions. */ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr) { int attr_cmp = nfs_inode_attrs_cmp(fattr, inode); int status; /* Don't do a WCC update if these attributes are already stale */ if (attr_cmp < 0) return 0; if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !attr_cmp) { /* Record the pre/post change info before clearing PRECHANGE */ nfs_ooo_record(NFS_I(inode), fattr); fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE | NFS_ATTR_FATTR_PRESIZE | NFS_ATTR_FATTR_PREMTIME | NFS_ATTR_FATTR_PRECTIME); goto out_noforce; } if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) { fattr->pre_change_attr = inode_peek_iversion_raw(inode); fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; } if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) { fattr->pre_ctime = inode_get_ctime(inode); fattr->valid |= NFS_ATTR_FATTR_PRECTIME; } if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 && (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) { fattr->pre_mtime = inode_get_mtime(inode); fattr->valid |= NFS_ATTR_FATTR_PREMTIME; } if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 && (fattr->valid & NFS_ATTR_FATTR_PRESIZE) == 0) { fattr->pre_size = i_size_read(inode); fattr->valid |= NFS_ATTR_FATTR_PRESIZE; } out_noforce: status = nfs_post_op_update_inode_locked(inode, fattr, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME | NFS_INO_INVALID_BLOCKS); return status; } /** * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache * @inode: pointer to inode * @fattr: updated attributes * * After an operation that has changed the inode metadata, mark the * attribute cache as being invalid, then try to update it. Fake up * weak cache consistency data, if none exist. * * This function is mainly designed to be used by the ->write_done() functions. */ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) { int status; spin_lock(&inode->i_lock); nfs_fattr_set_barrier(fattr); status = nfs_post_op_update_inode_force_wcc_locked(inode, fattr); spin_unlock(&inode->i_lock); return status; } EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc); /* * Many nfs protocol calls return the new file attributes after * an operation. Here we update the inode to reflect the state * of the server's inode. * * This is a bit tricky because we have to make sure all dirty pages * have been sent off to the server before calling invalidate_inode_pages. * To make sure no other process adds more write requests while we try * our best to flush them, we make them sleep during the attribute refresh. * * A very similar scenario holds for the dir cache. */ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_isize, new_isize; u64 fattr_supported = server->fattr_valid; unsigned long invalid = 0; unsigned long now = jiffies; unsigned long save_cache_validity; bool have_writers = nfs_file_has_buffered_writers(nfsi); bool cache_revalidated = true; bool attr_changed = false; bool have_delegation; dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, nfs_display_fhandle_hash(NFS_FH(inode)), atomic_read(&inode->i_count), fattr->valid); if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) { /* Only a mounted-on-fileid? Just exit */ if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) return 0; /* Has the inode gone and changed behind our back? */ } else if (nfsi->fileid != fattr->fileid) { /* Is this perhaps the mounted-on fileid? */ if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) && nfsi->fileid == fattr->mounted_on_fileid) return 0; printk(KERN_ERR "NFS: server %s error: fileid changed\n" "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, (long long)nfsi->fileid, (long long)fattr->fileid); goto out_err; } /* * Make sure the inode's type hasn't changed. */ if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && inode_wrong_type(inode, fattr->mode)) { /* * Big trouble! The inode has become a different object. */ printk(KERN_DEBUG "NFS: %s: inode %lu mode changed, %07o to %07o\n", __func__, inode->i_ino, inode->i_mode, fattr->mode); goto out_err; } /* Update the fsid? */ if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) && !nfs_fsid_equal(&server->fsid, &fattr->fsid) && !IS_AUTOMOUNT(inode)) server->fsid = fattr->fsid; /* Save the delegation state before clearing cache_validity */ have_delegation = nfs_have_delegated_attributes(inode); /* * Update the read time so we don't revalidate too often. */ nfsi->read_cache_jiffies = fattr->time_start; /* Fix up any delegated attributes in the struct nfs_fattr */ nfs_fattr_fixup_delegated(inode, fattr); save_cache_validity = nfsi->cache_validity; nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME | NFS_INO_REVAL_FORCED | NFS_INO_INVALID_BLOCKS); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); if (pnfs_layoutcommit_outstanding(inode)) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_BLOCKS); cache_revalidated = false; } /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { if (!have_writers && nfsi->ooo && nfsi->ooo->cnt == 1 && nfsi->ooo->gap[0].end == inode_peek_iversion_raw(inode)) { /* There is one remaining gap that hasn't been * merged into iversion - do that now. */ inode_set_iversion_raw(inode, nfsi->ooo->gap[0].start); kfree(nfsi->ooo); nfsi->ooo = NULL; } if (!inode_eq_iversion_raw(inode, fattr->change_attr)) { /* Could it be a race with writeback? */ if (!(have_writers || have_delegation)) { invalid |= NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_INVALID_XATTR; /* Force revalidate of all attributes */ save_cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK | NFS_INO_INVALID_MODE | NFS_INO_INVALID_OTHER; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); attr_changed = true; dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); } else if (!have_delegation) { nfs_ooo_record(nfsi, fattr); nfs_ooo_merge(nfsi, inode_peek_iversion_raw(inode), fattr->change_attr); } inode_set_iversion_raw(inode, fattr->change_attr); } } else { nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_CHANGE; if (!have_delegation || (nfsi->cache_validity & NFS_INO_INVALID_CHANGE) != 0) cache_revalidated = false; } if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode_set_mtime_to_ts(inode, fattr->mtime); else if (fattr_supported & NFS_ATTR_FATTR_MTIME) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_MTIME; if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode_set_ctime_to_ts(inode, fattr->ctime); else if (fattr_supported & NFS_ATTR_FATTR_CTIME) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_CTIME; /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { new_isize = nfs_size_to_loff_t(fattr->size); cur_isize = i_size_read(inode); if (new_isize != cur_isize && !have_delegation) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { trace_nfs_size_update(inode, new_isize); i_size_write(inode, new_isize); if (!have_writers) invalid |= NFS_INO_INVALID_DATA; } } if (new_isize == 0 && !(fattr->valid & (NFS_ATTR_FATTR_SPACE_USED | NFS_ATTR_FATTR_BLOCKS_USED))) { fattr->du.nfs3.used = 0; fattr->valid |= NFS_ATTR_FATTR_SPACE_USED; } } else nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_SIZE; if (fattr->valid & NFS_ATTR_FATTR_ATIME) inode_set_atime_to_ts(inode, fattr->atime); else if (fattr_supported & NFS_ATTR_FATTR_ATIME) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATIME; if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { umode_t newmode = inode->i_mode & S_IFMT; newmode |= fattr->mode & S_IALLUGO; inode->i_mode = newmode; invalid |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; } } else if (fattr_supported & NFS_ATTR_FATTR_MODE) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_MODE; if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (!uid_eq(inode->i_uid, fattr->uid)) { invalid |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; } } else if (fattr_supported & NFS_ATTR_FATTR_OWNER) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_OTHER; if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (!gid_eq(inode->i_gid, fattr->gid)) { invalid |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; } } else if (fattr_supported & NFS_ATTR_FATTR_GROUP) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_OTHER; if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) set_nlink(inode, fattr->nlink); } else if (fattr_supported & NFS_ATTR_FATTR_NLINK) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_NLINK; if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_BLOCKS; if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_BLOCKS; /* Update attrtimeo value if we're out of the unstable period */ if (attr_changed) { nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; /* Set barrier to be more recent than all outstanding updates */ nfsi->attr_gencount = nfs_inc_attr_generation_counter(); } else { if (cache_revalidated) { if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { nfsi->attrtimeo <<= 1; if (nfsi->attrtimeo > NFS_MAXATTRTIMEO(inode)) nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); } nfsi->attrtimeo_timestamp = now; } /* Set the barrier to be more recent than this fattr */ if ((long)(fattr->gencount - nfsi->attr_gencount) > 0) nfsi->attr_gencount = fattr->gencount; } /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; nfs_set_cache_invalid(inode, invalid); return 0; out_err: /* * No need to worry about unhashing the dentry, as the * lookup validation will know that the inode is bad. * (But we fall through to invalidate the caches.) */ nfs_set_inode_stale_locked(inode); return -ESTALE; } struct inode *nfs_alloc_inode(struct super_block *sb) { struct nfs_inode *nfsi; nfsi = alloc_inode_sb(sb, nfs_inode_cachep, GFP_KERNEL); if (!nfsi) return NULL; nfsi->flags = 0UL; nfsi->cache_validity = 0UL; nfsi->ooo = NULL; #if IS_ENABLED(CONFIG_NFS_V4) nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_V4_2 nfsi->xattr_cache = NULL; #endif nfs_netfs_inode_init(nfsi); return &nfsi->vfs_inode; } EXPORT_SYMBOL_GPL(nfs_alloc_inode); void nfs_free_inode(struct inode *inode) { kfree(NFS_I(inode)->ooo); kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } EXPORT_SYMBOL_GPL(nfs_free_inode); static inline void nfs4_init_once(struct nfs_inode *nfsi) { #if IS_ENABLED(CONFIG_NFS_V4) INIT_LIST_HEAD(&nfsi->open_states); nfsi->delegation = NULL; init_rwsem(&nfsi->rwsem); nfsi->layout = NULL; #endif } static void init_once(void *foo) { struct nfs_inode *nfsi = foo; inode_init_once(&nfsi->vfs_inode); INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); nfs4_init_once(nfsi); } static int __init nfs_init_inodecache(void) { nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", sizeof(struct nfs_inode), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_ACCOUNT), init_once); if (nfs_inode_cachep == NULL) return -ENOMEM; return 0; } static void nfs_destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(nfs_inode_cachep); } struct workqueue_struct *nfslocaliod_workqueue; struct workqueue_struct *nfsiod_workqueue; EXPORT_SYMBOL_GPL(nfsiod_workqueue); /* * Destroy the nfsiod workqueues */ static void nfsiod_stop(void) { struct workqueue_struct *wq; wq = nfsiod_workqueue; if (wq != NULL) { nfsiod_workqueue = NULL; destroy_workqueue(wq); } #if IS_ENABLED(CONFIG_NFS_LOCALIO) wq = nfslocaliod_workqueue; if (wq != NULL) { nfslocaliod_workqueue = NULL; destroy_workqueue(wq); } #endif /* CONFIG_NFS_LOCALIO */ } /* * Start the nfsiod workqueues */ static int nfsiod_start(void) { dprintk("RPC: creating workqueue nfsiod\n"); nfsiod_workqueue = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (nfsiod_workqueue == NULL) return -ENOMEM; #if IS_ENABLED(CONFIG_NFS_LOCALIO) /* * localio writes need to use a normal (non-memreclaim) workqueue. * When we start getting low on space, XFS goes and calls flush_work() on * a non-memreclaim work queue, which causes a priority inversion problem. */ dprintk("RPC: creating workqueue nfslocaliod\n"); nfslocaliod_workqueue = alloc_workqueue("nfslocaliod", WQ_UNBOUND, 0); if (unlikely(nfslocaliod_workqueue == NULL)) { nfsiod_stop(); return -ENOMEM; } #endif /* CONFIG_NFS_LOCALIO */ return 0; } unsigned int nfs_net_id; EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); nfs_clients_init(net); if (!rpc_proc_register(net, &nn->rpcstats)) { nfs_clients_exit(net); return -ENOMEM; } return nfs_fs_proc_net_init(net); } static void nfs_net_exit(struct net *net) { rpc_proc_unregister(net, "nfs"); nfs_fs_proc_net_exit(net); nfs_clients_exit(net); } static struct pernet_operations nfs_net_ops = { .init = nfs_net_init, .exit = nfs_net_exit, .id = &nfs_net_id, .size = sizeof(struct nfs_net), }; /* * Initialize NFS */ static int __init init_nfs_fs(void) { int err; err = nfs_sysfs_init(); if (err < 0) goto out10; err = register_pernet_subsys(&nfs_net_ops); if (err < 0) goto out9; err = nfsiod_start(); if (err) goto out7; err = nfs_fs_proc_init(); if (err) goto out6; err = nfs_init_nfspagecache(); if (err) goto out5; err = nfs_init_inodecache(); if (err) goto out4; err = nfs_init_readpagecache(); if (err) goto out3; err = nfs_init_writepagecache(); if (err) goto out2; err = nfs_init_directcache(); if (err) goto out1; err = register_nfs_fs(); if (err) goto out0; return 0; out0: nfs_destroy_directcache(); out1: nfs_destroy_writepagecache(); out2: nfs_destroy_readpagecache(); out3: nfs_destroy_inodecache(); out4: nfs_destroy_nfspagecache(); out5: nfs_fs_proc_exit(); out6: nfsiod_stop(); out7: unregister_pernet_subsys(&nfs_net_ops); out9: nfs_sysfs_exit(); out10: return err; } static void __exit exit_nfs_fs(void) { nfs_destroy_directcache(); nfs_destroy_writepagecache(); nfs_destroy_readpagecache(); nfs_destroy_inodecache(); nfs_destroy_nfspagecache(); unregister_pernet_subsys(&nfs_net_ops); unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); nfs_sysfs_exit(); } /* Not quite true; I just maintain it */ MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); MODULE_DESCRIPTION("NFS client support"); MODULE_LICENSE("GPL"); module_param(enable_ino64, bool, 0644); module_init(init_nfs_fs) module_exit(exit_nfs_fs) |
65 63 64 65 61 5 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 | // SPDX-License-Identifier: GPL-2.0-or-later /* * ip_vs_proto_udp.c: UDP load balancing support for IPVS * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> * Network name space (netns) aware. */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/in.h> #include <linux/ip.h> #include <linux/kernel.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/udp.h> #include <linux/indirect_call_wrapper.h> #include <net/ip_vs.h> #include <net/ip.h> #include <net/ip6_checksum.h> static int udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); static int udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { struct ip_vs_service *svc; struct udphdr _udph, *uh; __be16 _ports[2], *ports = NULL; if (likely(!ip_vs_iph_icmp(iph))) { /* IPv6 fragments, only first fragment will hit this */ uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); if (uh) ports = &uh->source; } else { ports = skb_header_pointer( skb, iph->len, sizeof(_ports), &_ports); } if (!ports) { *verdict = NF_DROP; return 0; } if (likely(!ip_vs_iph_inverse(iph))) svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, &iph->daddr, ports[1]); else svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, &iph->saddr, ports[0]); if (svc) { int ignored; if (ip_vs_todrop(ipvs)) { /* * It seems that we are very loaded. * We have to drop this packet :( */ *verdict = NF_DROP; return 0; } /* * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); else *verdict = NF_DROP; return 0; } } /* NF_ACCEPT */ return 1; } static inline void udp_fast_csum_update(int af, struct udphdr *uhdr, const union nf_inet_addr *oldip, const union nf_inet_addr *newip, __be16 oldport, __be16 newport) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) uhdr->check = csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, ip_vs_check_diff2(oldport, newport, ~csum_unfold(uhdr->check)))); else #endif uhdr->check = csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, ip_vs_check_diff2(oldport, newport, ~csum_unfold(uhdr->check)))); if (!uhdr->check) uhdr->check = CSUM_MANGLED_0; } static inline void udp_partial_csum_update(int af, struct udphdr *uhdr, const union nf_inet_addr *oldip, const union nf_inet_addr *newip, __be16 oldlen, __be16 newlen) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) uhdr->check = ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, ip_vs_check_diff2(oldlen, newlen, csum_unfold(uhdr->check)))); else #endif uhdr->check = ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, ip_vs_check_diff2(oldlen, newlen, csum_unfold(uhdr->check)))); } INDIRECT_CALLABLE_SCOPE int udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct udphdr *udph; unsigned int udphoff = iph->len; bool payload_csum = false; int oldlen; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) return 1; #endif oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ if (skb_ensure_writable(skb, udphoff + sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { int ret; /* Some checks before mangling */ if (!udp_csum_check(cp->af, skb, pp)) return 0; /* * Call application helper if needed */ if (!(ret = ip_vs_app_pkt_out(cp, skb, iph))) return 0; /* ret=2: csum update is needed after payload mangling */ if (ret == 1) oldlen = skb->len - udphoff; else payload_csum = true; } udph = (void *)skb_network_header(skb) + udphoff; udph->source = cp->vport; /* * Adjust UDP checksums */ if (skb->ip_summed == CHECKSUM_PARTIAL) { udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, htons(oldlen), htons(skb->len - udphoff)); } else if (!payload_csum && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = cp->app ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) udph->check = csum_ipv6_magic(&cp->vaddr.in6, &cp->caddr.in6, skb->len - udphoff, cp->protocol, skb->csum); else #endif udph->check = csum_tcpudp_magic(cp->vaddr.ip, cp->caddr.ip, skb->len - udphoff, cp->protocol, skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; skb->ip_summed = CHECKSUM_UNNECESSARY; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, udph->check, (char*)&(udph->check) - (char*)udph); } return 1; } static int udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct udphdr *udph; unsigned int udphoff = iph->len; bool payload_csum = false; int oldlen; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) return 1; #endif oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ if (skb_ensure_writable(skb, udphoff + sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { int ret; /* Some checks before mangling */ if (!udp_csum_check(cp->af, skb, pp)) return 0; /* * Attempt ip_vs_app call. * It will fix ip_vs_conn */ if (!(ret = ip_vs_app_pkt_in(cp, skb, iph))) return 0; /* ret=2: csum update is needed after payload mangling */ if (ret == 1) oldlen = skb->len - udphoff; else payload_csum = true; } udph = (void *)skb_network_header(skb) + udphoff; udph->dest = cp->dport; /* * Adjust UDP checksums */ if (skb->ip_summed == CHECKSUM_PARTIAL) { udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, htons(oldlen), htons(skb->len - udphoff)); } else if (!payload_csum && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = cp->app ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) udph->check = csum_ipv6_magic(&cp->caddr.in6, &cp->daddr.in6, skb->len - udphoff, cp->protocol, skb->csum); else #endif udph->check = csum_tcpudp_magic(cp->caddr.ip, cp->daddr.ip, skb->len - udphoff, cp->protocol, skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; skb->ip_summed = CHECKSUM_UNNECESSARY; } return 1; } static int udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) { struct udphdr _udph, *uh; unsigned int udphoff; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) udphoff = sizeof(struct ipv6hdr); else #endif udphoff = ip_hdrlen(skb); uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); if (uh == NULL) return 0; if (uh->check != 0) { switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); fallthrough; case CHECKSUM_COMPLETE: #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len - udphoff, ipv6_hdr(skb)->nexthdr, skb->csum)) { IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, "Failed checksum for"); return 0; } } else #endif if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, skb->len - udphoff, ip_hdr(skb)->protocol, skb->csum)) { IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, "Failed checksum for"); return 0; } break; default: /* No need to checksum. */ break; } } return 1; } static inline __u16 udp_app_hashkey(__be16 port) { return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) & UDP_APP_TAB_MASK; } static int udp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); hash = udp_app_hashkey(port); list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]); atomic_inc(&pd->appcnt); out: return ret; } static void udp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); atomic_dec(&pd->appcnt); list_del_rcu(&inc->p_list); } static int udp_app_conn_bind(struct ip_vs_conn *cp) { struct netns_ipvs *ipvs = cp->ipvs; int hash; struct ip_vs_app *inc; int result = 0; /* Default binding: bind app only for NAT */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) return 0; /* Lookup application incarnations and bind the right one */ hash = udp_app_hashkey(cp->vport); list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", __func__, IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), inc->name, ntohs(inc->port)); cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); break; } } return result; } static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_NORMAL] = 5*60*HZ, [IP_VS_UDP_S_LAST] = 2*HZ, }; static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_NORMAL] = "UDP", [IP_VS_UDP_S_LAST] = "BUG!", }; static const char * udp_state_name(int state) { if (state >= IP_VS_UDP_S_LAST) return "ERR!"; return udp_state_name_table[state] ? udp_state_name_table[state] : "?"; } static void udp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_proto_data *pd) { if (unlikely(!pd)) { pr_err("UDP no ns data\n"); return; } cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; if (direction == IP_VS_DIR_OUTPUT) ip_vs_control_assure_ct(cp); } static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); if (!pd->timeout_table) return -ENOMEM; return 0; } static void __udp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { kfree(pd->timeout_table); } struct ip_vs_protocol ip_vs_protocol_udp = { .name = "UDP", .protocol = IPPROTO_UDP, .num_states = IP_VS_UDP_S_LAST, .dont_defrag = 0, .init = NULL, .exit = NULL, .init_netns = __udp_init, .exit_netns = __udp_exit, .conn_schedule = udp_conn_schedule, .conn_in_get = ip_vs_conn_in_get_proto, .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = udp_snat_handler, .dnat_handler = udp_dnat_handler, .state_transition = udp_state_transition, .state_name = udp_state_name, .register_app = udp_register_app, .unregister_app = udp_unregister_app, .app_conn_bind = udp_app_conn_bind, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = NULL, }; |
3 4 5 4 3 4 2 1 3 1 1 1 7 4 3 3 2 2 1 3 3 14 1 14 1 16 5 10 2 1 11 12 13 1 2 10 2 10 1 2 1 3 7 10 4 4 2 2 1 3 5 6 6 1 1 11 1 2 1 1 1 6 1 4 1 5 1 1 1 1 1 4 3 4 4 1 2 1 1 1 4 1 2 1 6 6 5 3 2 5 5 4 1 4 2 2 14 11 5 5 10 12 1 11 1 2 2 3 4 3 1 1 1 1 20 1 20 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 12 1 1 1 1 1 1 1 1 1 1 1 1 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 | /* * af_llc.c - LLC User Interface SAPs * Description: * Functions in this module are implementation of socket based llc * communications for the Linux operating system. Support of llc class * one and class two is provided via SOCK_DGRAM and SOCK_STREAM * respectively. * * An llc2 connection is (mac + sap), only one llc2 sap connection * is allowed per mac. Though one sap may have multiple mac + sap * connections. * * Copyright (c) 2001 by Jay Schulist <jschlst@samba.org> * 2002-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <net/llc.h> #include <net/llc_sap.h> #include <net/llc_pdu.h> #include <net/llc_conn.h> #include <net/tcp_states.h> /* remember: uninitialized global data is zeroed because its in .bss */ static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; static u16 llc_ui_sap_link_no_max[256]; static struct sockaddr_llc llc_ui_addrnull; static const struct proto_ops llc_ui_ops; static bool llc_ui_wait_for_conn(struct sock *sk, long timeout); static int llc_ui_wait_for_disc(struct sock *sk, long timeout); static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); #if 0 #define dprintk(args...) printk(KERN_DEBUG args) #else #define dprintk(args...) do {} while (0) #endif /* Maybe we'll add some more in the future. */ #define LLC_CMSG_PKTINFO 1 /** * llc_ui_next_link_no - return the next unused link number for a sap * @sap: Address of sap to get link number from. * * Return the next unused link number for a given sap. */ static inline u16 llc_ui_next_link_no(int sap) { return llc_ui_sap_link_no_max[sap]++; } /** * llc_proto_type - return eth protocol for ARP header type * @arphrd: ARP header type. * * Given an ARP header type return the corresponding ethernet protocol. */ static inline __be16 llc_proto_type(u16 arphrd) { return htons(ETH_P_802_2); } /** * llc_ui_addr_null - determines if a address structure is null * @addr: Address to test if null. */ static inline u8 llc_ui_addr_null(struct sockaddr_llc *addr) { return !memcmp(addr, &llc_ui_addrnull, sizeof(*addr)); } /** * llc_ui_header_len - return length of llc header based on operation * @sk: Socket which contains a valid llc socket type. * @addr: Complete sockaddr_llc structure received from the user. * * Provide the length of the llc header depending on what kind of * operation the user would like to perform and the type of socket. * Returns the correct llc header length. */ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr) { u8 rc = LLC_PDU_LEN_U; if (addr->sllc_test) rc = LLC_PDU_LEN_U; else if (addr->sllc_xid) /* We need to expand header to sizeof(struct llc_xid_info) * since llc_pdu_init_as_xid_cmd() sets 4,5,6 bytes of LLC header * as XID PDU. In llc_ui_sendmsg() we reserved header size and then * filled all other space with user data. If we won't reserve this * bytes, llc_pdu_init_as_xid_cmd() will overwrite user data */ rc = LLC_PDU_LEN_U_XID; else if (sk->sk_type == SOCK_STREAM) rc = LLC_PDU_LEN_I; return rc; } /** * llc_ui_send_data - send data via reliable llc2 connection * @sk: Connection the socket is using. * @skb: Data the user wishes to send. * @noblock: can we block waiting for data? * * Send data via reliable llc2 connection. * Returns 0 upon success, non-zero if action did not succeed. * * This function always consumes a reference to the skb. */ static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock) { struct llc_sock* llc = llc_sk(sk); if (unlikely(llc_data_accept_state(llc->state) || llc->remote_busy_flag || llc->p_flag)) { long timeout = sock_sndtimeo(sk, noblock); int rc; rc = llc_ui_wait_for_busy_core(sk, timeout); if (rc) { kfree_skb(skb); return rc; } } return llc_build_and_send_pkt(sk, skb); } static void llc_ui_sk_init(struct socket *sock, struct sock *sk) { sock_graft(sk, sock); sk->sk_type = sock->type; sock->ops = &llc_ui_ops; } static struct proto llc_proto = { .name = "LLC", .owner = THIS_MODULE, .obj_size = sizeof(struct llc_sock), .slab_flags = SLAB_TYPESAFE_BY_RCU, }; /** * llc_ui_create - alloc and init a new llc_ui socket * @net: network namespace (must be default network) * @sock: Socket to initialize and attach allocated sk to. * @protocol: Unused. * @kern: on behalf of kernel or userspace * * Allocate and initialize a new llc_ui socket, validate the user wants a * socket type we have available. * Returns 0 upon success, negative upon failure. */ static int llc_ui_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; if (!ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { rc = -ENOMEM; sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto, kern); if (sk) { rc = 0; llc_ui_sk_init(sock, sk); } } return rc; } /** * llc_ui_release - shutdown socket * @sock: Socket to release. * * Shutdown and deallocate an existing socket. */ static int llc_ui_release(struct socket *sock) { struct sock *sk = sock->sk; struct llc_sock *llc; if (unlikely(sk == NULL)) goto out; sock_hold(sk); lock_sock(sk); llc = llc_sk(sk); dprintk("%s: closing local(%02X) remote(%02X)\n", __func__, llc->laddr.lsap, llc->daddr.lsap); if (!llc_send_disc(sk)) llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo); if (!sock_flag(sk, SOCK_ZAPPED)) { struct llc_sap *sap = llc->sap; /* Hold this for release_sock(), so that llc_backlog_rcv() * could still use it. */ llc_sap_hold(sap); llc_sap_remove_socket(llc->sap, sk); release_sock(sk); llc_sap_put(sap); } else { release_sock(sk); } netdev_put(llc->dev, &llc->dev_tracker); sock_put(sk); sock_orphan(sk); sock->sk = NULL; llc_sk_free(sk); out: return 0; } /** * llc_ui_autoport - provide dynamically allocate SAP number * * Provide the caller with a dynamically allocated SAP number according * to the rules that are set in this function. Returns: 0, upon failure, * SAP number otherwise. */ static int llc_ui_autoport(void) { struct llc_sap *sap; int i, tries = 0; while (tries < LLC_SAP_DYN_TRIES) { for (i = llc_ui_sap_last_autoport; i < LLC_SAP_DYN_STOP; i += 2) { sap = llc_sap_find(i); if (!sap) { llc_ui_sap_last_autoport = i + 2; goto out; } llc_sap_put(sap); } llc_ui_sap_last_autoport = LLC_SAP_DYN_START; tries++; } i = 0; out: return i; } /** * llc_ui_autobind - automatically bind a socket to a sap * @sock: socket to bind * @addr: address to connect to * * Used by llc_ui_connect and llc_ui_sendmsg when the user hasn't * specifically used llc_ui_bind to bind to an specific address/sap * * Returns: 0 upon success, negative otherwise. */ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; if (!sock_flag(sk, SOCK_ZAPPED)) goto out; if (!addr->sllc_arphrd) addr->sllc_arphrd = ARPHRD_ETHER; if (addr->sllc_arphrd != ARPHRD_ETHER) goto out; rc = -ENODEV; if (sk->sk_bound_dev_if) { dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); if (dev && addr->sllc_arphrd != dev->type) { dev_put(dev); dev = NULL; } } else dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); if (!dev) goto out; rc = -EUSERS; llc->laddr.lsap = llc_ui_autoport(); if (!llc->laddr.lsap) goto out; rc = -EBUSY; /* some other network layer is using the sap */ sap = llc_sap_open(llc->laddr.lsap, NULL); if (!sap) goto out; /* Note: We do not expect errors from this point. */ llc->dev = dev; netdev_tracker_alloc(llc->dev, &llc->dev_tracker, GFP_KERNEL); dev = NULL; memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); /* assign new connection to its SAP */ llc_sap_add_socket(sap, sk); sock_reset_flag(sk, SOCK_ZAPPED); rc = 0; out: dev_put(dev); return rc; } /** * llc_ui_bind - bind a socket to a specific address. * @sock: Socket to bind an address to. * @uaddr: Address the user wants the socket bound to. * @addrlen: Length of the uaddr structure. * * Bind a socket to a specific address. For llc a user is able to bind to * a specific sap only or mac + sap. * If the user desires to bind to a specific mac + sap, it is possible to * have multiple sap connections via multiple macs. * Bind and autobind for that matter must enforce the correct sap usage * otherwise all hell will break loose. * Returns: 0 upon success, negative otherwise. */ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) { struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; lock_sock(sk); if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; if (!addr->sllc_arphrd) addr->sllc_arphrd = ARPHRD_ETHER; if (unlikely(addr->sllc_family != AF_LLC || addr->sllc_arphrd != ARPHRD_ETHER)) goto out; dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); rc = -ENODEV; rcu_read_lock(); if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); if (dev) { if (is_zero_ether_addr(addr->sllc_mac)) memcpy(addr->sllc_mac, dev->dev_addr, IFHWADDRLEN); if (addr->sllc_arphrd != dev->type || !ether_addr_equal(addr->sllc_mac, dev->dev_addr)) { rc = -EINVAL; dev = NULL; } } } else { dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, addr->sllc_mac); } dev_hold(dev); rcu_read_unlock(); if (!dev) goto out; if (!addr->sllc_sap) { rc = -EUSERS; addr->sllc_sap = llc_ui_autoport(); if (!addr->sllc_sap) goto out; } sap = llc_sap_find(addr->sllc_sap); if (!sap) { sap = llc_sap_open(addr->sllc_sap, NULL); rc = -EBUSY; /* some other network layer is using the sap */ if (!sap) goto out; } else { struct llc_addr laddr, daddr; struct sock *ask; memset(&laddr, 0, sizeof(laddr)); memset(&daddr, 0, sizeof(daddr)); /* * FIXME: check if the address is multicast, * only SOCK_DGRAM can do this. */ memcpy(laddr.mac, addr->sllc_mac, IFHWADDRLEN); laddr.lsap = addr->sllc_sap; rc = -EADDRINUSE; /* mac + sap clash. */ ask = llc_lookup_established(sap, &daddr, &laddr, &init_net); if (ask) { sock_put(ask); goto out_put; } } /* Note: We do not expect errors from this point. */ llc->dev = dev; netdev_tracker_alloc(llc->dev, &llc->dev_tracker, GFP_KERNEL); dev = NULL; llc->laddr.lsap = addr->sllc_sap; memcpy(llc->laddr.mac, addr->sllc_mac, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); /* assign new connection to its SAP */ llc_sap_add_socket(sap, sk); sock_reset_flag(sk, SOCK_ZAPPED); rc = 0; out_put: llc_sap_put(sap); out: dev_put(dev); release_sock(sk); return rc; } /** * llc_ui_shutdown - shutdown a connect llc2 socket. * @sock: Socket to shutdown. * @how: What part of the socket to shutdown. * * Shutdown a connected llc2 socket. Currently this function only supports * shutting down both sends and receives (2), we could probably make this * function such that a user can shutdown only half the connection but not * right now. * Returns: 0 upon success, negative otherwise. */ static int llc_ui_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; int rc = -ENOTCONN; lock_sock(sk); if (unlikely(sk->sk_state != TCP_ESTABLISHED)) goto out; rc = -EINVAL; if (how != 2) goto out; rc = llc_send_disc(sk); if (!rc) rc = llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo); /* Wake up anyone sleeping in poll */ sk->sk_state_change(sk); out: release_sock(sk); return rc; } /** * llc_ui_connect - Connect to a remote llc2 mac + sap. * @sock: Socket which will be connected to the remote destination. * @uaddr: Remote and possibly the local address of the new connection. * @addrlen: Size of uaddr structure. * @flags: Operational flags specified by the user. * * Connect to a remote llc2 mac + sap. The caller must specify the * destination mac and address to connect to. If the user hasn't previously * called bind(2) with a smac the address of the first interface of the * specified arp type will be used. * This function will autobind if user did not previously call bind. * Returns: 0 upon success, negative otherwise. */ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, int addrlen, int flags) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr; int rc = -EINVAL; lock_sock(sk); if (unlikely(addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; if (unlikely(addr->sllc_family != AF_LLC)) goto out; if (unlikely(sk->sk_type != SOCK_STREAM)) goto out; rc = -EALREADY; if (unlikely(sock->state == SS_CONNECTING)) goto out; /* bind connection to sap if user hasn't done it. */ if (sock_flag(sk, SOCK_ZAPPED)) { /* bind to sap with null dev, exclusive */ rc = llc_ui_autobind(sock, addr); if (rc) goto out; } llc->daddr.lsap = addr->sllc_sap; memcpy(llc->daddr.mac, addr->sllc_mac, IFHWADDRLEN); sock->state = SS_CONNECTING; sk->sk_state = TCP_SYN_SENT; llc->link = llc_ui_next_link_no(llc->sap->laddr.lsap); rc = llc_establish_connection(sk, llc->dev->dev_addr, addr->sllc_mac, addr->sllc_sap); if (rc) { dprintk("%s: llc_ui_send_conn failed :-(\n", __func__); sock->state = SS_UNCONNECTED; sk->sk_state = TCP_CLOSE; goto out; } if (sk->sk_state == TCP_SYN_SENT) { const long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); if (!timeo || !llc_ui_wait_for_conn(sk, timeo)) goto out; rc = sock_intr_errno(timeo); if (signal_pending(current)) goto out; } if (sk->sk_state == TCP_CLOSE) goto sock_error; sock->state = SS_CONNECTED; rc = 0; out: release_sock(sk); return rc; sock_error: rc = sock_error(sk) ? : -ECONNABORTED; sock->state = SS_UNCONNECTED; goto out; } /** * llc_ui_listen - allow a normal socket to accept incoming connections * @sock: Socket to allow incoming connections on. * @backlog: Number of connections to queue. * * Allow a normal socket to accept incoming connections. * Returns 0 upon success, negative otherwise. */ static int llc_ui_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; int rc = -EINVAL; lock_sock(sk); if (unlikely(sock->state != SS_UNCONNECTED)) goto out; rc = -EOPNOTSUPP; if (unlikely(sk->sk_type != SOCK_STREAM)) goto out; rc = -EAGAIN; if (sock_flag(sk, SOCK_ZAPPED)) goto out; rc = 0; if (!(unsigned int)backlog) /* BSDism */ backlog = 1; sk->sk_max_ack_backlog = backlog; if (sk->sk_state != TCP_LISTEN) { sk->sk_ack_backlog = 0; sk->sk_state = TCP_LISTEN; } sk->sk_socket->flags |= __SO_ACCEPTCON; out: release_sock(sk); return rc; } static int llc_ui_wait_for_disc(struct sock *sk, long timeout) { DEFINE_WAIT_FUNC(wait, woken_wake_function); int rc = 0; add_wait_queue(sk_sleep(sk), &wait); while (1) { if (sk_wait_event(sk, &timeout, READ_ONCE(sk->sk_state) == TCP_CLOSE, &wait)) break; rc = -ERESTARTSYS; if (signal_pending(current)) break; rc = -EAGAIN; if (!timeout) break; rc = 0; } remove_wait_queue(sk_sleep(sk), &wait); return rc; } static bool llc_ui_wait_for_conn(struct sock *sk, long timeout) { DEFINE_WAIT_FUNC(wait, woken_wake_function); add_wait_queue(sk_sleep(sk), &wait); while (1) { if (sk_wait_event(sk, &timeout, READ_ONCE(sk->sk_state) != TCP_SYN_SENT, &wait)) break; if (signal_pending(current) || !timeout) break; } remove_wait_queue(sk_sleep(sk), &wait); return timeout; } static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct llc_sock *llc = llc_sk(sk); int rc; add_wait_queue(sk_sleep(sk), &wait); while (1) { rc = 0; if (sk_wait_event(sk, &timeout, (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN) || (!llc_data_accept_state(llc->state) && !llc->remote_busy_flag && !llc->p_flag), &wait)) break; rc = -ERESTARTSYS; if (signal_pending(current)) break; rc = -EAGAIN; if (!timeout) break; } remove_wait_queue(sk_sleep(sk), &wait); return rc; } static int llc_wait_data(struct sock *sk, long timeo) { int rc; while (1) { /* * POSIX 1003.1g mandates this order. */ rc = sock_error(sk); if (rc) break; rc = 0; if (sk->sk_shutdown & RCV_SHUTDOWN) break; rc = -EAGAIN; if (!timeo) break; rc = sock_intr_errno(timeo); if (signal_pending(current)) break; rc = 0; if (sk_wait_data(sk, &timeo, NULL)) break; } return rc; } static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) { struct llc_sock *llc = llc_sk(skb->sk); if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { struct llc_pktinfo info; memset(&info, 0, sizeof(info)); info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; llc_pdu_decode_dsap(skb, &info.lpi_sap); llc_pdu_decode_da(skb, info.lpi_mac); put_cmsg(msg, SOL_LLC, LLC_OPT_PKTINFO, sizeof(info), &info); } } /** * llc_ui_accept - accept a new incoming connection. * @sock: Socket which connections arrive on. * @newsock: Socket to move incoming connection to. * @arg: User specified arguments * * Accept a new incoming connection. * Returns 0 upon success, negative otherwise. */ static int llc_ui_accept(struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg) { struct sock *sk = sock->sk, *newsk; struct llc_sock *llc, *newllc; struct sk_buff *skb; int rc = -EOPNOTSUPP; dprintk("%s: accepting on %02X\n", __func__, llc_sk(sk)->laddr.lsap); lock_sock(sk); if (unlikely(sk->sk_type != SOCK_STREAM)) goto out; rc = -EINVAL; if (unlikely(sock->state != SS_UNCONNECTED || sk->sk_state != TCP_LISTEN)) goto out; /* wait for a connection to arrive. */ if (skb_queue_empty(&sk->sk_receive_queue)) { rc = llc_wait_data(sk, sk->sk_rcvtimeo); if (rc) goto out; } dprintk("%s: got a new connection on %02X\n", __func__, llc_sk(sk)->laddr.lsap); skb = skb_dequeue(&sk->sk_receive_queue); rc = -EINVAL; if (!skb->sk) goto frees; rc = 0; newsk = skb->sk; /* attach connection to a new socket. */ llc_ui_sk_init(newsock, newsk); sock_reset_flag(newsk, SOCK_ZAPPED); newsk->sk_state = TCP_ESTABLISHED; newsock->state = SS_CONNECTED; llc = llc_sk(sk); newllc = llc_sk(newsk); memcpy(&newllc->addr, &llc->addr, sizeof(newllc->addr)); newllc->link = llc_ui_next_link_no(newllc->laddr.lsap); /* put original socket back into a clean listen state. */ sk->sk_state = TCP_LISTEN; sk_acceptq_removed(sk); dprintk("%s: ok success on %02X, client on %02X\n", __func__, llc_sk(sk)->addr.sllc_sap, newllc->daddr.lsap); frees: kfree_skb(skb); out: release_sock(sk); return rc; } /** * llc_ui_recvmsg - copy received data to the socket user. * @sock: Socket to copy data from. * @msg: Various user space related information. * @len: Size of user buffer. * @flags: User specified flags. * * Copy received data to the socket user. * Returns non-negative upon success, negative otherwise. */ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { DECLARE_SOCKADDR(struct sockaddr_llc *, uaddr, msg->msg_name); const int nonblock = flags & MSG_DONTWAIT; struct sk_buff *skb = NULL; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); size_t copied = 0; u32 peek_seq = 0; u32 *seq, skb_len; unsigned long used; int target; /* Read at least this many bytes */ long timeo; lock_sock(sk); copied = -ENOTCONN; if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) goto out; timeo = sock_rcvtimeo(sk, nonblock); seq = &llc->copied_seq; if (flags & MSG_PEEK) { peek_seq = llc->copied_seq; seq = &peek_seq; } target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); copied = 0; do { u32 offset; /* * We need to check signals first, to get correct SIGURG * handling. FIXME: Need to check this doesn't impact 1003.1g * and move it down to the bottom of the loop */ if (signal_pending(current)) { if (copied) break; copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; break; } /* Next get a buffer. */ skb = skb_peek(&sk->sk_receive_queue); if (skb) { offset = *seq; goto found_ok_skb; } /* Well, if we have backlog, try to process it now yet. */ if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) break; if (copied) { if (sk->sk_err || sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || (flags & MSG_PEEK)) break; } else { if (sock_flag(sk, SOCK_DONE)) break; if (sk->sk_err) { copied = sock_error(sk); break; } if (sk->sk_shutdown & RCV_SHUTDOWN) break; if (sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_CLOSE) { if (!sock_flag(sk, SOCK_DONE)) { /* * This occurs when user tries to read * from never connected socket. */ copied = -ENOTCONN; break; } break; } if (!timeo) { copied = -EAGAIN; break; } } if (copied >= target) { /* Do not sleep, just process backlog. */ release_sock(sk); lock_sock(sk); } else sk_wait_data(sk, &timeo, NULL); if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) { net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n", current->comm, task_pid_nr(current)); peek_seq = llc->copied_seq; } continue; found_ok_skb: skb_len = skb->len; /* Ok so how much can we use? */ used = skb->len - offset; if (len < used) used = len; if (!(flags & MSG_TRUNC)) { int rc = skb_copy_datagram_msg(skb, offset, msg, used); if (rc) { /* Exception. Bailout! */ if (!copied) copied = -EFAULT; break; } } *seq += used; copied += used; len -= used; /* For non stream protcols we get one packet per recvmsg call */ if (sk->sk_type != SOCK_STREAM) goto copy_uaddr; if (!(flags & MSG_PEEK)) { skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); *seq = 0; } /* Partial read */ if (used + offset < skb_len) continue; } while (len > 0); out: release_sock(sk); return copied; copy_uaddr: if (uaddr != NULL && skb != NULL) { memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); msg->msg_namelen = sizeof(*uaddr); } if (llc_sk(sk)->cmsg_flags) llc_cmsg_rcv(msg, skb); if (!(flags & MSG_PEEK)) { skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); *seq = 0; } goto out; } /** * llc_ui_sendmsg - Transmit data provided by the socket user. * @sock: Socket to transmit data from. * @msg: Various user related information. * @len: Length of data to transmit. * * Transmit data provided by the socket user. * Returns non-negative upon success, negative otherwise. */ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; int rc = -EINVAL, copied = 0, hdrlen, hh_len; struct sk_buff *skb = NULL; struct net_device *dev; size_t size = 0; dprintk("%s: sending from %02X to %02X\n", __func__, llc->laddr.lsap, llc->daddr.lsap); lock_sock(sk); if (addr) { if (msg->msg_namelen < sizeof(*addr)) goto out; } else { if (llc_ui_addr_null(&llc->addr)) goto out; addr = &llc->addr; } /* must bind connection to sap if user hasn't done it. */ if (sock_flag(sk, SOCK_ZAPPED)) { /* bind to sap with null dev, exclusive. */ rc = llc_ui_autobind(sock, addr); if (rc) goto out; } dev = llc->dev; hh_len = LL_RESERVED_SPACE(dev); hdrlen = llc_ui_header_len(sk, addr); size = hdrlen + len; size = min_t(size_t, size, READ_ONCE(dev->mtu)); copied = size - hdrlen; rc = -EINVAL; if (copied < 0) goto out; release_sock(sk); skb = sock_alloc_send_skb(sk, hh_len + size, noblock, &rc); lock_sock(sk); if (!skb) goto out; if (sock_flag(sk, SOCK_ZAPPED) || llc->dev != dev || hdrlen != llc_ui_header_len(sk, addr) || hh_len != LL_RESERVED_SPACE(dev) || size > READ_ONCE(dev->mtu)) goto out; skb->dev = dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); skb_reserve(skb, hh_len + hdrlen); rc = memcpy_from_msg(skb_put(skb, copied), msg, copied); if (rc) goto out; if (sk->sk_type == SOCK_DGRAM || addr->sllc_ua) { llc_build_and_send_ui_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); skb = NULL; goto out; } if (addr->sllc_test) { llc_build_and_send_test_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); skb = NULL; goto out; } if (addr->sllc_xid) { llc_build_and_send_xid_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); skb = NULL; goto out; } rc = -ENOPROTOOPT; if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua)) goto out; rc = llc_ui_send_data(sk, skb, noblock); skb = NULL; out: kfree_skb(skb); if (rc) dprintk("%s: failed sending from %02X to %02X: %d\n", __func__, llc->laddr.lsap, llc->daddr.lsap, rc); release_sock(sk); return rc ? : copied; } /** * llc_ui_getname - return the address info of a socket * @sock: Socket to get address of. * @uaddr: Address structure to return information. * @peer: Does user want local or remote address information. * * Return the address information of a socket. */ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sockaddr_llc sllc; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); int rc = -EBADF; memset(&sllc, 0, sizeof(sllc)); lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) goto out; if (peer) { rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) goto out; if(llc->dev) sllc.sllc_arphrd = llc->dev->type; sllc.sllc_sap = llc->daddr.lsap; memcpy(&sllc.sllc_mac, &llc->daddr.mac, IFHWADDRLEN); } else { rc = -EINVAL; if (!llc->sap) goto out; sllc.sllc_sap = llc->sap->laddr.lsap; if (llc->dev) { sllc.sllc_arphrd = llc->dev->type; memcpy(&sllc.sllc_mac, llc->dev->dev_addr, IFHWADDRLEN); } } sllc.sllc_family = AF_LLC; memcpy(uaddr, &sllc, sizeof(sllc)); rc = sizeof(sllc); out: release_sock(sk); return rc; } /** * llc_ui_ioctl - io controls for PF_LLC * @sock: Socket to get/set info * @cmd: command * @arg: optional argument for cmd * * get/set info on llc sockets */ static int llc_ui_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return -ENOIOCTLCMD; } /** * llc_ui_setsockopt - set various connection specific parameters. * @sock: Socket to set options on. * @level: Socket level user is requesting operations on. * @optname: Operation name. * @optval: User provided operation data. * @optlen: Length of optval. * * Set various connection specific parameters. */ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); unsigned int opt; int rc = -EINVAL; lock_sock(sk); if (unlikely(level != SOL_LLC || optlen != sizeof(int))) goto out; rc = copy_from_sockptr(&opt, optval, sizeof(opt)); if (rc) goto out; rc = -EINVAL; switch (optname) { case LLC_OPT_RETRY: if (opt > LLC_OPT_MAX_RETRY) goto out; llc->n2 = opt; break; case LLC_OPT_SIZE: if (opt > LLC_OPT_MAX_SIZE) goto out; llc->n1 = opt; break; case LLC_OPT_ACK_TMR_EXP: if (opt > LLC_OPT_MAX_ACK_TMR_EXP) goto out; llc->ack_timer.expire = opt * HZ; break; case LLC_OPT_P_TMR_EXP: if (opt > LLC_OPT_MAX_P_TMR_EXP) goto out; llc->pf_cycle_timer.expire = opt * HZ; break; case LLC_OPT_REJ_TMR_EXP: if (opt > LLC_OPT_MAX_REJ_TMR_EXP) goto out; llc->rej_sent_timer.expire = opt * HZ; break; case LLC_OPT_BUSY_TMR_EXP: if (opt > LLC_OPT_MAX_BUSY_TMR_EXP) goto out; llc->busy_state_timer.expire = opt * HZ; break; case LLC_OPT_TX_WIN: if (opt > LLC_OPT_MAX_WIN) goto out; llc->k = opt; break; case LLC_OPT_RX_WIN: if (opt > LLC_OPT_MAX_WIN) goto out; llc->rw = opt; break; case LLC_OPT_PKTINFO: if (opt) llc->cmsg_flags |= LLC_CMSG_PKTINFO; else llc->cmsg_flags &= ~LLC_CMSG_PKTINFO; break; default: rc = -ENOPROTOOPT; goto out; } rc = 0; out: release_sock(sk); return rc; } /** * llc_ui_getsockopt - get connection specific socket info * @sock: Socket to get information from. * @level: Socket level user is requesting operations on. * @optname: Operation name. * @optval: Variable to return operation data in. * @optlen: Length of optval. * * Get connection specific socket information. */ static int llc_ui_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); int val = 0, len = 0, rc = -EINVAL; lock_sock(sk); if (unlikely(level != SOL_LLC)) goto out; rc = get_user(len, optlen); if (rc) goto out; rc = -EINVAL; if (len != sizeof(int)) goto out; switch (optname) { case LLC_OPT_RETRY: val = llc->n2; break; case LLC_OPT_SIZE: val = llc->n1; break; case LLC_OPT_ACK_TMR_EXP: val = llc->ack_timer.expire / HZ; break; case LLC_OPT_P_TMR_EXP: val = llc->pf_cycle_timer.expire / HZ; break; case LLC_OPT_REJ_TMR_EXP: val = llc->rej_sent_timer.expire / HZ; break; case LLC_OPT_BUSY_TMR_EXP: val = llc->busy_state_timer.expire / HZ; break; case LLC_OPT_TX_WIN: val = llc->k; break; case LLC_OPT_RX_WIN: val = llc->rw; break; case LLC_OPT_PKTINFO: val = (llc->cmsg_flags & LLC_CMSG_PKTINFO) != 0; break; default: rc = -ENOPROTOOPT; goto out; } rc = 0; if (put_user(len, optlen) || copy_to_user(optval, &val, len)) rc = -EFAULT; out: release_sock(sk); return rc; } static const struct net_proto_family llc_ui_family_ops = { .family = PF_LLC, .create = llc_ui_create, .owner = THIS_MODULE, }; static const struct proto_ops llc_ui_ops = { .family = PF_LLC, .owner = THIS_MODULE, .release = llc_ui_release, .bind = llc_ui_bind, .connect = llc_ui_connect, .socketpair = sock_no_socketpair, .accept = llc_ui_accept, .getname = llc_ui_getname, .poll = datagram_poll, .ioctl = llc_ui_ioctl, .listen = llc_ui_listen, .shutdown = llc_ui_shutdown, .setsockopt = llc_ui_setsockopt, .getsockopt = llc_ui_getsockopt, .sendmsg = llc_ui_sendmsg, .recvmsg = llc_ui_recvmsg, .mmap = sock_no_mmap, }; static const char llc_proc_err_msg[] __initconst = KERN_CRIT "LLC: Unable to register the proc_fs entries\n"; static const char llc_sysctl_err_msg[] __initconst = KERN_CRIT "LLC: Unable to register the sysctl entries\n"; static const char llc_sock_err_msg[] __initconst = KERN_CRIT "LLC: Unable to register the network family\n"; static int __init llc2_init(void) { int rc = proto_register(&llc_proto, 0); if (rc != 0) goto out; llc_build_offset_table(); llc_station_init(); llc_ui_sap_last_autoport = LLC_SAP_DYN_START; rc = llc_proc_init(); if (rc != 0) { printk(llc_proc_err_msg); goto out_station; } rc = llc_sysctl_init(); if (rc) { printk(llc_sysctl_err_msg); goto out_proc; } rc = sock_register(&llc_ui_family_ops); if (rc) { printk(llc_sock_err_msg); goto out_sysctl; } llc_add_pack(LLC_DEST_SAP, llc_sap_handler); llc_add_pack(LLC_DEST_CONN, llc_conn_handler); out: return rc; out_sysctl: llc_sysctl_exit(); out_proc: llc_proc_exit(); out_station: llc_station_exit(); proto_unregister(&llc_proto); goto out; } static void __exit llc2_exit(void) { llc_station_exit(); llc_remove_pack(LLC_DEST_SAP); llc_remove_pack(LLC_DEST_CONN); sock_unregister(PF_LLC); llc_proc_exit(); llc_sysctl_exit(); proto_unregister(&llc_proto); } module_init(llc2_init); module_exit(llc2_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Procom 1997, Jay Schullist 2001, Arnaldo C. Melo 2001-2003"); MODULE_DESCRIPTION("IEEE 802.2 PF_LLC support"); MODULE_ALIAS_NETPROTO(PF_LLC); |
78 130 38 72 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * (C) 2008 Krzysztof Piotr Oledzki <ole@ans.pl> */ #ifndef _NF_CONNTRACK_ACCT_H #define _NF_CONNTRACK_ACCT_H #include <net/net_namespace.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_conntrack_tuple_common.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> struct nf_conn_counter { atomic64_t packets; atomic64_t bytes; }; struct nf_conn_acct { struct nf_conn_counter counter[IP_CT_DIR_MAX]; }; static inline struct nf_conn_acct *nf_conn_acct_find(const struct nf_conn *ct) { return nf_ct_ext_find(ct, NF_CT_EXT_ACCT); } static inline struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) struct net *net = nf_ct_net(ct); struct nf_conn_acct *acct; if (!net->ct.sysctl_acct) return NULL; acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp); if (!acct) pr_debug("failed to add accounting extension area"); return acct; #else return NULL; #endif } /* Check if connection tracking accounting is enabled */ static inline bool nf_ct_acct_enabled(struct net *net) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) return net->ct.sysctl_acct != 0; #else return false; #endif } /* Enable/disable connection tracking accounting */ static inline void nf_ct_set_acct(struct net *net, bool enable) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) net->ct.sysctl_acct = enable; #endif } void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets, unsigned int bytes); static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir, unsigned int bytes) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_ct_acct_add(ct, dir, 1, bytes); #endif } void nf_conntrack_acct_pernet_init(struct net *net); #endif /* _NF_CONNTRACK_ACCT_H */ |
2 36 7 1 7 3 6 6 8 2 15 5 9 2 2 7 6 3 4 1 3 3 1 2 1 15 6 2 5 2 2 2 2 2 1 1 1 1 4 2 2 9 9 6 8 6 6 6 6 1 2 3 3 3 2 1 2 1 6 14 14 1 1 12 1 12 1 14 3 1 2 2 2 3 7 2 5 18 8 14 3 12 5 23 23 23 2 26 4 23 22 23 23 1 3 1 2 1 1 1 19 19 19 19 19 7 7 2 6 94 94 1 2 2 1 2 2 1 1 3 2 20 89 5 5 3 27 1 2 10 8 8 7 8 50 46 1 3 2 46 18 27 24 20 33 1 9 29 9 2 2 12 2 1 1 1 1 6 2 4 1 5 3 29 1 4 14 13 25 2 1 25 14 11 1 24 7 16 21 1 18 1 4 10 1 11 1 18 1 18 1 1 19 19 5 2 26 7 6 2 7 7 7 2 1 4 5 1 5 1 4 6 3 2 1 42 22 13 22 86 2 3 2 81 1 22 22 22 2 1 86 77 23 874 869 31 30 5 1 3 2 1 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2007 Patrick McHardy <kaber@trash.net> * * The code this is based on carried the following copyright notice: * --- * (C) Copyright 2001-2006 * Alex Zeffertt, Cambridge Broadband Ltd, ajz@cambridgebroadband.com * Re-worked by Ben Greear <greearb@candelatech.com> * --- */ #include <linux/kernel.h> #include <linux/types.h> #include <linux/module.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/rculist.h> #include <linux/notifier.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/net_tstamp.h> #include <linux/ethtool.h> #include <linux/if_arp.h> #include <linux/if_vlan.h> #include <linux/if_link.h> #include <linux/if_macvlan.h> #include <linux/hash.h> #include <linux/workqueue.h> #include <net/rtnetlink.h> #include <net/xfrm.h> #include <linux/netpoll.h> #include <linux/phy.h> #define MACVLAN_HASH_BITS 8 #define MACVLAN_HASH_SIZE (1<<MACVLAN_HASH_BITS) #define MACVLAN_DEFAULT_BC_QUEUE_LEN 1000 #define MACVLAN_F_PASSTHRU 1 #define MACVLAN_F_ADDRCHANGE 2 struct macvlan_port { struct net_device *dev; struct hlist_head vlan_hash[MACVLAN_HASH_SIZE]; struct list_head vlans; struct sk_buff_head bc_queue; struct work_struct bc_work; u32 bc_queue_len_used; int bc_cutoff; u32 flags; int count; struct hlist_head vlan_source_hash[MACVLAN_HASH_SIZE]; DECLARE_BITMAP(bc_filter, MACVLAN_MC_FILTER_SZ); DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ); unsigned char perm_addr[ETH_ALEN]; }; struct macvlan_source_entry { struct hlist_node hlist; struct macvlan_dev *vlan; unsigned char addr[6+2] __aligned(sizeof(u16)); struct rcu_head rcu; }; struct macvlan_skb_cb { const struct macvlan_dev *src; }; #define MACVLAN_SKB_CB(__skb) ((struct macvlan_skb_cb *)&((__skb)->cb[0])) static void macvlan_port_destroy(struct net_device *dev); static void update_port_bc_queue_len(struct macvlan_port *port); static inline bool macvlan_passthru(const struct macvlan_port *port) { return port->flags & MACVLAN_F_PASSTHRU; } static inline void macvlan_set_passthru(struct macvlan_port *port) { port->flags |= MACVLAN_F_PASSTHRU; } static inline bool macvlan_addr_change(const struct macvlan_port *port) { return port->flags & MACVLAN_F_ADDRCHANGE; } static inline void macvlan_set_addr_change(struct macvlan_port *port) { port->flags |= MACVLAN_F_ADDRCHANGE; } static inline void macvlan_clear_addr_change(struct macvlan_port *port) { port->flags &= ~MACVLAN_F_ADDRCHANGE; } /* Hash Ethernet address */ static u32 macvlan_eth_hash(const unsigned char *addr) { u64 value = get_unaligned((u64 *)addr); /* only want 6 bytes */ #ifdef __BIG_ENDIAN value >>= 16; #else value <<= 16; #endif return hash_64(value, MACVLAN_HASH_BITS); } static struct macvlan_port *macvlan_port_get_rcu(const struct net_device *dev) { return rcu_dereference(dev->rx_handler_data); } static struct macvlan_port *macvlan_port_get_rtnl(const struct net_device *dev) { return rtnl_dereference(dev->rx_handler_data); } static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port, const unsigned char *addr) { struct macvlan_dev *vlan; u32 idx = macvlan_eth_hash(addr); hlist_for_each_entry_rcu(vlan, &port->vlan_hash[idx], hlist, lockdep_rtnl_is_held()) { if (ether_addr_equal_64bits(vlan->dev->dev_addr, addr)) return vlan; } return NULL; } static struct macvlan_source_entry *macvlan_hash_lookup_source( const struct macvlan_dev *vlan, const unsigned char *addr) { struct macvlan_source_entry *entry; u32 idx = macvlan_eth_hash(addr); struct hlist_head *h = &vlan->port->vlan_source_hash[idx]; hlist_for_each_entry_rcu(entry, h, hlist, lockdep_rtnl_is_held()) { if (ether_addr_equal_64bits(entry->addr, addr) && entry->vlan == vlan) return entry; } return NULL; } static int macvlan_hash_add_source(struct macvlan_dev *vlan, const unsigned char *addr) { struct macvlan_port *port = vlan->port; struct macvlan_source_entry *entry; struct hlist_head *h; entry = macvlan_hash_lookup_source(vlan, addr); if (entry) return 0; entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; ether_addr_copy(entry->addr, addr); entry->vlan = vlan; h = &port->vlan_source_hash[macvlan_eth_hash(addr)]; hlist_add_head_rcu(&entry->hlist, h); vlan->macaddr_count++; return 0; } static void macvlan_hash_add(struct macvlan_dev *vlan) { struct macvlan_port *port = vlan->port; const unsigned char *addr = vlan->dev->dev_addr; u32 idx = macvlan_eth_hash(addr); hlist_add_head_rcu(&vlan->hlist, &port->vlan_hash[idx]); } static void macvlan_hash_del_source(struct macvlan_source_entry *entry) { hlist_del_rcu(&entry->hlist); kfree_rcu(entry, rcu); } static void macvlan_hash_del(struct macvlan_dev *vlan, bool sync) { hlist_del_rcu(&vlan->hlist); if (sync) synchronize_rcu(); } static void macvlan_hash_change_addr(struct macvlan_dev *vlan, const unsigned char *addr) { macvlan_hash_del(vlan, true); /* Now that we are unhashed it is safe to change the device * address without confusing packet delivery. */ eth_hw_addr_set(vlan->dev, addr); macvlan_hash_add(vlan); } static bool macvlan_addr_busy(const struct macvlan_port *port, const unsigned char *addr) { /* Test to see if the specified address is * currently in use by the underlying device or * another macvlan. */ if (!macvlan_passthru(port) && !macvlan_addr_change(port) && ether_addr_equal_64bits(port->dev->dev_addr, addr)) return true; if (macvlan_hash_lookup(port, addr)) return true; return false; } static int macvlan_broadcast_one(struct sk_buff *skb, const struct macvlan_dev *vlan, const struct ethhdr *eth, bool local) { struct net_device *dev = vlan->dev; if (local) return __dev_forward_skb(dev, skb); skb->dev = dev; if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) skb->pkt_type = PACKET_BROADCAST; else skb->pkt_type = PACKET_MULTICAST; return 0; } static u32 macvlan_hash_mix(const struct macvlan_dev *vlan) { return (u32)(((unsigned long)vlan) >> L1_CACHE_SHIFT); } static unsigned int mc_hash(const struct macvlan_dev *vlan, const unsigned char *addr) { u32 val = __get_unaligned_cpu32(addr + 2); val ^= macvlan_hash_mix(vlan); return hash_32(val, MACVLAN_MC_FILTER_BITS); } static void macvlan_broadcast(struct sk_buff *skb, const struct macvlan_port *port, struct net_device *src, enum macvlan_mode mode) { const struct ethhdr *eth = eth_hdr(skb); const struct macvlan_dev *vlan; struct sk_buff *nskb; unsigned int i; int err; unsigned int hash; if (skb->protocol == htons(ETH_P_PAUSE)) return; hash_for_each_rcu(port->vlan_hash, i, vlan, hlist) { if (vlan->dev == src || !(vlan->mode & mode)) continue; hash = mc_hash(vlan, eth->h_dest); if (!test_bit(hash, vlan->mc_filter)) continue; err = NET_RX_DROP; nskb = skb_clone(skb, GFP_ATOMIC); if (likely(nskb)) err = macvlan_broadcast_one(nskb, vlan, eth, mode == MACVLAN_MODE_BRIDGE) ?: netif_rx(nskb); macvlan_count_rx(vlan, skb->len + ETH_HLEN, err == NET_RX_SUCCESS, true); } } static void macvlan_multicast_rx(const struct macvlan_port *port, const struct macvlan_dev *src, struct sk_buff *skb) { if (!src) /* frame comes from an external address */ macvlan_broadcast(skb, port, NULL, MACVLAN_MODE_PRIVATE | MACVLAN_MODE_VEPA | MACVLAN_MODE_PASSTHRU| MACVLAN_MODE_BRIDGE); else if (src->mode == MACVLAN_MODE_VEPA) /* flood to everyone except source */ macvlan_broadcast(skb, port, src->dev, MACVLAN_MODE_VEPA | MACVLAN_MODE_BRIDGE); else /* * flood only to VEPA ports, bridge ports * already saw the frame on the way out. */ macvlan_broadcast(skb, port, src->dev, MACVLAN_MODE_VEPA); } static void macvlan_process_broadcast(struct work_struct *w) { struct macvlan_port *port = container_of(w, struct macvlan_port, bc_work); struct sk_buff *skb; struct sk_buff_head list; __skb_queue_head_init(&list); spin_lock_bh(&port->bc_queue.lock); skb_queue_splice_tail_init(&port->bc_queue, &list); spin_unlock_bh(&port->bc_queue.lock); while ((skb = __skb_dequeue(&list))) { const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src; rcu_read_lock(); macvlan_multicast_rx(port, src, skb); rcu_read_unlock(); if (src) dev_put(src->dev); consume_skb(skb); cond_resched(); } } static void macvlan_broadcast_enqueue(struct macvlan_port *port, const struct macvlan_dev *src, struct sk_buff *skb) { struct sk_buff *nskb; int err = -ENOMEM; nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) goto err; MACVLAN_SKB_CB(nskb)->src = src; spin_lock(&port->bc_queue.lock); if (skb_queue_len(&port->bc_queue) < port->bc_queue_len_used) { if (src) dev_hold(src->dev); __skb_queue_tail(&port->bc_queue, nskb); err = 0; } spin_unlock(&port->bc_queue.lock); queue_work(system_unbound_wq, &port->bc_work); if (err) goto free_nskb; return; free_nskb: kfree_skb(nskb); err: dev_core_stats_rx_dropped_inc(skb->dev); } static void macvlan_flush_sources(struct macvlan_port *port, struct macvlan_dev *vlan) { struct macvlan_source_entry *entry; struct hlist_node *next; int i; hash_for_each_safe(port->vlan_source_hash, i, next, entry, hlist) if (entry->vlan == vlan) macvlan_hash_del_source(entry); vlan->macaddr_count = 0; } static void macvlan_forward_source_one(struct sk_buff *skb, struct macvlan_dev *vlan) { struct sk_buff *nskb; struct net_device *dev; int len; int ret; dev = vlan->dev; if (unlikely(!(dev->flags & IFF_UP))) return; nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return; len = nskb->len + ETH_HLEN; nskb->dev = dev; if (ether_addr_equal_64bits(eth_hdr(skb)->h_dest, dev->dev_addr)) nskb->pkt_type = PACKET_HOST; ret = __netif_rx(nskb); macvlan_count_rx(vlan, len, ret == NET_RX_SUCCESS, false); } static bool macvlan_forward_source(struct sk_buff *skb, struct macvlan_port *port, const unsigned char *addr) { struct macvlan_source_entry *entry; u32 idx = macvlan_eth_hash(addr); struct hlist_head *h = &port->vlan_source_hash[idx]; bool consume = false; hlist_for_each_entry_rcu(entry, h, hlist) { if (ether_addr_equal_64bits(entry->addr, addr)) { if (entry->vlan->flags & MACVLAN_FLAG_NODST) consume = true; macvlan_forward_source_one(skb, entry->vlan); } } return consume; } /* called under rcu_read_lock() from netif_receive_skb */ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) { struct macvlan_port *port; struct sk_buff *skb = *pskb; const struct ethhdr *eth = eth_hdr(skb); const struct macvlan_dev *vlan; const struct macvlan_dev *src; struct net_device *dev; unsigned int len = 0; int ret; rx_handler_result_t handle_res; /* Packets from dev_loopback_xmit() do not have L2 header, bail out */ if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) return RX_HANDLER_PASS; port = macvlan_port_get_rcu(skb->dev); if (is_multicast_ether_addr(eth->h_dest)) { unsigned int hash; skb = ip_check_defrag(dev_net(skb->dev), skb, IP_DEFRAG_MACVLAN); if (!skb) return RX_HANDLER_CONSUMED; *pskb = skb; eth = eth_hdr(skb); if (macvlan_forward_source(skb, port, eth->h_source)) { kfree_skb(skb); return RX_HANDLER_CONSUMED; } src = macvlan_hash_lookup(port, eth->h_source); if (src && src->mode != MACVLAN_MODE_VEPA && src->mode != MACVLAN_MODE_BRIDGE) { /* forward to original port. */ vlan = src; ret = macvlan_broadcast_one(skb, vlan, eth, 0) ?: __netif_rx(skb); handle_res = RX_HANDLER_CONSUMED; goto out; } hash = mc_hash(NULL, eth->h_dest); if (test_bit(hash, port->bc_filter)) macvlan_broadcast_enqueue(port, src, skb); else if (test_bit(hash, port->mc_filter)) macvlan_multicast_rx(port, src, skb); return RX_HANDLER_PASS; } if (macvlan_forward_source(skb, port, eth->h_source)) { kfree_skb(skb); return RX_HANDLER_CONSUMED; } if (macvlan_passthru(port)) vlan = list_first_or_null_rcu(&port->vlans, struct macvlan_dev, list); else vlan = macvlan_hash_lookup(port, eth->h_dest); if (!vlan || vlan->mode == MACVLAN_MODE_SOURCE) return RX_HANDLER_PASS; dev = vlan->dev; if (unlikely(!(dev->flags & IFF_UP))) { kfree_skb(skb); return RX_HANDLER_CONSUMED; } len = skb->len + ETH_HLEN; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) { ret = NET_RX_DROP; handle_res = RX_HANDLER_CONSUMED; goto out; } *pskb = skb; skb->dev = dev; skb->pkt_type = PACKET_HOST; ret = NET_RX_SUCCESS; handle_res = RX_HANDLER_ANOTHER; out: macvlan_count_rx(vlan, len, ret == NET_RX_SUCCESS, false); return handle_res; } static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) { const struct macvlan_dev *vlan = netdev_priv(dev); const struct macvlan_port *port = vlan->port; const struct macvlan_dev *dest; if (vlan->mode == MACVLAN_MODE_BRIDGE) { const struct ethhdr *eth = skb_eth_hdr(skb); /* send to other bridge ports directly */ if (is_multicast_ether_addr(eth->h_dest)) { skb_reset_mac_header(skb); macvlan_broadcast(skb, port, dev, MACVLAN_MODE_BRIDGE); goto xmit_world; } dest = macvlan_hash_lookup(port, eth->h_dest); if (dest && dest->mode == MACVLAN_MODE_BRIDGE) { /* send to lowerdev first for its network taps */ dev_forward_skb(vlan->lowerdev, skb); return NET_XMIT_SUCCESS; } } xmit_world: skb->dev = vlan->lowerdev; return dev_queue_xmit_accel(skb, netdev_get_sb_channel(dev) ? dev : NULL); } static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb) { #ifdef CONFIG_NET_POLL_CONTROLLER return netpoll_send_skb(vlan->netpoll, skb); #else BUG(); return NETDEV_TX_OK; #endif } static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); unsigned int len = skb->len; int ret; if (unlikely(netpoll_tx_running(dev))) return macvlan_netpoll_send_skb(vlan, skb); ret = macvlan_queue_xmit(skb, dev); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct vlan_pcpu_stats *pcpu_stats; pcpu_stats = this_cpu_ptr(vlan->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); u64_stats_inc(&pcpu_stats->tx_packets); u64_stats_add(&pcpu_stats->tx_bytes, len); u64_stats_update_end(&pcpu_stats->syncp); } else { this_cpu_inc(vlan->pcpu_stats->tx_dropped); } return ret; } static int macvlan_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len) { const struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; return dev_hard_header(skb, lowerdev, type, daddr, saddr ? : dev->dev_addr, len); } static const struct header_ops macvlan_hard_header_ops = { .create = macvlan_hard_header, .parse = eth_header_parse, .cache = eth_header_cache, .cache_update = eth_header_cache_update, .parse_protocol = eth_header_parse_protocol, }; static int macvlan_open(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; int err; if (macvlan_passthru(vlan->port)) { if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) { err = dev_set_promiscuity(lowerdev, 1); if (err < 0) goto out; } goto hash_add; } err = -EADDRINUSE; if (macvlan_addr_busy(vlan->port, dev->dev_addr)) goto out; /* Attempt to populate accel_priv which is used to offload the L2 * forwarding requests for unicast packets. */ if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) vlan->accel_priv = lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev); /* If earlier attempt to offload failed, or accel_priv is not * populated we must add the unicast address to the lower device. */ if (IS_ERR_OR_NULL(vlan->accel_priv)) { vlan->accel_priv = NULL; err = dev_uc_add(lowerdev, dev->dev_addr); if (err < 0) goto out; } if (dev->flags & IFF_ALLMULTI) { err = dev_set_allmulti(lowerdev, 1); if (err < 0) goto del_unicast; } if (dev->flags & IFF_PROMISC) { err = dev_set_promiscuity(lowerdev, 1); if (err < 0) goto clear_multi; } hash_add: macvlan_hash_add(vlan); return 0; clear_multi: if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(lowerdev, -1); del_unicast: if (vlan->accel_priv) { lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev, vlan->accel_priv); vlan->accel_priv = NULL; } else { dev_uc_del(lowerdev, dev->dev_addr); } out: return err; } static int macvlan_stop(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; if (vlan->accel_priv) { lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev, vlan->accel_priv); vlan->accel_priv = NULL; } dev_uc_unsync(lowerdev, dev); dev_mc_unsync(lowerdev, dev); if (macvlan_passthru(vlan->port)) { if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) dev_set_promiscuity(lowerdev, -1); goto hash_del; } if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(lowerdev, -1); if (dev->flags & IFF_PROMISC) dev_set_promiscuity(lowerdev, -1); dev_uc_del(lowerdev, dev->dev_addr); hash_del: macvlan_hash_del(vlan, !dev->dismantle); return 0; } static int macvlan_sync_address(struct net_device *dev, const unsigned char *addr) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; struct macvlan_port *port = vlan->port; int err; if (!(dev->flags & IFF_UP)) { /* Just copy in the new address */ eth_hw_addr_set(dev, addr); } else { /* Rehash and update the device filters */ if (macvlan_addr_busy(vlan->port, addr)) return -EADDRINUSE; if (!macvlan_passthru(port)) { err = dev_uc_add(lowerdev, addr); if (err) return err; dev_uc_del(lowerdev, dev->dev_addr); } macvlan_hash_change_addr(vlan, addr); } if (macvlan_passthru(port) && !macvlan_addr_change(port)) { /* Since addr_change isn't set, we are here due to lower * device change. Save the lower-dev address so we can * restore it later. */ ether_addr_copy(vlan->port->perm_addr, lowerdev->dev_addr); } macvlan_clear_addr_change(port); return 0; } static int macvlan_set_mac_address(struct net_device *dev, void *p) { struct macvlan_dev *vlan = netdev_priv(dev); struct sockaddr *addr = p; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; /* If the addresses are the same, this is a no-op */ if (ether_addr_equal(dev->dev_addr, addr->sa_data)) return 0; if (vlan->mode == MACVLAN_MODE_PASSTHRU) { macvlan_set_addr_change(vlan->port); return dev_set_mac_address(vlan->lowerdev, addr, NULL); } if (macvlan_addr_busy(vlan->port, addr->sa_data)) return -EADDRINUSE; return macvlan_sync_address(dev, addr->sa_data); } static void macvlan_change_rx_flags(struct net_device *dev, int change) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; if (dev->flags & IFF_UP) { if (change & IFF_ALLMULTI) dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1); if (!macvlan_passthru(vlan->port) && change & IFF_PROMISC) dev_set_promiscuity(lowerdev, dev->flags & IFF_PROMISC ? 1 : -1); } } static void macvlan_compute_filter(unsigned long *mc_filter, struct net_device *dev, struct macvlan_dev *vlan, int cutoff) { if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { bitmap_fill(mc_filter, MACVLAN_MC_FILTER_SZ); } else { DECLARE_BITMAP(filter, MACVLAN_MC_FILTER_SZ); struct netdev_hw_addr *ha; bitmap_zero(filter, MACVLAN_MC_FILTER_SZ); netdev_for_each_mc_addr(ha, dev) { if (!vlan && ha->synced <= cutoff) continue; __set_bit(mc_hash(vlan, ha->addr), filter); } __set_bit(mc_hash(vlan, dev->broadcast), filter); bitmap_copy(mc_filter, filter, MACVLAN_MC_FILTER_SZ); } } static void macvlan_recompute_bc_filter(struct macvlan_dev *vlan) { if (vlan->port->bc_cutoff < 0) { bitmap_zero(vlan->port->bc_filter, MACVLAN_MC_FILTER_SZ); return; } macvlan_compute_filter(vlan->port->bc_filter, vlan->lowerdev, NULL, vlan->port->bc_cutoff); } static void macvlan_set_mac_lists(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); macvlan_compute_filter(vlan->mc_filter, dev, vlan, 0); dev_uc_sync(vlan->lowerdev, dev); dev_mc_sync(vlan->lowerdev, dev); /* This is slightly inaccurate as we're including the subscription * list of vlan->lowerdev too. * * Bug alert: This only works if everyone has the same broadcast * address as lowerdev. As soon as someone changes theirs this * will break. * * However, this is already broken as when you change your broadcast * address we don't get called. * * The solution is to maintain a list of broadcast addresses like * we do for uc/mc, if you care. */ macvlan_compute_filter(vlan->port->mc_filter, vlan->lowerdev, NULL, 0); macvlan_recompute_bc_filter(vlan); } static void update_port_bc_cutoff(struct macvlan_dev *vlan, int cutoff) { if (vlan->port->bc_cutoff == cutoff) return; vlan->port->bc_cutoff = cutoff; macvlan_recompute_bc_filter(vlan); } static int macvlan_change_mtu(struct net_device *dev, int new_mtu) { struct macvlan_dev *vlan = netdev_priv(dev); if (vlan->lowerdev->mtu < new_mtu) return -EINVAL; WRITE_ONCE(dev->mtu, new_mtu); return 0; } static int macvlan_hwtstamp_get(struct net_device *dev, struct kernel_hwtstamp_config *cfg) { struct net_device *real_dev = macvlan_dev_real_dev(dev); return generic_hwtstamp_get_lower(real_dev, cfg); } static int macvlan_hwtstamp_set(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack) { struct net_device *real_dev = macvlan_dev_real_dev(dev); if (!net_eq(dev_net(dev), &init_net)) return -EOPNOTSUPP; return generic_hwtstamp_set_lower(real_dev, cfg, extack); } /* * macvlan network devices have devices nesting below it and are a special * "super class" of normal network devices; split their locks off into a * separate class since they always nest. */ static struct lock_class_key macvlan_netdev_addr_lock_key; #define ALWAYS_ON_OFFLOADS \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \ NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL) #define ALWAYS_ON_FEATURES ALWAYS_ON_OFFLOADS #define MACVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_LRO | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) #define MACVLAN_STATE_MASK \ ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) static void macvlan_set_lockdep_class(struct net_device *dev) { netdev_lockdep_set_classes(dev); lockdep_set_class(&dev->addr_list_lock, &macvlan_netdev_addr_lock_key); } static int macvlan_init(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; struct macvlan_port *port = vlan->port; dev->state = (dev->state & ~MACVLAN_STATE_MASK) | (lowerdev->state & MACVLAN_STATE_MASK); dev->features = lowerdev->features & MACVLAN_FEATURES; dev->features |= ALWAYS_ON_FEATURES; dev->hw_features |= NETIF_F_LRO; dev->vlan_features = lowerdev->vlan_features & MACVLAN_FEATURES; dev->vlan_features |= ALWAYS_ON_OFFLOADS; dev->hw_enc_features |= dev->features; dev->lltx = true; netif_inherit_tso_max(dev, lowerdev); dev->hard_header_len = lowerdev->hard_header_len; macvlan_set_lockdep_class(dev); vlan->pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan->pcpu_stats) return -ENOMEM; port->count += 1; /* Get macvlan's reference to lowerdev */ netdev_hold(lowerdev, &vlan->dev_tracker, GFP_KERNEL); return 0; } static void macvlan_uninit(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); struct macvlan_port *port = vlan->port; free_percpu(vlan->pcpu_stats); macvlan_flush_sources(port, vlan); port->count -= 1; if (!port->count) macvlan_port_destroy(port->dev); } static void macvlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct macvlan_dev *vlan = netdev_priv(dev); if (vlan->pcpu_stats) { struct vlan_pcpu_stats *p; u64 rx_packets, rx_bytes, rx_multicast, tx_packets, tx_bytes; u32 rx_errors = 0, tx_dropped = 0; unsigned int start; int i; for_each_possible_cpu(i) { p = per_cpu_ptr(vlan->pcpu_stats, i); do { start = u64_stats_fetch_begin(&p->syncp); rx_packets = u64_stats_read(&p->rx_packets); rx_bytes = u64_stats_read(&p->rx_bytes); rx_multicast = u64_stats_read(&p->rx_multicast); tx_packets = u64_stats_read(&p->tx_packets); tx_bytes = u64_stats_read(&p->tx_bytes); } while (u64_stats_fetch_retry(&p->syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; stats->multicast += rx_multicast; stats->tx_packets += tx_packets; stats->tx_bytes += tx_bytes; /* rx_errors & tx_dropped are u32, updated * without syncp protection. */ rx_errors += READ_ONCE(p->rx_errors); tx_dropped += READ_ONCE(p->tx_dropped); } stats->rx_errors = rx_errors; stats->rx_dropped = rx_errors; stats->tx_dropped = tx_dropped; } } static int macvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; return vlan_vid_add(lowerdev, proto, vid); } static int macvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; vlan_vid_del(lowerdev, proto, vid); return 0; } static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 flags, struct netlink_ext_ack *extack) { struct macvlan_dev *vlan = netdev_priv(dev); int err = -EINVAL; /* Support unicast filter only on passthru devices. * Multicast filter should be allowed on all devices. */ if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr)) return -EOPNOTSUPP; if (flags & NLM_F_REPLACE) return -EOPNOTSUPP; if (is_unicast_ether_addr(addr)) err = dev_uc_add_excl(dev, addr); else if (is_multicast_ether_addr(addr)) err = dev_mc_add_excl(dev, addr); return err; } static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack) { struct macvlan_dev *vlan = netdev_priv(dev); int err = -EINVAL; /* Support unicast filter only on passthru devices. * Multicast filter should be allowed on all devices. */ if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr)) return -EOPNOTSUPP; if (is_unicast_ether_addr(addr)) err = dev_uc_del(dev, addr); else if (is_multicast_ether_addr(addr)) err = dev_mc_del(dev, addr); return err; } static void macvlan_ethtool_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { strscpy(drvinfo->driver, "macvlan", sizeof(drvinfo->driver)); strscpy(drvinfo->version, "0.1", sizeof(drvinfo->version)); } static int macvlan_ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { const struct macvlan_dev *vlan = netdev_priv(dev); return __ethtool_get_link_ksettings(vlan->lowerdev, cmd); } static int macvlan_ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { struct net_device *real_dev = macvlan_dev_real_dev(dev); return ethtool_get_ts_info_by_layer(real_dev, info); } static netdev_features_t macvlan_fix_features(struct net_device *dev, netdev_features_t features) { struct macvlan_dev *vlan = netdev_priv(dev); netdev_features_t lowerdev_features = vlan->lowerdev->features; netdev_features_t mask; features |= NETIF_F_ALL_FOR_ALL; features &= (vlan->set_features | ~MACVLAN_FEATURES); mask = features; lowerdev_features &= (features | ~NETIF_F_LRO); features = netdev_increment_features(lowerdev_features, features, mask); features |= ALWAYS_ON_FEATURES; features &= (ALWAYS_ON_FEATURES | MACVLAN_FEATURES); return features; } #ifdef CONFIG_NET_POLL_CONTROLLER static void macvlan_dev_poll_controller(struct net_device *dev) { return; } static int macvlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *real_dev = vlan->lowerdev; struct netpoll *netpoll; int err; netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); err = -ENOMEM; if (!netpoll) goto out; err = __netpoll_setup(netpoll, real_dev); if (err) { kfree(netpoll); goto out; } vlan->netpoll = netpoll; out: return err; } static void macvlan_dev_netpoll_cleanup(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); struct netpoll *netpoll = vlan->netpoll; if (!netpoll) return; vlan->netpoll = NULL; __netpoll_free(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ static int macvlan_dev_get_iflink(const struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); return READ_ONCE(vlan->lowerdev->ifindex); } static const struct ethtool_ops macvlan_ethtool_ops = { .get_link = ethtool_op_get_link, .get_link_ksettings = macvlan_ethtool_get_link_ksettings, .get_drvinfo = macvlan_ethtool_get_drvinfo, .get_ts_info = macvlan_ethtool_get_ts_info, }; static const struct net_device_ops macvlan_netdev_ops = { .ndo_init = macvlan_init, .ndo_uninit = macvlan_uninit, .ndo_open = macvlan_open, .ndo_stop = macvlan_stop, .ndo_start_xmit = macvlan_start_xmit, .ndo_change_mtu = macvlan_change_mtu, .ndo_fix_features = macvlan_fix_features, .ndo_change_rx_flags = macvlan_change_rx_flags, .ndo_set_mac_address = macvlan_set_mac_address, .ndo_set_rx_mode = macvlan_set_mac_lists, .ndo_get_stats64 = macvlan_dev_get_stats64, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_add_vid = macvlan_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = macvlan_vlan_rx_kill_vid, .ndo_fdb_add = macvlan_fdb_add, .ndo_fdb_del = macvlan_fdb_del, .ndo_fdb_dump = ndo_dflt_fdb_dump, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = macvlan_dev_poll_controller, .ndo_netpoll_setup = macvlan_dev_netpoll_setup, .ndo_netpoll_cleanup = macvlan_dev_netpoll_cleanup, #endif .ndo_get_iflink = macvlan_dev_get_iflink, .ndo_features_check = passthru_features_check, .ndo_hwtstamp_get = macvlan_hwtstamp_get, .ndo_hwtstamp_set = macvlan_hwtstamp_set, }; static void macvlan_dev_free(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); /* Get rid of the macvlan's reference to lowerdev */ netdev_put(vlan->lowerdev, &vlan->dev_tracker); } void macvlan_common_setup(struct net_device *dev) { ether_setup(dev); /* ether_setup() has set dev->min_mtu to ETH_MIN_MTU. */ dev->max_mtu = ETH_MAX_MTU; dev->priv_flags &= ~IFF_TX_SKB_SHARING; netif_keep_dst(dev); dev->priv_flags |= IFF_UNICAST_FLT; dev->change_proto_down = true; dev->netdev_ops = &macvlan_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = macvlan_dev_free; dev->header_ops = &macvlan_hard_header_ops; dev->ethtool_ops = &macvlan_ethtool_ops; } EXPORT_SYMBOL_GPL(macvlan_common_setup); static void macvlan_setup(struct net_device *dev) { macvlan_common_setup(dev); dev->priv_flags |= IFF_NO_QUEUE; } static int macvlan_port_create(struct net_device *dev) { struct macvlan_port *port; unsigned int i; int err; if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK) return -EINVAL; if (netdev_is_rx_handler_busy(dev)) return -EBUSY; port = kzalloc(sizeof(*port), GFP_KERNEL); if (port == NULL) return -ENOMEM; port->dev = dev; ether_addr_copy(port->perm_addr, dev->dev_addr); INIT_LIST_HEAD(&port->vlans); for (i = 0; i < MACVLAN_HASH_SIZE; i++) INIT_HLIST_HEAD(&port->vlan_hash[i]); for (i = 0; i < MACVLAN_HASH_SIZE; i++) INIT_HLIST_HEAD(&port->vlan_source_hash[i]); port->bc_queue_len_used = 0; port->bc_cutoff = 1; skb_queue_head_init(&port->bc_queue); INIT_WORK(&port->bc_work, macvlan_process_broadcast); err = netdev_rx_handler_register(dev, macvlan_handle_frame, port); if (err) kfree(port); else dev->priv_flags |= IFF_MACVLAN_PORT; return err; } static void macvlan_port_destroy(struct net_device *dev) { struct macvlan_port *port = macvlan_port_get_rtnl(dev); struct sk_buff *skb; dev->priv_flags &= ~IFF_MACVLAN_PORT; netdev_rx_handler_unregister(dev); /* After this point, no packet can schedule bc_work anymore, * but we need to cancel it and purge left skbs if any. */ cancel_work_sync(&port->bc_work); while ((skb = __skb_dequeue(&port->bc_queue))) { const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src; if (src) dev_put(src->dev); kfree_skb(skb); } /* If the lower device address has been changed by passthru * macvlan, put it back. */ if (macvlan_passthru(port) && !ether_addr_equal(port->dev->dev_addr, port->perm_addr)) { struct sockaddr sa; sa.sa_family = port->dev->type; memcpy(&sa.sa_data, port->perm_addr, port->dev->addr_len); dev_set_mac_address(port->dev, &sa, NULL); } kfree(port); } static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct nlattr *nla, *head; int rem, len; if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) return -EINVAL; if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) return -EADDRNOTAVAIL; } if (!data) return 0; if (data[IFLA_MACVLAN_FLAGS] && nla_get_u16(data[IFLA_MACVLAN_FLAGS]) & ~(MACVLAN_FLAG_NOPROMISC | MACVLAN_FLAG_NODST)) return -EINVAL; if (data[IFLA_MACVLAN_MODE]) { switch (nla_get_u32(data[IFLA_MACVLAN_MODE])) { case MACVLAN_MODE_PRIVATE: case MACVLAN_MODE_VEPA: case MACVLAN_MODE_BRIDGE: case MACVLAN_MODE_PASSTHRU: case MACVLAN_MODE_SOURCE: break; default: return -EINVAL; } } if (data[IFLA_MACVLAN_MACADDR_MODE]) { switch (nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE])) { case MACVLAN_MACADDR_ADD: case MACVLAN_MACADDR_DEL: case MACVLAN_MACADDR_FLUSH: case MACVLAN_MACADDR_SET: break; default: return -EINVAL; } } if (data[IFLA_MACVLAN_MACADDR]) { if (nla_len(data[IFLA_MACVLAN_MACADDR]) != ETH_ALEN) return -EINVAL; if (!is_valid_ether_addr(nla_data(data[IFLA_MACVLAN_MACADDR]))) return -EADDRNOTAVAIL; } if (data[IFLA_MACVLAN_MACADDR_DATA]) { head = nla_data(data[IFLA_MACVLAN_MACADDR_DATA]); len = nla_len(data[IFLA_MACVLAN_MACADDR_DATA]); nla_for_each_attr(nla, head, len, rem) { if (nla_type(nla) != IFLA_MACVLAN_MACADDR || nla_len(nla) != ETH_ALEN) return -EINVAL; if (!is_valid_ether_addr(nla_data(nla))) return -EADDRNOTAVAIL; } } if (data[IFLA_MACVLAN_MACADDR_COUNT]) return -EINVAL; return 0; } /* * reconfigure list of remote source mac address * (only for macvlan devices in source mode) * Note regarding alignment: all netlink data is aligned to 4 Byte, which * suffices for both ether_addr_copy and ether_addr_equal_64bits usage. */ static int macvlan_changelink_sources(struct macvlan_dev *vlan, u32 mode, struct nlattr *data[]) { char *addr = NULL; int ret, rem, len; struct nlattr *nla, *head; struct macvlan_source_entry *entry; if (data[IFLA_MACVLAN_MACADDR]) addr = nla_data(data[IFLA_MACVLAN_MACADDR]); if (mode == MACVLAN_MACADDR_ADD) { if (!addr) return -EINVAL; return macvlan_hash_add_source(vlan, addr); } else if (mode == MACVLAN_MACADDR_DEL) { if (!addr) return -EINVAL; entry = macvlan_hash_lookup_source(vlan, addr); if (entry) { macvlan_hash_del_source(entry); vlan->macaddr_count--; } } else if (mode == MACVLAN_MACADDR_FLUSH) { macvlan_flush_sources(vlan->port, vlan); } else if (mode == MACVLAN_MACADDR_SET) { macvlan_flush_sources(vlan->port, vlan); if (addr) { ret = macvlan_hash_add_source(vlan, addr); if (ret) return ret; } if (!data[IFLA_MACVLAN_MACADDR_DATA]) return 0; head = nla_data(data[IFLA_MACVLAN_MACADDR_DATA]); len = nla_len(data[IFLA_MACVLAN_MACADDR_DATA]); nla_for_each_attr(nla, head, len, rem) { addr = nla_data(nla); ret = macvlan_hash_add_source(vlan, addr); if (ret) return ret; } } else { return -EINVAL; } return 0; } int macvlan_common_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct macvlan_dev *vlan = netdev_priv(dev); struct macvlan_port *port; struct net_device *lowerdev; int err; int macmode; bool create = false; if (!tb[IFLA_LINK]) return -EINVAL; lowerdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (lowerdev == NULL) return -ENODEV; /* When creating macvlans or macvtaps on top of other macvlans - use * the real device as the lowerdev. */ if (netif_is_macvlan(lowerdev)) lowerdev = macvlan_dev_real_dev(lowerdev); if (!tb[IFLA_MTU]) dev->mtu = lowerdev->mtu; else if (dev->mtu > lowerdev->mtu) return -EINVAL; /* MTU range: 68 - lowerdev->max_mtu */ dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = lowerdev->max_mtu; if (!tb[IFLA_ADDRESS]) eth_hw_addr_random(dev); if (!netif_is_macvlan_port(lowerdev)) { err = macvlan_port_create(lowerdev); if (err < 0) return err; create = true; } port = macvlan_port_get_rtnl(lowerdev); /* Only 1 macvlan device can be created in passthru mode */ if (macvlan_passthru(port)) { /* The macvlan port must be not created this time, * still goto destroy_macvlan_port for readability. */ err = -EINVAL; goto destroy_macvlan_port; } vlan->lowerdev = lowerdev; vlan->dev = dev; vlan->port = port; vlan->set_features = MACVLAN_FEATURES; vlan->mode = MACVLAN_MODE_VEPA; if (data && data[IFLA_MACVLAN_MODE]) vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]); if (data && data[IFLA_MACVLAN_FLAGS]) vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]); if (vlan->mode == MACVLAN_MODE_PASSTHRU) { if (port->count) { err = -EINVAL; goto destroy_macvlan_port; } macvlan_set_passthru(port); eth_hw_addr_inherit(dev, lowerdev); } if (data && data[IFLA_MACVLAN_MACADDR_MODE]) { if (vlan->mode != MACVLAN_MODE_SOURCE) { err = -EINVAL; goto destroy_macvlan_port; } macmode = nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE]); err = macvlan_changelink_sources(vlan, macmode, data); if (err) goto destroy_macvlan_port; } vlan->bc_queue_len_req = MACVLAN_DEFAULT_BC_QUEUE_LEN; if (data && data[IFLA_MACVLAN_BC_QUEUE_LEN]) vlan->bc_queue_len_req = nla_get_u32(data[IFLA_MACVLAN_BC_QUEUE_LEN]); if (data && data[IFLA_MACVLAN_BC_CUTOFF]) update_port_bc_cutoff( vlan, nla_get_s32(data[IFLA_MACVLAN_BC_CUTOFF])); err = register_netdevice(dev); if (err < 0) goto destroy_macvlan_port; dev->priv_flags |= IFF_MACVLAN; err = netdev_upper_dev_link(lowerdev, dev, extack); if (err) goto unregister_netdev; list_add_tail_rcu(&vlan->list, &port->vlans); update_port_bc_queue_len(vlan->port); netif_stacked_transfer_operstate(lowerdev, dev); linkwatch_fire_event(dev); return 0; unregister_netdev: /* macvlan_uninit would free the macvlan port */ unregister_netdevice(dev); return err; destroy_macvlan_port: /* the macvlan port may be freed by macvlan_uninit when fail to register. * so we destroy the macvlan port only when it's valid. */ if (create && macvlan_port_get_rtnl(lowerdev)) { macvlan_flush_sources(port, vlan); macvlan_port_destroy(port->dev); } return err; } EXPORT_SYMBOL_GPL(macvlan_common_newlink); static int macvlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { return macvlan_common_newlink(src_net, dev, tb, data, extack); } void macvlan_dellink(struct net_device *dev, struct list_head *head) { struct macvlan_dev *vlan = netdev_priv(dev); if (vlan->mode == MACVLAN_MODE_SOURCE) macvlan_flush_sources(vlan->port, vlan); list_del_rcu(&vlan->list); update_port_bc_queue_len(vlan->port); unregister_netdevice_queue(dev, head); netdev_upper_dev_unlink(vlan->lowerdev, dev); } EXPORT_SYMBOL_GPL(macvlan_dellink); static int macvlan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct macvlan_dev *vlan = netdev_priv(dev); enum macvlan_mode mode; bool set_mode = false; enum macvlan_macaddr_mode macmode; int ret; /* Validate mode, but don't set yet: setting flags may fail. */ if (data && data[IFLA_MACVLAN_MODE]) { set_mode = true; mode = nla_get_u32(data[IFLA_MACVLAN_MODE]); /* Passthrough mode can't be set or cleared dynamically */ if ((mode == MACVLAN_MODE_PASSTHRU) != (vlan->mode == MACVLAN_MODE_PASSTHRU)) return -EINVAL; if (vlan->mode == MACVLAN_MODE_SOURCE && vlan->mode != mode) macvlan_flush_sources(vlan->port, vlan); } if (data && data[IFLA_MACVLAN_FLAGS]) { __u16 flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]); bool promisc = (flags ^ vlan->flags) & MACVLAN_FLAG_NOPROMISC; if (macvlan_passthru(vlan->port) && promisc) { int err; if (flags & MACVLAN_FLAG_NOPROMISC) err = dev_set_promiscuity(vlan->lowerdev, -1); else err = dev_set_promiscuity(vlan->lowerdev, 1); if (err < 0) return err; } vlan->flags = flags; } if (data && data[IFLA_MACVLAN_BC_QUEUE_LEN]) { vlan->bc_queue_len_req = nla_get_u32(data[IFLA_MACVLAN_BC_QUEUE_LEN]); update_port_bc_queue_len(vlan->port); } if (data && data[IFLA_MACVLAN_BC_CUTOFF]) update_port_bc_cutoff( vlan, nla_get_s32(data[IFLA_MACVLAN_BC_CUTOFF])); if (set_mode) vlan->mode = mode; if (data && data[IFLA_MACVLAN_MACADDR_MODE]) { if (vlan->mode != MACVLAN_MODE_SOURCE) return -EINVAL; macmode = nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE]); ret = macvlan_changelink_sources(vlan, macmode, data); if (ret) return ret; } return 0; } static size_t macvlan_get_size_mac(const struct macvlan_dev *vlan) { if (vlan->macaddr_count == 0) return 0; return nla_total_size(0) /* IFLA_MACVLAN_MACADDR_DATA */ + vlan->macaddr_count * nla_total_size(sizeof(u8) * ETH_ALEN); } static size_t macvlan_get_size(const struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); return (0 + nla_total_size(4) /* IFLA_MACVLAN_MODE */ + nla_total_size(2) /* IFLA_MACVLAN_FLAGS */ + nla_total_size(4) /* IFLA_MACVLAN_MACADDR_COUNT */ + macvlan_get_size_mac(vlan) /* IFLA_MACVLAN_MACADDR */ + nla_total_size(4) /* IFLA_MACVLAN_BC_QUEUE_LEN */ + nla_total_size(4) /* IFLA_MACVLAN_BC_QUEUE_LEN_USED */ ); } static int macvlan_fill_info_macaddr(struct sk_buff *skb, const struct macvlan_dev *vlan, const int i) { struct hlist_head *h = &vlan->port->vlan_source_hash[i]; struct macvlan_source_entry *entry; hlist_for_each_entry_rcu(entry, h, hlist, lockdep_rtnl_is_held()) { if (entry->vlan != vlan) continue; if (nla_put(skb, IFLA_MACVLAN_MACADDR, ETH_ALEN, entry->addr)) return 1; } return 0; } static int macvlan_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); struct macvlan_port *port = vlan->port; int i; struct nlattr *nest; if (nla_put_u32(skb, IFLA_MACVLAN_MODE, vlan->mode)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_MACVLAN_FLAGS, vlan->flags)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_MACVLAN_MACADDR_COUNT, vlan->macaddr_count)) goto nla_put_failure; if (vlan->macaddr_count > 0) { nest = nla_nest_start_noflag(skb, IFLA_MACVLAN_MACADDR_DATA); if (nest == NULL) goto nla_put_failure; for (i = 0; i < MACVLAN_HASH_SIZE; i++) { if (macvlan_fill_info_macaddr(skb, vlan, i)) goto nla_put_failure; } nla_nest_end(skb, nest); } if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN, vlan->bc_queue_len_req)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN_USED, port->bc_queue_len_used)) goto nla_put_failure; if (port->bc_cutoff != 1 && nla_put_s32(skb, IFLA_MACVLAN_BC_CUTOFF, port->bc_cutoff)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { [IFLA_MACVLAN_MODE] = { .type = NLA_U32 }, [IFLA_MACVLAN_FLAGS] = { .type = NLA_U16 }, [IFLA_MACVLAN_MACADDR_MODE] = { .type = NLA_U32 }, [IFLA_MACVLAN_MACADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_MACVLAN_MACADDR_DATA] = { .type = NLA_NESTED }, [IFLA_MACVLAN_MACADDR_COUNT] = { .type = NLA_U32 }, [IFLA_MACVLAN_BC_QUEUE_LEN] = { .type = NLA_U32 }, [IFLA_MACVLAN_BC_QUEUE_LEN_USED] = { .type = NLA_REJECT }, [IFLA_MACVLAN_BC_CUTOFF] = { .type = NLA_S32 }, }; int macvlan_link_register(struct rtnl_link_ops *ops) { /* common fields */ ops->validate = macvlan_validate; ops->maxtype = IFLA_MACVLAN_MAX; ops->policy = macvlan_policy; ops->changelink = macvlan_changelink; ops->get_size = macvlan_get_size; ops->fill_info = macvlan_fill_info; return rtnl_link_register(ops); }; EXPORT_SYMBOL_GPL(macvlan_link_register); static struct net *macvlan_get_link_net(const struct net_device *dev) { return dev_net(macvlan_dev_real_dev(dev)); } static struct rtnl_link_ops macvlan_link_ops = { .kind = "macvlan", .setup = macvlan_setup, .newlink = macvlan_newlink, .dellink = macvlan_dellink, .get_link_net = macvlan_get_link_net, .priv_size = sizeof(struct macvlan_dev), }; static void update_port_bc_queue_len(struct macvlan_port *port) { u32 max_bc_queue_len_req = 0; struct macvlan_dev *vlan; list_for_each_entry(vlan, &port->vlans, list) { if (vlan->bc_queue_len_req > max_bc_queue_len_req) max_bc_queue_len_req = vlan->bc_queue_len_req; } port->bc_queue_len_used = max_bc_queue_len_req; } static int macvlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct macvlan_dev *vlan, *next; struct macvlan_port *port; LIST_HEAD(list_kill); if (!netif_is_macvlan_port(dev)) return NOTIFY_DONE; port = macvlan_port_get_rtnl(dev); switch (event) { case NETDEV_UP: case NETDEV_DOWN: case NETDEV_CHANGE: list_for_each_entry(vlan, &port->vlans, list) netif_stacked_transfer_operstate(vlan->lowerdev, vlan->dev); break; case NETDEV_FEAT_CHANGE: list_for_each_entry(vlan, &port->vlans, list) { netif_inherit_tso_max(vlan->dev, dev); netdev_update_features(vlan->dev); } break; case NETDEV_CHANGEMTU: list_for_each_entry(vlan, &port->vlans, list) { if (vlan->dev->mtu <= dev->mtu) continue; dev_set_mtu(vlan->dev, dev->mtu); } break; case NETDEV_CHANGEADDR: if (!macvlan_passthru(port)) return NOTIFY_DONE; vlan = list_first_entry_or_null(&port->vlans, struct macvlan_dev, list); if (vlan && macvlan_sync_address(vlan->dev, dev->dev_addr)) return NOTIFY_BAD; break; case NETDEV_UNREGISTER: /* twiddle thumbs on netns device moves */ if (dev->reg_state != NETREG_UNREGISTERING) break; list_for_each_entry_safe(vlan, next, &port->vlans, list) vlan->dev->rtnl_link_ops->dellink(vlan->dev, &list_kill); unregister_netdevice_many(&list_kill); break; case NETDEV_PRE_TYPE_CHANGE: /* Forbid underlying device to change its type. */ return NOTIFY_BAD; case NETDEV_NOTIFY_PEERS: case NETDEV_BONDING_FAILOVER: case NETDEV_RESEND_IGMP: /* Propagate to all vlans */ list_for_each_entry(vlan, &port->vlans, list) call_netdevice_notifiers(event, vlan->dev); } return NOTIFY_DONE; } static struct notifier_block macvlan_notifier_block __read_mostly = { .notifier_call = macvlan_device_event, }; static int __init macvlan_init_module(void) { int err; register_netdevice_notifier(&macvlan_notifier_block); err = macvlan_link_register(&macvlan_link_ops); if (err < 0) goto err1; return 0; err1: unregister_netdevice_notifier(&macvlan_notifier_block); return err; } static void __exit macvlan_cleanup_module(void) { rtnl_link_unregister(&macvlan_link_ops); unregister_netdevice_notifier(&macvlan_notifier_block); } module_init(macvlan_init_module); module_exit(macvlan_cleanup_module); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("Driver for MAC address based VLANs"); MODULE_ALIAS_RTNL_LINK("macvlan"); |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 | /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright(c) 2020 Intel Corporation. */ #ifndef XSK_BUFF_POOL_H_ #define XSK_BUFF_POOL_H_ #include <linux/if_xdp.h> #include <linux/types.h> #include <linux/dma-mapping.h> #include <linux/bpf.h> #include <net/xdp.h> struct xsk_buff_pool; struct xdp_rxq_info; struct xsk_cb_desc; struct xsk_queue; struct xdp_desc; struct xdp_umem; struct xdp_sock; struct device; struct page; #define XSK_PRIV_MAX 24 struct xdp_buff_xsk { struct xdp_buff xdp; u8 cb[XSK_PRIV_MAX]; dma_addr_t dma; dma_addr_t frame_dma; struct xsk_buff_pool *pool; u64 orig_addr; struct list_head free_list_node; struct list_head xskb_list_node; }; #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) #define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t)) struct xsk_dma_map { dma_addr_t *dma_pages; struct device *dev; struct net_device *netdev; refcount_t users; struct list_head list; /* Protected by the RTNL_LOCK */ u32 dma_pages_cnt; }; struct xsk_buff_pool { /* Members only used in the control path first. */ struct device *dev; struct net_device *netdev; struct list_head xsk_tx_list; /* Protects modifications to the xsk_tx_list */ spinlock_t xsk_tx_list_lock; refcount_t users; struct xdp_umem *umem; struct work_struct work; struct list_head free_list; struct list_head xskb_list; u32 heads_cnt; u16 queue_id; /* Data path members as close to free_heads at the end as possible. */ struct xsk_queue *fq ____cacheline_aligned_in_smp; struct xsk_queue *cq; /* For performance reasons, each buff pool has its own array of dma_pages * even when they are identical. */ dma_addr_t *dma_pages; struct xdp_buff_xsk *heads; struct xdp_desc *tx_descs; u64 chunk_mask; u64 addrs_cnt; u32 free_list_cnt; u32 dma_pages_cnt; u32 free_heads_cnt; u32 headroom; u32 chunk_size; u32 chunk_shift; u32 frame_len; u8 tx_metadata_len; /* inherited from umem */ u8 cached_need_wakeup; bool uses_need_wakeup; bool unaligned; bool tx_sw_csum; void *addrs; /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect: * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when * sockets share a single cq when the same netdev and queue id is shared. */ spinlock_t cq_lock; struct xdp_buff_xsk *free_heads[]; }; /* Masks for xdp_umem_page flags. * The low 12-bits of the addr will be 0 since this is the page address, so we * can use them for flags. */ #define XSK_NEXT_PG_CONTIG_SHIFT 0 #define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT) /* AF_XDP core. */ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, struct xdp_umem *umem); int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev, u16 queue_id, u16 flags); int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs, struct net_device *dev, u16 queue_id); int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs); void xp_destroy(struct xsk_buff_pool *pool); void xp_get_pool(struct xsk_buff_pool *pool); bool xp_put_pool(struct xsk_buff_pool *pool); void xp_clear_dev(struct xsk_buff_pool *pool); void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs); void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs); /* AF_XDP, and XDP core. */ void xp_free(struct xdp_buff_xsk *xskb); static inline void xp_init_xskb_addr(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool, u64 addr) { xskb->orig_addr = addr; xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom; } static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool, dma_addr_t *dma_pages, u64 addr) { xskb->frame_dma = (dma_pages[addr >> PAGE_SHIFT] & ~XSK_NEXT_PG_CONTIG_MASK) + (addr & ~PAGE_MASK); xskb->dma = xskb->frame_dma + pool->headroom + XDP_PACKET_HEADROOM; } /* AF_XDP ZC drivers, via xdp_sock_buff.h */ void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq); void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc); int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs, struct page **pages, u32 nr_pages); void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs); struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool); u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max); bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count); void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr); dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr); static inline dma_addr_t xp_get_dma(struct xdp_buff_xsk *xskb) { return xskb->dma; } static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb) { return xskb->frame_dma; } static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb) { dma_sync_single_for_cpu(xskb->pool->dev, xskb->dma, xskb->pool->frame_len, DMA_BIDIRECTIONAL); } static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool, dma_addr_t dma, size_t size) { dma_sync_single_for_device(pool->dev, dma, size, DMA_BIDIRECTIONAL); } /* Masks for xdp_umem_page flags. * The low 12-bits of the addr will be 0 since this is the page address, so we * can use them for flags. */ #define XSK_NEXT_PG_CONTIG_SHIFT 0 #define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT) static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool, u64 addr, u32 len) { bool cross_pg = (addr & (PAGE_SIZE - 1)) + len > PAGE_SIZE; if (likely(!cross_pg)) return false; return pool->dma_pages && !(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK); } static inline bool xp_mb_desc(struct xdp_desc *desc) { return desc->options & XDP_PKT_CONTD; } static inline u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr) { return addr & pool->chunk_mask; } static inline u64 xp_unaligned_extract_addr(u64 addr) { return addr & XSK_UNALIGNED_BUF_ADDR_MASK; } static inline u64 xp_unaligned_extract_offset(u64 addr) { return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT; } static inline u64 xp_unaligned_add_offset_to_addr(u64 addr) { return xp_unaligned_extract_addr(addr) + xp_unaligned_extract_offset(addr); } static inline u32 xp_aligned_extract_idx(struct xsk_buff_pool *pool, u64 addr) { return xp_aligned_extract_addr(pool, addr) >> pool->chunk_shift; } static inline void xp_release(struct xdp_buff_xsk *xskb) { if (xskb->pool->unaligned) xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb; } static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb) { u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start; offset += xskb->pool->headroom; if (!xskb->pool->unaligned) return xskb->orig_addr + offset; return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); } static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool) { return pool->tx_metadata_len > 0; } #endif /* XSK_BUFF_POOL_H_ */ |
2 2 2 2 2 2 12 11 6 2 2 15 1 5 1 15 16 15 4 15 2 2 2 1 5 5 2 2 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 | // SPDX-License-Identifier: GPL-2.0-or-later /* audit.c -- Auditing support * Gateway between the kernel (e.g., selinux) and the user-space audit daemon. * System-call specific features have moved to auditsc.c * * Copyright 2003-2007 Red Hat Inc., Durham, North Carolina. * All Rights Reserved. * * Written by Rickard E. (Rik) Faith <faith@redhat.com> * * Goals: 1) Integrate fully with Security Modules. * 2) Minimal run-time overhead: * a) Minimal when syscall auditing is disabled (audit_enable=0). * b) Small when syscall auditing is enabled and no audit record * is generated (defer as much work as possible to record * generation time): * i) context is allocated, * ii) names from getname are stored without a copy, and * iii) inode information stored from path_lookup. * 3) Ability to disable syscall auditing at boot time (audit=0). * 4) Usable by other parts of the kernel (if audit_log* is called, * then a syscall record will be generated automatically for the * current syscall). * 5) Netlink interface to user-space. * 6) Support low-overhead kernel-based filtering to minimize the * information that must be passed to user-space. * * Audit userspace, documentation, tests, and bug/issue trackers: * https://github.com/linux-audit */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/file.h> #include <linux/init.h> #include <linux/types.h> #include <linux/atomic.h> #include <linux/mm.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/kthread.h> #include <linux/kernel.h> #include <linux/syscalls.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/mutex.h> #include <linux/gfp.h> #include <linux/pid.h> #include <linux/audit.h> #include <net/sock.h> #include <net/netlink.h> #include <linux/skbuff.h> #include <linux/security.h> #include <linux/freezer.h> #include <linux/pid_namespace.h> #include <net/netns/generic.h> #include "audit.h" /* No auditing will take place until audit_initialized == AUDIT_INITIALIZED. * (Initialization happens after skb_init is called.) */ #define AUDIT_DISABLED -1 #define AUDIT_UNINITIALIZED 0 #define AUDIT_INITIALIZED 1 static int audit_initialized = AUDIT_UNINITIALIZED; u32 audit_enabled = AUDIT_OFF; bool audit_ever_enabled = !!AUDIT_OFF; EXPORT_SYMBOL_GPL(audit_enabled); /* Default state when kernel boots without any parameters. */ static u32 audit_default = AUDIT_OFF; /* If auditing cannot proceed, audit_failure selects what happens. */ static u32 audit_failure = AUDIT_FAIL_PRINTK; /* private audit network namespace index */ static unsigned int audit_net_id; /** * struct audit_net - audit private network namespace data * @sk: communication socket */ struct audit_net { struct sock *sk; }; /** * struct auditd_connection - kernel/auditd connection state * @pid: auditd PID * @portid: netlink portid * @net: the associated network namespace * @rcu: RCU head * * Description: * This struct is RCU protected; you must either hold the RCU lock for reading * or the associated spinlock for writing. */ struct auditd_connection { struct pid *pid; u32 portid; struct net *net; struct rcu_head rcu; }; static struct auditd_connection __rcu *auditd_conn; static DEFINE_SPINLOCK(auditd_conn_lock); /* If audit_rate_limit is non-zero, limit the rate of sending audit records * to that number per second. This prevents DoS attacks, but results in * audit records being dropped. */ static u32 audit_rate_limit; /* Number of outstanding audit_buffers allowed. * When set to zero, this means unlimited. */ static u32 audit_backlog_limit = 64; #define AUDIT_BACKLOG_WAIT_TIME (60 * HZ) static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME; /* The identity of the user shutting down the audit system. */ static kuid_t audit_sig_uid = INVALID_UID; static pid_t audit_sig_pid = -1; static u32 audit_sig_sid; /* Records can be lost in several ways: 0) [suppressed in audit_alloc] 1) out of memory in audit_log_start [kmalloc of struct audit_buffer] 2) out of memory in audit_log_move [alloc_skb] 3) suppressed due to audit_rate_limit 4) suppressed due to audit_backlog_limit */ static atomic_t audit_lost = ATOMIC_INIT(0); /* Monotonically increasing sum of time the kernel has spent * waiting while the backlog limit is exceeded. */ static atomic_t audit_backlog_wait_time_actual = ATOMIC_INIT(0); /* Hash for inode-based rules */ struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; static struct kmem_cache *audit_buffer_cache; /* queue msgs to send via kauditd_task */ static struct sk_buff_head audit_queue; /* queue msgs due to temporary unicast send problems */ static struct sk_buff_head audit_retry_queue; /* queue msgs waiting for new auditd connection */ static struct sk_buff_head audit_hold_queue; /* queue servicing thread */ static struct task_struct *kauditd_task; static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); /* waitqueue for callers who are blocked on the audit backlog */ static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION, .mask = -1, .features = 0, .lock = 0,}; static char *audit_feature_names[2] = { "only_unset_loginuid", "loginuid_immutable", }; /** * struct audit_ctl_mutex - serialize requests from userspace * @lock: the mutex used for locking * @owner: the task which owns the lock * * Description: * This is the lock struct used to ensure we only process userspace requests * in an orderly fashion. We can't simply use a mutex/lock here because we * need to track lock ownership so we don't end up blocking the lock owner in * audit_log_start() or similar. */ static struct audit_ctl_mutex { struct mutex lock; void *owner; } audit_cmd_mutex; /* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting * audit records. Since printk uses a 1024 byte buffer, this buffer * should be at least that large. */ #define AUDIT_BUFSIZ 1024 /* The audit_buffer is used when formatting an audit record. The caller * locks briefly to get the record off the freelist or to allocate the * buffer, and locks briefly to send the buffer to the netlink layer or * to place it on a transmit queue. Multiple audit_buffers can be in * use simultaneously. */ struct audit_buffer { struct sk_buff *skb; /* formatted skb ready to send */ struct audit_context *ctx; /* NULL or associated context */ gfp_t gfp_mask; }; struct audit_reply { __u32 portid; struct net *net; struct sk_buff *skb; }; /** * auditd_test_task - Check to see if a given task is an audit daemon * @task: the task to check * * Description: * Return 1 if the task is a registered audit daemon, 0 otherwise. */ int auditd_test_task(struct task_struct *task) { int rc; struct auditd_connection *ac; rcu_read_lock(); ac = rcu_dereference(auditd_conn); rc = (ac && ac->pid == task_tgid(task) ? 1 : 0); rcu_read_unlock(); return rc; } /** * audit_ctl_lock - Take the audit control lock */ void audit_ctl_lock(void) { mutex_lock(&audit_cmd_mutex.lock); audit_cmd_mutex.owner = current; } /** * audit_ctl_unlock - Drop the audit control lock */ void audit_ctl_unlock(void) { audit_cmd_mutex.owner = NULL; mutex_unlock(&audit_cmd_mutex.lock); } /** * audit_ctl_owner_current - Test to see if the current task owns the lock * * Description: * Return true if the current task owns the audit control lock, false if it * doesn't own the lock. */ static bool audit_ctl_owner_current(void) { return (current == audit_cmd_mutex.owner); } /** * auditd_pid_vnr - Return the auditd PID relative to the namespace * * Description: * Returns the PID in relation to the namespace, 0 on failure. */ static pid_t auditd_pid_vnr(void) { pid_t pid; const struct auditd_connection *ac; rcu_read_lock(); ac = rcu_dereference(auditd_conn); if (!ac || !ac->pid) pid = 0; else pid = pid_vnr(ac->pid); rcu_read_unlock(); return pid; } /** * audit_get_sk - Return the audit socket for the given network namespace * @net: the destination network namespace * * Description: * Returns the sock pointer if valid, NULL otherwise. The caller must ensure * that a reference is held for the network namespace while the sock is in use. */ static struct sock *audit_get_sk(const struct net *net) { struct audit_net *aunet; if (!net) return NULL; aunet = net_generic(net, audit_net_id); return aunet->sk; } void audit_panic(const char *message) { switch (audit_failure) { case AUDIT_FAIL_SILENT: break; case AUDIT_FAIL_PRINTK: if (printk_ratelimit()) pr_err("%s\n", message); break; case AUDIT_FAIL_PANIC: panic("audit: %s\n", message); break; } } static inline int audit_rate_check(void) { static unsigned long last_check = 0; static int messages = 0; static DEFINE_SPINLOCK(lock); unsigned long flags; unsigned long now; int retval = 0; if (!audit_rate_limit) return 1; spin_lock_irqsave(&lock, flags); if (++messages < audit_rate_limit) { retval = 1; } else { now = jiffies; if (time_after(now, last_check + HZ)) { last_check = now; messages = 0; retval = 1; } } spin_unlock_irqrestore(&lock, flags); return retval; } /** * audit_log_lost - conditionally log lost audit message event * @message: the message stating reason for lost audit message * * Emit at least 1 message per second, even if audit_rate_check is * throttling. * Always increment the lost messages counter. */ void audit_log_lost(const char *message) { static unsigned long last_msg = 0; static DEFINE_SPINLOCK(lock); unsigned long flags; unsigned long now; int print; atomic_inc(&audit_lost); print = (audit_failure == AUDIT_FAIL_PANIC || !audit_rate_limit); if (!print) { spin_lock_irqsave(&lock, flags); now = jiffies; if (time_after(now, last_msg + HZ)) { print = 1; last_msg = now; } spin_unlock_irqrestore(&lock, flags); } if (print) { if (printk_ratelimit()) pr_warn("audit_lost=%u audit_rate_limit=%u audit_backlog_limit=%u\n", atomic_read(&audit_lost), audit_rate_limit, audit_backlog_limit); audit_panic(message); } } static int audit_log_config_change(char *function_name, u32 new, u32 old, int allow_changes) { struct audit_buffer *ab; int rc = 0; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_CONFIG_CHANGE); if (unlikely(!ab)) return rc; audit_log_format(ab, "op=set %s=%u old=%u ", function_name, new, old); audit_log_session_info(ab); rc = audit_log_task_context(ab); if (rc) allow_changes = 0; /* Something weird, deny request */ audit_log_format(ab, " res=%d", allow_changes); audit_log_end(ab); return rc; } static int audit_do_config_change(char *function_name, u32 *to_change, u32 new) { int allow_changes, rc = 0; u32 old = *to_change; /* check if we are locked */ if (audit_enabled == AUDIT_LOCKED) allow_changes = 0; else allow_changes = 1; if (audit_enabled != AUDIT_OFF) { rc = audit_log_config_change(function_name, new, old, allow_changes); if (rc) allow_changes = 0; } /* If we are allowed, make the change */ if (allow_changes == 1) *to_change = new; /* Not allowed, update reason */ else if (rc == 0) rc = -EPERM; return rc; } static int audit_set_rate_limit(u32 limit) { return audit_do_config_change("audit_rate_limit", &audit_rate_limit, limit); } static int audit_set_backlog_limit(u32 limit) { return audit_do_config_change("audit_backlog_limit", &audit_backlog_limit, limit); } static int audit_set_backlog_wait_time(u32 timeout) { return audit_do_config_change("audit_backlog_wait_time", &audit_backlog_wait_time, timeout); } static int audit_set_enabled(u32 state) { int rc; if (state > AUDIT_LOCKED) return -EINVAL; rc = audit_do_config_change("audit_enabled", &audit_enabled, state); if (!rc) audit_ever_enabled |= !!state; return rc; } static int audit_set_failure(u32 state) { if (state != AUDIT_FAIL_SILENT && state != AUDIT_FAIL_PRINTK && state != AUDIT_FAIL_PANIC) return -EINVAL; return audit_do_config_change("audit_failure", &audit_failure, state); } /** * auditd_conn_free - RCU helper to release an auditd connection struct * @rcu: RCU head * * Description: * Drop any references inside the auditd connection tracking struct and free * the memory. */ static void auditd_conn_free(struct rcu_head *rcu) { struct auditd_connection *ac; ac = container_of(rcu, struct auditd_connection, rcu); put_pid(ac->pid); put_net(ac->net); kfree(ac); } /** * auditd_set - Set/Reset the auditd connection state * @pid: auditd PID * @portid: auditd netlink portid * @net: auditd network namespace pointer * @skb: the netlink command from the audit daemon * @ack: netlink ack flag, cleared if ack'd here * * Description: * This function will obtain and drop network namespace references as * necessary. Returns zero on success, negative values on failure. */ static int auditd_set(struct pid *pid, u32 portid, struct net *net, struct sk_buff *skb, bool *ack) { unsigned long flags; struct auditd_connection *ac_old, *ac_new; struct nlmsghdr *nlh; if (!pid || !net) return -EINVAL; ac_new = kzalloc(sizeof(*ac_new), GFP_KERNEL); if (!ac_new) return -ENOMEM; ac_new->pid = get_pid(pid); ac_new->portid = portid; ac_new->net = get_net(net); /* send the ack now to avoid a race with the queue backlog */ if (*ack) { nlh = nlmsg_hdr(skb); netlink_ack(skb, nlh, 0, NULL); *ack = false; } spin_lock_irqsave(&auditd_conn_lock, flags); ac_old = rcu_dereference_protected(auditd_conn, lockdep_is_held(&auditd_conn_lock)); rcu_assign_pointer(auditd_conn, ac_new); spin_unlock_irqrestore(&auditd_conn_lock, flags); if (ac_old) call_rcu(&ac_old->rcu, auditd_conn_free); return 0; } /** * kauditd_printk_skb - Print the audit record to the ring buffer * @skb: audit record * * Whatever the reason, this packet may not make it to the auditd connection * so write it via printk so the information isn't completely lost. */ static void kauditd_printk_skb(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); char *data = nlmsg_data(nlh); if (nlh->nlmsg_type != AUDIT_EOE && printk_ratelimit()) pr_notice("type=%d %s\n", nlh->nlmsg_type, data); } /** * kauditd_rehold_skb - Handle a audit record send failure in the hold queue * @skb: audit record * @error: error code (unused) * * Description: * This should only be used by the kauditd_thread when it fails to flush the * hold queue. */ static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error) { /* put the record back in the queue */ skb_queue_tail(&audit_hold_queue, skb); } /** * kauditd_hold_skb - Queue an audit record, waiting for auditd * @skb: audit record * @error: error code * * Description: * Queue the audit record, waiting for an instance of auditd. When this * function is called we haven't given up yet on sending the record, but things * are not looking good. The first thing we want to do is try to write the * record via printk and then see if we want to try and hold on to the record * and queue it, if we have room. If we want to hold on to the record, but we * don't have room, record a record lost message. */ static void kauditd_hold_skb(struct sk_buff *skb, int error) { /* at this point it is uncertain if we will ever send this to auditd so * try to send the message via printk before we go any further */ kauditd_printk_skb(skb); /* can we just silently drop the message? */ if (!audit_default) goto drop; /* the hold queue is only for when the daemon goes away completely, * not -EAGAIN failures; if we are in a -EAGAIN state requeue the * record on the retry queue unless it's full, in which case drop it */ if (error == -EAGAIN) { if (!audit_backlog_limit || skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { skb_queue_tail(&audit_retry_queue, skb); return; } audit_log_lost("kauditd retry queue overflow"); goto drop; } /* if we have room in the hold queue, queue the message */ if (!audit_backlog_limit || skb_queue_len(&audit_hold_queue) < audit_backlog_limit) { skb_queue_tail(&audit_hold_queue, skb); return; } /* we have no other options - drop the message */ audit_log_lost("kauditd hold queue overflow"); drop: kfree_skb(skb); } /** * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd * @skb: audit record * @error: error code (unused) * * Description: * Not as serious as kauditd_hold_skb() as we still have a connected auditd, * but for some reason we are having problems sending it audit records so * queue the given record and attempt to resend. */ static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error) { if (!audit_backlog_limit || skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { skb_queue_tail(&audit_retry_queue, skb); return; } /* we have to drop the record, send it via printk as a last effort */ kauditd_printk_skb(skb); audit_log_lost("kauditd retry queue overflow"); kfree_skb(skb); } /** * auditd_reset - Disconnect the auditd connection * @ac: auditd connection state * * Description: * Break the auditd/kauditd connection and move all the queued records into the * hold queue in case auditd reconnects. It is important to note that the @ac * pointer should never be dereferenced inside this function as it may be NULL * or invalid, you can only compare the memory address! If @ac is NULL then * the connection will always be reset. */ static void auditd_reset(const struct auditd_connection *ac) { unsigned long flags; struct sk_buff *skb; struct auditd_connection *ac_old; /* if it isn't already broken, break the connection */ spin_lock_irqsave(&auditd_conn_lock, flags); ac_old = rcu_dereference_protected(auditd_conn, lockdep_is_held(&auditd_conn_lock)); if (ac && ac != ac_old) { /* someone already registered a new auditd connection */ spin_unlock_irqrestore(&auditd_conn_lock, flags); return; } rcu_assign_pointer(auditd_conn, NULL); spin_unlock_irqrestore(&auditd_conn_lock, flags); if (ac_old) call_rcu(&ac_old->rcu, auditd_conn_free); /* flush the retry queue to the hold queue, but don't touch the main * queue since we need to process that normally for multicast */ while ((skb = skb_dequeue(&audit_retry_queue))) kauditd_hold_skb(skb, -ECONNREFUSED); } /** * auditd_send_unicast_skb - Send a record via unicast to auditd * @skb: audit record * * Description: * Send a skb to the audit daemon, returns positive/zero values on success and * negative values on failure; in all cases the skb will be consumed by this * function. If the send results in -ECONNREFUSED the connection with auditd * will be reset. This function may sleep so callers should not hold any locks * where this would cause a problem. */ static int auditd_send_unicast_skb(struct sk_buff *skb) { int rc; u32 portid; struct net *net; struct sock *sk; struct auditd_connection *ac; /* NOTE: we can't call netlink_unicast while in the RCU section so * take a reference to the network namespace and grab local * copies of the namespace, the sock, and the portid; the * namespace and sock aren't going to go away while we hold a * reference and if the portid does become invalid after the RCU * section netlink_unicast() should safely return an error */ rcu_read_lock(); ac = rcu_dereference(auditd_conn); if (!ac) { rcu_read_unlock(); kfree_skb(skb); rc = -ECONNREFUSED; goto err; } net = get_net(ac->net); sk = audit_get_sk(net); portid = ac->portid; rcu_read_unlock(); rc = netlink_unicast(sk, skb, portid, 0); put_net(net); if (rc < 0) goto err; return rc; err: if (ac && rc == -ECONNREFUSED) auditd_reset(ac); return rc; } /** * kauditd_send_queue - Helper for kauditd_thread to flush skb queues * @sk: the sending sock * @portid: the netlink destination * @queue: the skb queue to process * @retry_limit: limit on number of netlink unicast failures * @skb_hook: per-skb hook for additional processing * @err_hook: hook called if the skb fails the netlink unicast send * * Description: * Run through the given queue and attempt to send the audit records to auditd, * returns zero on success, negative values on failure. It is up to the caller * to ensure that the @sk is valid for the duration of this function. * */ static int kauditd_send_queue(struct sock *sk, u32 portid, struct sk_buff_head *queue, unsigned int retry_limit, void (*skb_hook)(struct sk_buff *skb), void (*err_hook)(struct sk_buff *skb, int error)) { int rc = 0; struct sk_buff *skb = NULL; struct sk_buff *skb_tail; unsigned int failed = 0; /* NOTE: kauditd_thread takes care of all our locking, we just use * the netlink info passed to us (e.g. sk and portid) */ skb_tail = skb_peek_tail(queue); while ((skb != skb_tail) && (skb = skb_dequeue(queue))) { /* call the skb_hook for each skb we touch */ if (skb_hook) (*skb_hook)(skb); /* can we send to anyone via unicast? */ if (!sk) { if (err_hook) (*err_hook)(skb, -ECONNREFUSED); continue; } retry: /* grab an extra skb reference in case of error */ skb_get(skb); rc = netlink_unicast(sk, skb, portid, 0); if (rc < 0) { /* send failed - try a few times unless fatal error */ if (++failed >= retry_limit || rc == -ECONNREFUSED || rc == -EPERM) { sk = NULL; if (err_hook) (*err_hook)(skb, rc); if (rc == -EAGAIN) rc = 0; /* continue to drain the queue */ continue; } else goto retry; } else { /* skb sent - drop the extra reference and continue */ consume_skb(skb); failed = 0; } } return (rc >= 0 ? 0 : rc); } /* * kauditd_send_multicast_skb - Send a record to any multicast listeners * @skb: audit record * * Description: * Write a multicast message to anyone listening in the initial network * namespace. This function doesn't consume an skb as might be expected since * it has to copy it anyways. */ static void kauditd_send_multicast_skb(struct sk_buff *skb) { struct sk_buff *copy; struct sock *sock = audit_get_sk(&init_net); struct nlmsghdr *nlh; /* NOTE: we are not taking an additional reference for init_net since * we don't have to worry about it going away */ if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG)) return; /* * The seemingly wasteful skb_copy() rather than bumping the refcount * using skb_get() is necessary because non-standard mods are made to * the skb by the original kaudit unicast socket send routine. The * existing auditd daemon assumes this breakage. Fixing this would * require co-ordinating a change in the established protocol between * the kaudit kernel subsystem and the auditd userspace code. There is * no reason for new multicast clients to continue with this * non-compliance. */ copy = skb_copy(skb, GFP_KERNEL); if (!copy) return; nlh = nlmsg_hdr(copy); nlh->nlmsg_len = skb->len; nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL); } /** * kauditd_thread - Worker thread to send audit records to userspace * @dummy: unused */ static int kauditd_thread(void *dummy) { int rc; u32 portid = 0; struct net *net = NULL; struct sock *sk = NULL; struct auditd_connection *ac; #define UNICAST_RETRIES 5 set_freezable(); while (!kthread_should_stop()) { /* NOTE: see the lock comments in auditd_send_unicast_skb() */ rcu_read_lock(); ac = rcu_dereference(auditd_conn); if (!ac) { rcu_read_unlock(); goto main_queue; } net = get_net(ac->net); sk = audit_get_sk(net); portid = ac->portid; rcu_read_unlock(); /* attempt to flush the hold queue */ rc = kauditd_send_queue(sk, portid, &audit_hold_queue, UNICAST_RETRIES, NULL, kauditd_rehold_skb); if (rc < 0) { sk = NULL; auditd_reset(ac); goto main_queue; } /* attempt to flush the retry queue */ rc = kauditd_send_queue(sk, portid, &audit_retry_queue, UNICAST_RETRIES, NULL, kauditd_hold_skb); if (rc < 0) { sk = NULL; auditd_reset(ac); goto main_queue; } main_queue: /* process the main queue - do the multicast send and attempt * unicast, dump failed record sends to the retry queue; if * sk == NULL due to previous failures we will just do the * multicast send and move the record to the hold queue */ rc = kauditd_send_queue(sk, portid, &audit_queue, 1, kauditd_send_multicast_skb, (sk ? kauditd_retry_skb : kauditd_hold_skb)); if (ac && rc < 0) auditd_reset(ac); sk = NULL; /* drop our netns reference, no auditd sends past this line */ if (net) { put_net(net); net = NULL; } /* we have processed all the queues so wake everyone */ wake_up(&audit_backlog_wait); /* NOTE: we want to wake up if there is anything on the queue, * regardless of if an auditd is connected, as we need to * do the multicast send and rotate records from the * main queue to the retry/hold queues */ wait_event_freezable(kauditd_wait, (skb_queue_len(&audit_queue) ? 1 : 0)); } return 0; } int audit_send_list_thread(void *_dest) { struct audit_netlink_list *dest = _dest; struct sk_buff *skb; struct sock *sk = audit_get_sk(dest->net); /* wait for parent to finish and send an ACK */ audit_ctl_lock(); audit_ctl_unlock(); while ((skb = __skb_dequeue(&dest->q)) != NULL) netlink_unicast(sk, skb, dest->portid, 0); put_net(dest->net); kfree(dest); return 0; } struct sk_buff *audit_make_reply(int seq, int type, int done, int multi, const void *payload, int size) { struct sk_buff *skb; struct nlmsghdr *nlh; void *data; int flags = multi ? NLM_F_MULTI : 0; int t = done ? NLMSG_DONE : type; skb = nlmsg_new(size, GFP_KERNEL); if (!skb) return NULL; nlh = nlmsg_put(skb, 0, seq, t, size, flags); if (!nlh) goto out_kfree_skb; data = nlmsg_data(nlh); memcpy(data, payload, size); return skb; out_kfree_skb: kfree_skb(skb); return NULL; } static void audit_free_reply(struct audit_reply *reply) { if (!reply) return; kfree_skb(reply->skb); if (reply->net) put_net(reply->net); kfree(reply); } static int audit_send_reply_thread(void *arg) { struct audit_reply *reply = (struct audit_reply *)arg; audit_ctl_lock(); audit_ctl_unlock(); /* Ignore failure. It'll only happen if the sender goes away, because our timeout is set to infinite. */ netlink_unicast(audit_get_sk(reply->net), reply->skb, reply->portid, 0); reply->skb = NULL; audit_free_reply(reply); return 0; } /** * audit_send_reply - send an audit reply message via netlink * @request_skb: skb of request we are replying to (used to target the reply) * @seq: sequence number * @type: audit message type * @done: done (last) flag * @multi: multi-part message flag * @payload: payload data * @size: payload size * * Allocates a skb, builds the netlink message, and sends it to the port id. */ static void audit_send_reply(struct sk_buff *request_skb, int seq, int type, int done, int multi, const void *payload, int size) { struct task_struct *tsk; struct audit_reply *reply; reply = kzalloc(sizeof(*reply), GFP_KERNEL); if (!reply) return; reply->skb = audit_make_reply(seq, type, done, multi, payload, size); if (!reply->skb) goto err; reply->net = get_net(sock_net(NETLINK_CB(request_skb).sk)); reply->portid = NETLINK_CB(request_skb).portid; tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply"); if (IS_ERR(tsk)) goto err; return; err: audit_free_reply(reply); } /* * Check for appropriate CAP_AUDIT_ capabilities on incoming audit * control messages. */ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) { int err = 0; /* Only support initial user namespace for now. */ /* * We return ECONNREFUSED because it tricks userspace into thinking * that audit was not configured into the kernel. Lots of users * configure their PAM stack (because that's what the distro does) * to reject login if unable to send messages to audit. If we return * ECONNREFUSED the PAM stack thinks the kernel does not have audit * configured in and will let login proceed. If we return EPERM * userspace will reject all logins. This should be removed when we * support non init namespaces!! */ if (current_user_ns() != &init_user_ns) return -ECONNREFUSED; switch (msg_type) { case AUDIT_LIST: case AUDIT_ADD: case AUDIT_DEL: return -EOPNOTSUPP; case AUDIT_GET: case AUDIT_SET: case AUDIT_GET_FEATURE: case AUDIT_SET_FEATURE: case AUDIT_LIST_RULES: case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: case AUDIT_SIGNAL_INFO: case AUDIT_TTY_GET: case AUDIT_TTY_SET: case AUDIT_TRIM: case AUDIT_MAKE_EQUIV: /* Only support auditd and auditctl in initial pid namespace * for now. */ if (task_active_pid_ns(current) != &init_pid_ns) return -EPERM; if (!netlink_capable(skb, CAP_AUDIT_CONTROL)) err = -EPERM; break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: if (!netlink_capable(skb, CAP_AUDIT_WRITE)) err = -EPERM; break; default: /* bad msg */ err = -EINVAL; } return err; } static void audit_log_common_recv_msg(struct audit_context *context, struct audit_buffer **ab, u16 msg_type) { uid_t uid = from_kuid(&init_user_ns, current_uid()); pid_t pid = task_tgid_nr(current); if (!audit_enabled && msg_type != AUDIT_USER_AVC) { *ab = NULL; return; } *ab = audit_log_start(context, GFP_KERNEL, msg_type); if (unlikely(!*ab)) return; audit_log_format(*ab, "pid=%d uid=%u ", pid, uid); audit_log_session_info(*ab); audit_log_task_context(*ab); } static inline void audit_log_user_recv_msg(struct audit_buffer **ab, u16 msg_type) { audit_log_common_recv_msg(NULL, ab, msg_type); } static int is_audit_feature_set(int i) { return af.features & AUDIT_FEATURE_TO_MASK(i); } static int audit_get_feature(struct sk_buff *skb) { u32 seq; seq = nlmsg_hdr(skb)->nlmsg_seq; audit_send_reply(skb, seq, AUDIT_GET_FEATURE, 0, 0, &af, sizeof(af)); return 0; } static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature, u32 old_lock, u32 new_lock, int res) { struct audit_buffer *ab; if (audit_enabled == AUDIT_OFF) return; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_FEATURE_CHANGE); if (!ab) return; audit_log_task_info(ab); audit_log_format(ab, " feature=%s old=%u new=%u old_lock=%u new_lock=%u res=%d", audit_feature_names[which], !!old_feature, !!new_feature, !!old_lock, !!new_lock, res); audit_log_end(ab); } static int audit_set_feature(struct audit_features *uaf) { int i; BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > ARRAY_SIZE(audit_feature_names)); /* if there is ever a version 2 we should handle that here */ for (i = 0; i <= AUDIT_LAST_FEATURE; i++) { u32 feature = AUDIT_FEATURE_TO_MASK(i); u32 old_feature, new_feature, old_lock, new_lock; /* if we are not changing this feature, move along */ if (!(feature & uaf->mask)) continue; old_feature = af.features & feature; new_feature = uaf->features & feature; new_lock = (uaf->lock | af.lock) & feature; old_lock = af.lock & feature; /* are we changing a locked feature? */ if (old_lock && (new_feature != old_feature)) { audit_log_feature_change(i, old_feature, new_feature, old_lock, new_lock, 0); return -EPERM; } } /* nothing invalid, do the changes */ for (i = 0; i <= AUDIT_LAST_FEATURE; i++) { u32 feature = AUDIT_FEATURE_TO_MASK(i); u32 old_feature, new_feature, old_lock, new_lock; /* if we are not changing this feature, move along */ if (!(feature & uaf->mask)) continue; old_feature = af.features & feature; new_feature = uaf->features & feature; old_lock = af.lock & feature; new_lock = (uaf->lock | af.lock) & feature; if (new_feature != old_feature) audit_log_feature_change(i, old_feature, new_feature, old_lock, new_lock, 1); if (new_feature) af.features |= feature; else af.features &= ~feature; af.lock |= new_lock; } return 0; } static int audit_replace(struct pid *pid) { pid_t pvnr; struct sk_buff *skb; pvnr = pid_vnr(pid); skb = audit_make_reply(0, AUDIT_REPLACE, 0, 0, &pvnr, sizeof(pvnr)); if (!skb) return -ENOMEM; return auditd_send_unicast_skb(skb); } static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh, bool *ack) { u32 seq; void *data; int data_len; int err; struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; struct audit_sig_info *sig_data; char *ctx = NULL; u32 len; err = audit_netlink_ok(skb, msg_type); if (err) return err; seq = nlh->nlmsg_seq; data = nlmsg_data(nlh); data_len = nlmsg_len(nlh); switch (msg_type) { case AUDIT_GET: { struct audit_status s; memset(&s, 0, sizeof(s)); s.enabled = audit_enabled; s.failure = audit_failure; /* NOTE: use pid_vnr() so the PID is relative to the current * namespace */ s.pid = auditd_pid_vnr(); s.rate_limit = audit_rate_limit; s.backlog_limit = audit_backlog_limit; s.lost = atomic_read(&audit_lost); s.backlog = skb_queue_len(&audit_queue); s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; s.backlog_wait_time = audit_backlog_wait_time; s.backlog_wait_time_actual = atomic_read(&audit_backlog_wait_time_actual); audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); break; } case AUDIT_SET: { struct audit_status s; memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); if (s.mask & AUDIT_STATUS_ENABLED) { err = audit_set_enabled(s.enabled); if (err < 0) return err; } if (s.mask & AUDIT_STATUS_FAILURE) { err = audit_set_failure(s.failure); if (err < 0) return err; } if (s.mask & AUDIT_STATUS_PID) { /* NOTE: we are using the vnr PID functions below * because the s.pid value is relative to the * namespace of the caller; at present this * doesn't matter much since you can really only * run auditd from the initial pid namespace, but * something to keep in mind if this changes */ pid_t new_pid = s.pid; pid_t auditd_pid; struct pid *req_pid = task_tgid(current); /* Sanity check - PID values must match. Setting * pid to 0 is how auditd ends auditing. */ if (new_pid && (new_pid != pid_vnr(req_pid))) return -EINVAL; /* test the auditd connection */ audit_replace(req_pid); auditd_pid = auditd_pid_vnr(); if (auditd_pid) { /* replacing a healthy auditd is not allowed */ if (new_pid) { audit_log_config_change("audit_pid", new_pid, auditd_pid, 0); return -EEXIST; } /* only current auditd can unregister itself */ if (pid_vnr(req_pid) != auditd_pid) { audit_log_config_change("audit_pid", new_pid, auditd_pid, 0); return -EACCES; } } if (new_pid) { /* register a new auditd connection */ err = auditd_set(req_pid, NETLINK_CB(skb).portid, sock_net(NETLINK_CB(skb).sk), skb, ack); if (audit_enabled != AUDIT_OFF) audit_log_config_change("audit_pid", new_pid, auditd_pid, err ? 0 : 1); if (err) return err; /* try to process any backlog */ wake_up_interruptible(&kauditd_wait); } else { if (audit_enabled != AUDIT_OFF) audit_log_config_change("audit_pid", new_pid, auditd_pid, 1); /* unregister the auditd connection */ auditd_reset(NULL); } } if (s.mask & AUDIT_STATUS_RATE_LIMIT) { err = audit_set_rate_limit(s.rate_limit); if (err < 0) return err; } if (s.mask & AUDIT_STATUS_BACKLOG_LIMIT) { err = audit_set_backlog_limit(s.backlog_limit); if (err < 0) return err; } if (s.mask & AUDIT_STATUS_BACKLOG_WAIT_TIME) { if (sizeof(s) > (size_t)nlh->nlmsg_len) return -EINVAL; if (s.backlog_wait_time > 10*AUDIT_BACKLOG_WAIT_TIME) return -EINVAL; err = audit_set_backlog_wait_time(s.backlog_wait_time); if (err < 0) return err; } if (s.mask == AUDIT_STATUS_LOST) { u32 lost = atomic_xchg(&audit_lost, 0); audit_log_config_change("lost", 0, lost, 1); return lost; } if (s.mask == AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL) { u32 actual = atomic_xchg(&audit_backlog_wait_time_actual, 0); audit_log_config_change("backlog_wait_time_actual", 0, actual, 1); return actual; } break; } case AUDIT_GET_FEATURE: err = audit_get_feature(skb); if (err) return err; break; case AUDIT_SET_FEATURE: if (data_len < sizeof(struct audit_features)) return -EINVAL; err = audit_set_feature(data); if (err) return err; break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: if (!audit_enabled && msg_type != AUDIT_USER_AVC) return 0; /* exit early if there isn't at least one character to print */ if (data_len < 2) return -EINVAL; err = audit_filter(msg_type, AUDIT_FILTER_USER); if (err == 1) { /* match or error */ char *str = data; err = 0; if (msg_type == AUDIT_USER_TTY) { err = tty_audit_push(); if (err) break; } audit_log_user_recv_msg(&ab, msg_type); if (msg_type != AUDIT_USER_TTY) { /* ensure NULL termination */ str[data_len - 1] = '\0'; audit_log_format(ab, " msg='%.*s'", AUDIT_MESSAGE_TEXT_MAX, str); } else { audit_log_format(ab, " data="); if (str[data_len - 1] == '\0') data_len--; audit_log_n_untrustedstring(ab, str, data_len); } audit_log_end(ab); } break; case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: if (data_len < sizeof(struct audit_rule_data)) return -EINVAL; if (audit_enabled == AUDIT_LOCKED) { audit_log_common_recv_msg(audit_context(), &ab, AUDIT_CONFIG_CHANGE); audit_log_format(ab, " op=%s audit_enabled=%d res=0", msg_type == AUDIT_ADD_RULE ? "add_rule" : "remove_rule", audit_enabled); audit_log_end(ab); return -EPERM; } err = audit_rule_change(msg_type, seq, data, data_len); break; case AUDIT_LIST_RULES: err = audit_list_rules_send(skb, seq); break; case AUDIT_TRIM: audit_trim_trees(); audit_log_common_recv_msg(audit_context(), &ab, AUDIT_CONFIG_CHANGE); audit_log_format(ab, " op=trim res=1"); audit_log_end(ab); break; case AUDIT_MAKE_EQUIV: { void *bufp = data; u32 sizes[2]; size_t msglen = data_len; char *old, *new; err = -EINVAL; if (msglen < 2 * sizeof(u32)) break; memcpy(sizes, bufp, 2 * sizeof(u32)); bufp += 2 * sizeof(u32); msglen -= 2 * sizeof(u32); old = audit_unpack_string(&bufp, &msglen, sizes[0]); if (IS_ERR(old)) { err = PTR_ERR(old); break; } new = audit_unpack_string(&bufp, &msglen, sizes[1]); if (IS_ERR(new)) { err = PTR_ERR(new); kfree(old); break; } /* OK, here comes... */ err = audit_tag_tree(old, new); audit_log_common_recv_msg(audit_context(), &ab, AUDIT_CONFIG_CHANGE); audit_log_format(ab, " op=make_equiv old="); audit_log_untrustedstring(ab, old); audit_log_format(ab, " new="); audit_log_untrustedstring(ab, new); audit_log_format(ab, " res=%d", !err); audit_log_end(ab); kfree(old); kfree(new); break; } case AUDIT_SIGNAL_INFO: len = 0; if (audit_sig_sid) { err = security_secid_to_secctx(audit_sig_sid, &ctx, &len); if (err) return err; } sig_data = kmalloc(struct_size(sig_data, ctx, len), GFP_KERNEL); if (!sig_data) { if (audit_sig_sid) security_release_secctx(ctx, len); return -ENOMEM; } sig_data->uid = from_kuid(&init_user_ns, audit_sig_uid); sig_data->pid = audit_sig_pid; if (audit_sig_sid) { memcpy(sig_data->ctx, ctx, len); security_release_secctx(ctx, len); } audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO, 0, 0, sig_data, struct_size(sig_data, ctx, len)); kfree(sig_data); break; case AUDIT_TTY_GET: { struct audit_tty_status s; unsigned int t; t = READ_ONCE(current->signal->audit_tty); s.enabled = t & AUDIT_TTY_ENABLE; s.log_passwd = !!(t & AUDIT_TTY_LOG_PASSWD); audit_send_reply(skb, seq, AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); break; } case AUDIT_TTY_SET: { struct audit_tty_status s, old; struct audit_buffer *ab; unsigned int t; memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); /* check if new data is valid */ if ((s.enabled != 0 && s.enabled != 1) || (s.log_passwd != 0 && s.log_passwd != 1)) err = -EINVAL; if (err) t = READ_ONCE(current->signal->audit_tty); else { t = s.enabled | (-s.log_passwd & AUDIT_TTY_LOG_PASSWD); t = xchg(¤t->signal->audit_tty, t); } old.enabled = t & AUDIT_TTY_ENABLE; old.log_passwd = !!(t & AUDIT_TTY_LOG_PASSWD); audit_log_common_recv_msg(audit_context(), &ab, AUDIT_CONFIG_CHANGE); audit_log_format(ab, " op=tty_set old-enabled=%d new-enabled=%d" " old-log_passwd=%d new-log_passwd=%d res=%d", old.enabled, s.enabled, old.log_passwd, s.log_passwd, !err); audit_log_end(ab); break; } default: err = -EINVAL; break; } return err < 0 ? err : 0; } /** * audit_receive - receive messages from a netlink control socket * @skb: the message buffer * * Parse the provided skb and deal with any messages that may be present, * malformed skbs are discarded. */ static void audit_receive(struct sk_buff *skb) { struct nlmsghdr *nlh; bool ack; /* * len MUST be signed for nlmsg_next to be able to dec it below 0 * if the nlmsg_len was not aligned */ int len; int err; nlh = nlmsg_hdr(skb); len = skb->len; audit_ctl_lock(); while (nlmsg_ok(nlh, len)) { ack = nlh->nlmsg_flags & NLM_F_ACK; err = audit_receive_msg(skb, nlh, &ack); /* send an ack if the user asked for one and audit_receive_msg * didn't already do it, or if there was an error. */ if (ack || err) netlink_ack(skb, nlh, err, NULL); nlh = nlmsg_next(nlh, &len); } audit_ctl_unlock(); /* can't block with the ctrl lock, so penalize the sender now */ if (audit_backlog_limit && (skb_queue_len(&audit_queue) > audit_backlog_limit)) { DECLARE_WAITQUEUE(wait, current); /* wake kauditd to try and flush the queue */ wake_up_interruptible(&kauditd_wait); add_wait_queue_exclusive(&audit_backlog_wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(audit_backlog_wait_time); remove_wait_queue(&audit_backlog_wait, &wait); } } /* Log information about who is connecting to the audit multicast socket */ static void audit_log_multicast(int group, const char *op, int err) { const struct cred *cred; struct tty_struct *tty; char comm[sizeof(current->comm)]; struct audit_buffer *ab; if (!audit_enabled) return; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_EVENT_LISTENER); if (!ab) return; cred = current_cred(); tty = audit_get_tty(); audit_log_format(ab, "pid=%u uid=%u auid=%u tty=%s ses=%u", task_tgid_nr(current), from_kuid(&init_user_ns, cred->uid), from_kuid(&init_user_ns, audit_get_loginuid(current)), tty ? tty_name(tty) : "(none)", audit_get_sessionid(current)); audit_put_tty(tty); audit_log_task_context(ab); /* subj= */ audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, get_task_comm(comm, current)); audit_log_d_path_exe(ab, current->mm); /* exe= */ audit_log_format(ab, " nl-mcgrp=%d op=%s res=%d", group, op, !err); audit_log_end(ab); } /* Run custom bind function on netlink socket group connect or bind requests. */ static int audit_multicast_bind(struct net *net, int group) { int err = 0; if (!capable(CAP_AUDIT_READ)) err = -EPERM; audit_log_multicast(group, "connect", err); return err; } static void audit_multicast_unbind(struct net *net, int group) { audit_log_multicast(group, "disconnect", 0); } static int __net_init audit_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = audit_receive, .bind = audit_multicast_bind, .unbind = audit_multicast_unbind, .flags = NL_CFG_F_NONROOT_RECV, .groups = AUDIT_NLGRP_MAX, }; struct audit_net *aunet = net_generic(net, audit_net_id); aunet->sk = netlink_kernel_create(net, NETLINK_AUDIT, &cfg); if (aunet->sk == NULL) { audit_panic("cannot initialize netlink socket in namespace"); return -ENOMEM; } /* limit the timeout in case auditd is blocked/stopped */ aunet->sk->sk_sndtimeo = HZ / 10; return 0; } static void __net_exit audit_net_exit(struct net *net) { struct audit_net *aunet = net_generic(net, audit_net_id); /* NOTE: you would think that we would want to check the auditd * connection and potentially reset it here if it lives in this * namespace, but since the auditd connection tracking struct holds a * reference to this namespace (see auditd_set()) we are only ever * going to get here after that connection has been released */ netlink_kernel_release(aunet->sk); } static struct pernet_operations audit_net_ops __net_initdata = { .init = audit_net_init, .exit = audit_net_exit, .id = &audit_net_id, .size = sizeof(struct audit_net), }; /* Initialize audit support at boot time. */ static int __init audit_init(void) { int i; if (audit_initialized == AUDIT_DISABLED) return 0; audit_buffer_cache = KMEM_CACHE(audit_buffer, SLAB_PANIC); skb_queue_head_init(&audit_queue); skb_queue_head_init(&audit_retry_queue); skb_queue_head_init(&audit_hold_queue); for (i = 0; i < AUDIT_INODE_BUCKETS; i++) INIT_LIST_HEAD(&audit_inode_hash[i]); mutex_init(&audit_cmd_mutex.lock); audit_cmd_mutex.owner = NULL; pr_info("initializing netlink subsys (%s)\n", str_enabled_disabled(audit_default)); register_pernet_subsys(&audit_net_ops); audit_initialized = AUDIT_INITIALIZED; kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); if (IS_ERR(kauditd_task)) { int err = PTR_ERR(kauditd_task); panic("audit: failed to start the kauditd thread (%d)\n", err); } audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "state=initialized audit_enabled=%u res=1", audit_enabled); return 0; } postcore_initcall(audit_init); /* * Process kernel command-line parameter at boot time. * audit={0|off} or audit={1|on}. */ static int __init audit_enable(char *str) { if (!strcasecmp(str, "off") || !strcmp(str, "0")) audit_default = AUDIT_OFF; else if (!strcasecmp(str, "on") || !strcmp(str, "1")) audit_default = AUDIT_ON; else { pr_err("audit: invalid 'audit' parameter value (%s)\n", str); audit_default = AUDIT_ON; } if (audit_default == AUDIT_OFF) audit_initialized = AUDIT_DISABLED; if (audit_set_enabled(audit_default)) pr_err("audit: error setting audit state (%d)\n", audit_default); pr_info("%s\n", audit_default ? "enabled (after initialization)" : "disabled (until reboot)"); return 1; } __setup("audit=", audit_enable); /* Process kernel command-line parameter at boot time. * audit_backlog_limit=<n> */ static int __init audit_backlog_limit_set(char *str) { u32 audit_backlog_limit_arg; pr_info("audit_backlog_limit: "); if (kstrtouint(str, 0, &audit_backlog_limit_arg)) { pr_cont("using default of %u, unable to parse %s\n", audit_backlog_limit, str); return 1; } audit_backlog_limit = audit_backlog_limit_arg; pr_cont("%d\n", audit_backlog_limit); return 1; } __setup("audit_backlog_limit=", audit_backlog_limit_set); static void audit_buffer_free(struct audit_buffer *ab) { if (!ab) return; kfree_skb(ab->skb); kmem_cache_free(audit_buffer_cache, ab); } static struct audit_buffer *audit_buffer_alloc(struct audit_context *ctx, gfp_t gfp_mask, int type) { struct audit_buffer *ab; ab = kmem_cache_alloc(audit_buffer_cache, gfp_mask); if (!ab) return NULL; ab->skb = nlmsg_new(AUDIT_BUFSIZ, gfp_mask); if (!ab->skb) goto err; if (!nlmsg_put(ab->skb, 0, 0, type, 0, 0)) goto err; ab->ctx = ctx; ab->gfp_mask = gfp_mask; return ab; err: audit_buffer_free(ab); return NULL; } /** * audit_serial - compute a serial number for the audit record * * Compute a serial number for the audit record. Audit records are * written to user-space as soon as they are generated, so a complete * audit record may be written in several pieces. The timestamp of the * record and this serial number are used by the user-space tools to * determine which pieces belong to the same audit record. The * (timestamp,serial) tuple is unique for each syscall and is live from * syscall entry to syscall exit. * * NOTE: Another possibility is to store the formatted records off the * audit context (for those records that have a context), and emit them * all at syscall exit. However, this could delay the reporting of * significant errors until syscall exit (or never, if the system * halts). */ unsigned int audit_serial(void) { static atomic_t serial = ATOMIC_INIT(0); return atomic_inc_return(&serial); } static inline void audit_get_stamp(struct audit_context *ctx, struct timespec64 *t, unsigned int *serial) { if (!ctx || !auditsc_get_stamp(ctx, t, serial)) { ktime_get_coarse_real_ts64(t); *serial = audit_serial(); } } /** * audit_log_start - obtain an audit buffer * @ctx: audit_context (may be NULL) * @gfp_mask: type of allocation * @type: audit message type * * Returns audit_buffer pointer on success or NULL on error. * * Obtain an audit buffer. This routine does locking to obtain the * audit buffer, but then no locking is required for calls to * audit_log_*format. If the task (ctx) is a task that is currently in a * syscall, then the syscall is marked as auditable and an audit record * will be written at syscall exit. If there is no associated task, then * task context (ctx) should be NULL. */ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type) { struct audit_buffer *ab; struct timespec64 t; unsigned int serial; if (audit_initialized != AUDIT_INITIALIZED) return NULL; if (unlikely(!audit_filter(type, AUDIT_FILTER_EXCLUDE))) return NULL; /* NOTE: don't ever fail/sleep on these two conditions: * 1. auditd generated record - since we need auditd to drain the * queue; also, when we are checking for auditd, compare PIDs using * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() * using a PID anchored in the caller's namespace * 2. generator holding the audit_cmd_mutex - we don't want to block * while holding the mutex, although we do penalize the sender * later in audit_receive() when it is safe to block */ if (!(auditd_test_task(current) || audit_ctl_owner_current())) { long stime = audit_backlog_wait_time; while (audit_backlog_limit && (skb_queue_len(&audit_queue) > audit_backlog_limit)) { /* wake kauditd to try and flush the queue */ wake_up_interruptible(&kauditd_wait); /* sleep if we are allowed and we haven't exhausted our * backlog wait limit */ if (gfpflags_allow_blocking(gfp_mask) && (stime > 0)) { long rtime = stime; DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(&audit_backlog_wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); stime = schedule_timeout(rtime); atomic_add(rtime - stime, &audit_backlog_wait_time_actual); remove_wait_queue(&audit_backlog_wait, &wait); } else { if (audit_rate_check() && printk_ratelimit()) pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n", skb_queue_len(&audit_queue), audit_backlog_limit); audit_log_lost("backlog limit exceeded"); return NULL; } } } ab = audit_buffer_alloc(ctx, gfp_mask, type); if (!ab) { audit_log_lost("out of memory in audit_log_start"); return NULL; } audit_get_stamp(ab->ctx, &t, &serial); /* cancel dummy context to enable supporting records */ if (ctx) ctx->dummy = 0; audit_log_format(ab, "audit(%llu.%03lu:%u): ", (unsigned long long)t.tv_sec, t.tv_nsec/1000000, serial); return ab; } /** * audit_expand - expand skb in the audit buffer * @ab: audit_buffer * @extra: space to add at tail of the skb * * Returns 0 (no space) on failed expansion, or available space if * successful. */ static inline int audit_expand(struct audit_buffer *ab, int extra) { struct sk_buff *skb = ab->skb; int oldtail = skb_tailroom(skb); int ret = pskb_expand_head(skb, 0, extra, ab->gfp_mask); int newtail = skb_tailroom(skb); if (ret < 0) { audit_log_lost("out of memory in audit_expand"); return 0; } skb->truesize += newtail - oldtail; return newtail; } /* * Format an audit message into the audit buffer. If there isn't enough * room in the audit buffer, more room will be allocated and vsnprint * will be called a second time. Currently, we assume that a printk * can't format message larger than 1024 bytes, so we don't either. */ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt, va_list args) { int len, avail; struct sk_buff *skb; va_list args2; if (!ab) return; BUG_ON(!ab->skb); skb = ab->skb; avail = skb_tailroom(skb); if (avail == 0) { avail = audit_expand(ab, AUDIT_BUFSIZ); if (!avail) goto out; } va_copy(args2, args); len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args); if (len >= avail) { /* The printk buffer is 1024 bytes long, so if we get * here and AUDIT_BUFSIZ is at least 1024, then we can * log everything that printk could have logged. */ avail = audit_expand(ab, max_t(unsigned, AUDIT_BUFSIZ, 1+len-avail)); if (!avail) goto out_va_end; len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2); } if (len > 0) skb_put(skb, len); out_va_end: va_end(args2); out: return; } /** * audit_log_format - format a message into the audit buffer. * @ab: audit_buffer * @fmt: format string * @...: optional parameters matching @fmt string * * All the work is done in audit_log_vformat. */ void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) { va_list args; if (!ab) return; va_start(args, fmt); audit_log_vformat(ab, fmt, args); va_end(args); } /** * audit_log_n_hex - convert a buffer to hex and append it to the audit skb * @ab: the audit_buffer * @buf: buffer to convert to hex * @len: length of @buf to be converted * * No return value; failure to expand is silently ignored. * * This function will take the passed buf and convert it into a string of * ascii hex digits. The new string is placed onto the skb. */ void audit_log_n_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len) { int i, avail, new_len; unsigned char *ptr; struct sk_buff *skb; if (!ab) return; BUG_ON(!ab->skb); skb = ab->skb; avail = skb_tailroom(skb); new_len = len<<1; if (new_len >= avail) { /* Round the buffer request up to the next multiple */ new_len = AUDIT_BUFSIZ*(((new_len-avail)/AUDIT_BUFSIZ) + 1); avail = audit_expand(ab, new_len); if (!avail) return; } ptr = skb_tail_pointer(skb); for (i = 0; i < len; i++) ptr = hex_byte_pack_upper(ptr, buf[i]); *ptr = 0; skb_put(skb, len << 1); /* new string is twice the old string */ } /* * Format a string of no more than slen characters into the audit buffer, * enclosed in quote marks. */ void audit_log_n_string(struct audit_buffer *ab, const char *string, size_t slen) { int avail, new_len; unsigned char *ptr; struct sk_buff *skb; if (!ab) return; BUG_ON(!ab->skb); skb = ab->skb; avail = skb_tailroom(skb); new_len = slen + 3; /* enclosing quotes + null terminator */ if (new_len > avail) { avail = audit_expand(ab, new_len); if (!avail) return; } ptr = skb_tail_pointer(skb); *ptr++ = '"'; memcpy(ptr, string, slen); ptr += slen; *ptr++ = '"'; *ptr = 0; skb_put(skb, slen + 2); /* don't include null terminator */ } /** * audit_string_contains_control - does a string need to be logged in hex * @string: string to be checked * @len: max length of the string to check */ bool audit_string_contains_control(const char *string, size_t len) { const unsigned char *p; for (p = string; p < (const unsigned char *)string + len; p++) { if (*p == '"' || *p < 0x21 || *p > 0x7e) return true; } return false; } /** * audit_log_n_untrustedstring - log a string that may contain random characters * @ab: audit_buffer * @len: length of string (not including trailing null) * @string: string to be logged * * This code will escape a string that is passed to it if the string * contains a control character, unprintable character, double quote mark, * or a space. Unescaped strings will start and end with a double quote mark. * Strings that are escaped are printed in hex (2 digits per char). * * The caller specifies the number of characters in the string to log, which may * or may not be the entire string. */ void audit_log_n_untrustedstring(struct audit_buffer *ab, const char *string, size_t len) { if (audit_string_contains_control(string, len)) audit_log_n_hex(ab, string, len); else audit_log_n_string(ab, string, len); } /** * audit_log_untrustedstring - log a string that may contain random characters * @ab: audit_buffer * @string: string to be logged * * Same as audit_log_n_untrustedstring(), except that strlen is used to * determine string length. */ void audit_log_untrustedstring(struct audit_buffer *ab, const char *string) { audit_log_n_untrustedstring(ab, string, strlen(string)); } /* This is a helper-function to print the escaped d_path */ void audit_log_d_path(struct audit_buffer *ab, const char *prefix, const struct path *path) { char *p, *pathname; if (prefix) audit_log_format(ab, "%s", prefix); /* We will allow 11 spaces for ' (deleted)' to be appended */ pathname = kmalloc(PATH_MAX+11, ab->gfp_mask); if (!pathname) { audit_log_format(ab, "\"<no_memory>\""); return; } p = d_path(path, pathname, PATH_MAX+11); if (IS_ERR(p)) { /* Should never happen since we send PATH_MAX */ /* FIXME: can we save some information here? */ audit_log_format(ab, "\"<too_long>\""); } else audit_log_untrustedstring(ab, p); kfree(pathname); } void audit_log_session_info(struct audit_buffer *ab) { unsigned int sessionid = audit_get_sessionid(current); uid_t auid = from_kuid(&init_user_ns, audit_get_loginuid(current)); audit_log_format(ab, "auid=%u ses=%u", auid, sessionid); } void audit_log_key(struct audit_buffer *ab, char *key) { audit_log_format(ab, " key="); if (key) audit_log_untrustedstring(ab, key); else audit_log_format(ab, "(null)"); } int audit_log_task_context(struct audit_buffer *ab) { char *ctx = NULL; unsigned len; int error; u32 sid; security_current_getsecid_subj(&sid); if (!sid) return 0; error = security_secid_to_secctx(sid, &ctx, &len); if (error) { if (error != -EINVAL) goto error_path; return 0; } audit_log_format(ab, " subj=%s", ctx); security_release_secctx(ctx, len); return 0; error_path: audit_panic("error in audit_log_task_context"); return error; } EXPORT_SYMBOL(audit_log_task_context); void audit_log_d_path_exe(struct audit_buffer *ab, struct mm_struct *mm) { struct file *exe_file; if (!mm) goto out_null; exe_file = get_mm_exe_file(mm); if (!exe_file) goto out_null; audit_log_d_path(ab, " exe=", &exe_file->f_path); fput(exe_file); return; out_null: audit_log_format(ab, " exe=(null)"); } struct tty_struct *audit_get_tty(void) { struct tty_struct *tty = NULL; unsigned long flags; spin_lock_irqsave(¤t->sighand->siglock, flags); if (current->signal) tty = tty_kref_get(current->signal->tty); spin_unlock_irqrestore(¤t->sighand->siglock, flags); return tty; } void audit_put_tty(struct tty_struct *tty) { tty_kref_put(tty); } void audit_log_task_info(struct audit_buffer *ab) { const struct cred *cred; char comm[sizeof(current->comm)]; struct tty_struct *tty; if (!ab) return; cred = current_cred(); tty = audit_get_tty(); audit_log_format(ab, " ppid=%d pid=%d auid=%u uid=%u gid=%u" " euid=%u suid=%u fsuid=%u" " egid=%u sgid=%u fsgid=%u tty=%s ses=%u", task_ppid_nr(current), task_tgid_nr(current), from_kuid(&init_user_ns, audit_get_loginuid(current)), from_kuid(&init_user_ns, cred->uid), from_kgid(&init_user_ns, cred->gid), from_kuid(&init_user_ns, cred->euid), from_kuid(&init_user_ns, cred->suid), from_kuid(&init_user_ns, cred->fsuid), from_kgid(&init_user_ns, cred->egid), from_kgid(&init_user_ns, cred->sgid), from_kgid(&init_user_ns, cred->fsgid), tty ? tty_name(tty) : "(none)", audit_get_sessionid(current)); audit_put_tty(tty); audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, get_task_comm(comm, current)); audit_log_d_path_exe(ab, current->mm); audit_log_task_context(ab); } EXPORT_SYMBOL(audit_log_task_info); /** * audit_log_path_denied - report a path restriction denial * @type: audit message type (AUDIT_ANOM_LINK, AUDIT_ANOM_CREAT, etc) * @operation: specific operation name */ void audit_log_path_denied(int type, const char *operation) { struct audit_buffer *ab; if (!audit_enabled || audit_dummy_context()) return; /* Generate log with subject, operation, outcome. */ ab = audit_log_start(audit_context(), GFP_KERNEL, type); if (!ab) return; audit_log_format(ab, "op=%s", operation); audit_log_task_info(ab); audit_log_format(ab, " res=0"); audit_log_end(ab); } /* global counter which is incremented every time something logs in */ static atomic_t session_id = ATOMIC_INIT(0); static int audit_set_loginuid_perm(kuid_t loginuid) { /* if we are unset, we don't need privs */ if (!audit_loginuid_set(current)) return 0; /* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/ if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE)) return -EPERM; /* it is set, you need permission */ if (!capable(CAP_AUDIT_CONTROL)) return -EPERM; /* reject if this is not an unset and we don't allow that */ if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid)) return -EPERM; return 0; } static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, unsigned int oldsessionid, unsigned int sessionid, int rc) { struct audit_buffer *ab; uid_t uid, oldloginuid, loginuid; struct tty_struct *tty; if (!audit_enabled) return; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_LOGIN); if (!ab) return; uid = from_kuid(&init_user_ns, task_uid(current)); oldloginuid = from_kuid(&init_user_ns, koldloginuid); loginuid = from_kuid(&init_user_ns, kloginuid); tty = audit_get_tty(); audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid); audit_log_task_context(ab); audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d", oldloginuid, loginuid, tty ? tty_name(tty) : "(none)", oldsessionid, sessionid, !rc); audit_put_tty(tty); audit_log_end(ab); } /** * audit_set_loginuid - set current task's loginuid * @loginuid: loginuid value * * Returns 0. * * Called (set) from fs/proc/base.c::proc_loginuid_write(). */ int audit_set_loginuid(kuid_t loginuid) { unsigned int oldsessionid, sessionid = AUDIT_SID_UNSET; kuid_t oldloginuid; int rc; oldloginuid = audit_get_loginuid(current); oldsessionid = audit_get_sessionid(current); rc = audit_set_loginuid_perm(loginuid); if (rc) goto out; /* are we setting or clearing? */ if (uid_valid(loginuid)) { sessionid = (unsigned int)atomic_inc_return(&session_id); if (unlikely(sessionid == AUDIT_SID_UNSET)) sessionid = (unsigned int)atomic_inc_return(&session_id); } current->sessionid = sessionid; current->loginuid = loginuid; out: audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc); return rc; } /** * audit_signal_info - record signal info for shutting down audit subsystem * @sig: signal value * @t: task being signaled * * If the audit subsystem is being terminated, record the task (pid) * and uid that is doing that. */ int audit_signal_info(int sig, struct task_struct *t) { kuid_t uid = current_uid(), auid; if (auditd_test_task(t) && (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2)) { audit_sig_pid = task_tgid_nr(current); auid = audit_get_loginuid(current); if (uid_valid(auid)) audit_sig_uid = auid; else audit_sig_uid = uid; security_current_getsecid_subj(&audit_sig_sid); } return audit_signal_info_syscall(t); } /** * audit_log_end - end one audit record * @ab: the audit_buffer * * We can not do a netlink send inside an irq context because it blocks (last * arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed on a * queue and a kthread is scheduled to remove them from the queue outside the * irq context. May be called in any context. */ void audit_log_end(struct audit_buffer *ab) { struct sk_buff *skb; struct nlmsghdr *nlh; if (!ab) return; if (audit_rate_check()) { skb = ab->skb; ab->skb = NULL; /* setup the netlink header, see the comments in * kauditd_send_multicast_skb() for length quirks */ nlh = nlmsg_hdr(skb); nlh->nlmsg_len = skb->len - NLMSG_HDRLEN; /* queue the netlink packet and poke the kauditd thread */ skb_queue_tail(&audit_queue, skb); wake_up_interruptible(&kauditd_wait); } else audit_log_lost("rate limit exceeded"); audit_buffer_free(ab); } /** * audit_log - Log an audit record * @ctx: audit context * @gfp_mask: type of allocation * @type: audit message type * @fmt: format string to use * @...: variable parameters matching the format string * * This is a convenience function that calls audit_log_start, * audit_log_vformat, and audit_log_end. It may be called * in any context. */ void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, const char *fmt, ...) { struct audit_buffer *ab; va_list args; ab = audit_log_start(ctx, gfp_mask, type); if (ab) { va_start(args, fmt); audit_log_vformat(ab, fmt, args); va_end(args); audit_log_end(ab); } } EXPORT_SYMBOL(audit_log_start); EXPORT_SYMBOL(audit_log_end); EXPORT_SYMBOL(audit_log_format); EXPORT_SYMBOL(audit_log); |
118 118 118 3 18 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM writeback #if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_WRITEBACK_H #include <linux/tracepoint.h> #include <linux/backing-dev.h> #include <linux/writeback.h> #define show_inode_state(state) \ __print_flags(state, "|", \ {I_DIRTY_SYNC, "I_DIRTY_SYNC"}, \ {I_DIRTY_DATASYNC, "I_DIRTY_DATASYNC"}, \ {I_DIRTY_PAGES, "I_DIRTY_PAGES"}, \ {I_NEW, "I_NEW"}, \ {I_WILL_FREE, "I_WILL_FREE"}, \ {I_FREEING, "I_FREEING"}, \ {I_CLEAR, "I_CLEAR"}, \ {I_SYNC, "I_SYNC"}, \ {I_DIRTY_TIME, "I_DIRTY_TIME"}, \ {I_REFERENCED, "I_REFERENCED"}, \ {I_LINKABLE, "I_LINKABLE"}, \ {I_WB_SWITCH, "I_WB_SWITCH"}, \ {I_OVL_INUSE, "I_OVL_INUSE"}, \ {I_CREATING, "I_CREATING"}, \ {I_DONTCACHE, "I_DONTCACHE"}, \ {I_SYNC_QUEUED, "I_SYNC_QUEUED"}, \ {I_PINNING_NETFS_WB, "I_PINNING_NETFS_WB"}, \ {I_LRU_ISOLATING, "I_LRU_ISOLATING"} \ ) /* enums need to be exported to user space */ #undef EM #undef EMe #define EM(a,b) TRACE_DEFINE_ENUM(a); #define EMe(a,b) TRACE_DEFINE_ENUM(a); #define WB_WORK_REASON \ EM( WB_REASON_BACKGROUND, "background") \ EM( WB_REASON_VMSCAN, "vmscan") \ EM( WB_REASON_SYNC, "sync") \ EM( WB_REASON_PERIODIC, "periodic") \ EM( WB_REASON_LAPTOP_TIMER, "laptop_timer") \ EM( WB_REASON_FS_FREE_SPACE, "fs_free_space") \ EM( WB_REASON_FORKER_THREAD, "forker_thread") \ EMe(WB_REASON_FOREIGN_FLUSH, "foreign_flush") WB_WORK_REASON /* * Now redefine the EM() and EMe() macros to map the enums to the strings * that will be printed in the output. */ #undef EM #undef EMe #define EM(a,b) { a, b }, #define EMe(a,b) { a, b } struct wb_writeback_work; DECLARE_EVENT_CLASS(writeback_folio_template, TP_PROTO(struct folio *folio, struct address_space *mapping), TP_ARGS(folio, mapping), TP_STRUCT__entry ( __array(char, name, 32) __field(ino_t, ino) __field(pgoff_t, index) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : NULL), 32); __entry->ino = (mapping && mapping->host) ? mapping->host->i_ino : 0; __entry->index = folio->index; ), TP_printk("bdi %s: ino=%lu index=%lu", __entry->name, (unsigned long)__entry->ino, __entry->index ) ); DEFINE_EVENT(writeback_folio_template, writeback_dirty_folio, TP_PROTO(struct folio *folio, struct address_space *mapping), TP_ARGS(folio, mapping) ); DEFINE_EVENT(writeback_folio_template, folio_wait_writeback, TP_PROTO(struct folio *folio, struct address_space *mapping), TP_ARGS(folio, mapping) ); DECLARE_EVENT_CLASS(writeback_dirty_inode_template, TP_PROTO(struct inode *inode, int flags), TP_ARGS(inode, flags), TP_STRUCT__entry ( __array(char, name, 32) __field(ino_t, ino) __field(unsigned long, state) __field(unsigned long, flags) ), TP_fast_assign( struct backing_dev_info *bdi = inode_to_bdi(inode); /* may be called for files on pseudo FSes w/ unregistered bdi */ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->flags = flags; ), TP_printk("bdi %s: ino=%lu state=%s flags=%s", __entry->name, (unsigned long)__entry->ino, show_inode_state(__entry->state), show_inode_state(__entry->flags) ) ); DEFINE_EVENT(writeback_dirty_inode_template, writeback_mark_inode_dirty, TP_PROTO(struct inode *inode, int flags), TP_ARGS(inode, flags) ); DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start, TP_PROTO(struct inode *inode, int flags), TP_ARGS(inode, flags) ); DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode, TP_PROTO(struct inode *inode, int flags), TP_ARGS(inode, flags) ); #ifdef CREATE_TRACE_POINTS #ifdef CONFIG_CGROUP_WRITEBACK static inline ino_t __trace_wb_assign_cgroup(struct bdi_writeback *wb) { return cgroup_ino(wb->memcg_css->cgroup); } static inline ino_t __trace_wbc_assign_cgroup(struct writeback_control *wbc) { if (wbc->wb) return __trace_wb_assign_cgroup(wbc->wb); else return 1; } #else /* CONFIG_CGROUP_WRITEBACK */ static inline ino_t __trace_wb_assign_cgroup(struct bdi_writeback *wb) { return 1; } static inline ino_t __trace_wbc_assign_cgroup(struct writeback_control *wbc) { return 1; } #endif /* CONFIG_CGROUP_WRITEBACK */ #endif /* CREATE_TRACE_POINTS */ #ifdef CONFIG_CGROUP_WRITEBACK TRACE_EVENT(inode_foreign_history, TP_PROTO(struct inode *inode, struct writeback_control *wbc, unsigned int history), TP_ARGS(inode, wbc, history), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, ino) __field(ino_t, cgroup_ino) __field(unsigned int, history) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); __entry->history = history; ), TP_printk("bdi %s: ino=%lu cgroup_ino=%lu history=0x%x", __entry->name, (unsigned long)__entry->ino, (unsigned long)__entry->cgroup_ino, __entry->history ) ); TRACE_EVENT(inode_switch_wbs, TP_PROTO(struct inode *inode, struct bdi_writeback *old_wb, struct bdi_writeback *new_wb), TP_ARGS(inode, old_wb, new_wb), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, ino) __field(ino_t, old_cgroup_ino) __field(ino_t, new_cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(old_wb->bdi), 32); __entry->ino = inode->i_ino; __entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb); __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb); ), TP_printk("bdi %s: ino=%lu old_cgroup_ino=%lu new_cgroup_ino=%lu", __entry->name, (unsigned long)__entry->ino, (unsigned long)__entry->old_cgroup_ino, (unsigned long)__entry->new_cgroup_ino ) ); TRACE_EVENT(track_foreign_dirty, TP_PROTO(struct folio *folio, struct bdi_writeback *wb), TP_ARGS(folio, wb), TP_STRUCT__entry( __array(char, name, 32) __field(u64, bdi_id) __field(ino_t, ino) __field(unsigned int, memcg_id) __field(ino_t, cgroup_ino) __field(ino_t, page_cgroup_ino) ), TP_fast_assign( struct address_space *mapping = folio_mapping(folio); struct inode *inode = mapping ? mapping->host : NULL; strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->bdi_id = wb->bdi->id; __entry->ino = inode ? inode->i_ino : 0; __entry->memcg_id = wb->memcg_css->id; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); __entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup); ), TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu", __entry->name, __entry->bdi_id, (unsigned long)__entry->ino, __entry->memcg_id, (unsigned long)__entry->cgroup_ino, (unsigned long)__entry->page_cgroup_ino ) ); TRACE_EVENT(flush_foreign, TP_PROTO(struct bdi_writeback *wb, unsigned int frn_bdi_id, unsigned int frn_memcg_id), TP_ARGS(wb, frn_bdi_id, frn_memcg_id), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, cgroup_ino) __field(unsigned int, frn_bdi_id) __field(unsigned int, frn_memcg_id) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); __entry->frn_bdi_id = frn_bdi_id; __entry->frn_memcg_id = frn_memcg_id; ), TP_printk("bdi %s: cgroup_ino=%lu frn_bdi_id=%u frn_memcg_id=%u", __entry->name, (unsigned long)__entry->cgroup_ino, __entry->frn_bdi_id, __entry->frn_memcg_id ) ); #endif DECLARE_EVENT_CLASS(writeback_write_inode_template, TP_PROTO(struct inode *inode, struct writeback_control *wbc), TP_ARGS(inode, wbc), TP_STRUCT__entry ( __array(char, name, 32) __field(ino_t, ino) __field(int, sync_mode) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); ), TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup_ino=%lu", __entry->name, (unsigned long)__entry->ino, __entry->sync_mode, (unsigned long)__entry->cgroup_ino ) ); DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode_start, TP_PROTO(struct inode *inode, struct writeback_control *wbc), TP_ARGS(inode, wbc) ); DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode, TP_PROTO(struct inode *inode, struct writeback_control *wbc), TP_ARGS(inode, wbc) ); DECLARE_EVENT_CLASS(writeback_work_class, TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work), TP_ARGS(wb, work), TP_STRUCT__entry( __array(char, name, 32) __field(long, nr_pages) __field(dev_t, sb_dev) __field(int, sync_mode) __field(int, for_kupdate) __field(int, range_cyclic) __field(int, for_background) __field(int, reason) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; __entry->for_kupdate = work->for_kupdate; __entry->range_cyclic = work->range_cyclic; __entry->for_background = work->for_background; __entry->reason = work->reason; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d " "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup_ino=%lu", __entry->name, MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev), __entry->nr_pages, __entry->sync_mode, __entry->for_kupdate, __entry->range_cyclic, __entry->for_background, __print_symbolic(__entry->reason, WB_WORK_REASON), (unsigned long)__entry->cgroup_ino ) ); #define DEFINE_WRITEBACK_WORK_EVENT(name) \ DEFINE_EVENT(writeback_work_class, name, \ TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work), \ TP_ARGS(wb, work)) DEFINE_WRITEBACK_WORK_EVENT(writeback_queue); DEFINE_WRITEBACK_WORK_EVENT(writeback_exec); DEFINE_WRITEBACK_WORK_EVENT(writeback_start); DEFINE_WRITEBACK_WORK_EVENT(writeback_written); DEFINE_WRITEBACK_WORK_EVENT(writeback_wait); TRACE_EVENT(writeback_pages_written, TP_PROTO(long pages_written), TP_ARGS(pages_written), TP_STRUCT__entry( __field(long, pages) ), TP_fast_assign( __entry->pages = pages_written; ), TP_printk("%ld", __entry->pages) ); DECLARE_EVENT_CLASS(writeback_class, TP_PROTO(struct bdi_writeback *wb), TP_ARGS(wb), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: cgroup_ino=%lu", __entry->name, (unsigned long)__entry->cgroup_ino ) ); #define DEFINE_WRITEBACK_EVENT(name) \ DEFINE_EVENT(writeback_class, name, \ TP_PROTO(struct bdi_writeback *wb), \ TP_ARGS(wb)) DEFINE_WRITEBACK_EVENT(writeback_wake_background); TRACE_EVENT(writeback_bdi_register, TP_PROTO(struct backing_dev_info *bdi), TP_ARGS(bdi), TP_STRUCT__entry( __array(char, name, 32) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); ), TP_printk("bdi %s", __entry->name ) ); DECLARE_EVENT_CLASS(wbc_class, TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), TP_ARGS(wbc, bdi), TP_STRUCT__entry( __array(char, name, 32) __field(long, nr_to_write) __field(long, pages_skipped) __field(int, sync_mode) __field(int, for_kupdate) __field(int, for_background) __field(int, for_reclaim) __field(int, range_cyclic) __field(long, range_start) __field(long, range_end) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->sync_mode = wbc->sync_mode; __entry->for_kupdate = wbc->for_kupdate; __entry->for_background = wbc->for_background; __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; __entry->range_start = (long)wbc->range_start; __entry->range_end = (long)wbc->range_end; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); ), TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d " "bgrd=%d reclm=%d cyclic=%d " "start=0x%lx end=0x%lx cgroup_ino=%lu", __entry->name, __entry->nr_to_write, __entry->pages_skipped, __entry->sync_mode, __entry->for_kupdate, __entry->for_background, __entry->for_reclaim, __entry->range_cyclic, __entry->range_start, __entry->range_end, (unsigned long)__entry->cgroup_ino ) ) #define DEFINE_WBC_EVENT(name) \ DEFINE_EVENT(wbc_class, name, \ TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), \ TP_ARGS(wbc, bdi)) DEFINE_WBC_EVENT(wbc_writepage); TRACE_EVENT(writeback_queue_io, TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work, unsigned long dirtied_before, int moved), TP_ARGS(wb, work, dirtied_before, moved), TP_STRUCT__entry( __array(char, name, 32) __field(unsigned long, older) __field(long, age) __field(int, moved) __field(int, reason) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->older = dirtied_before; __entry->age = (jiffies - dirtied_before) * 1000 / HZ; __entry->moved = moved; __entry->reason = work->reason; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%lu", __entry->name, __entry->older, /* dirtied_before in jiffies */ __entry->age, /* dirtied_before in relative milliseconds */ __entry->moved, __print_symbolic(__entry->reason, WB_WORK_REASON), (unsigned long)__entry->cgroup_ino ) ); TRACE_EVENT(global_dirty_state, TP_PROTO(unsigned long background_thresh, unsigned long dirty_thresh ), TP_ARGS(background_thresh, dirty_thresh ), TP_STRUCT__entry( __field(unsigned long, nr_dirty) __field(unsigned long, nr_writeback) __field(unsigned long, background_thresh) __field(unsigned long, dirty_thresh) __field(unsigned long, dirty_limit) __field(unsigned long, nr_dirtied) __field(unsigned long, nr_written) ), TP_fast_assign( __entry->nr_dirty = global_node_page_state(NR_FILE_DIRTY); __entry->nr_writeback = global_node_page_state(NR_WRITEBACK); __entry->nr_dirtied = global_node_page_state(NR_DIRTIED); __entry->nr_written = global_node_page_state(NR_WRITTEN); __entry->background_thresh = background_thresh; __entry->dirty_thresh = dirty_thresh; __entry->dirty_limit = global_wb_domain.dirty_limit; ), TP_printk("dirty=%lu writeback=%lu " "bg_thresh=%lu thresh=%lu limit=%lu " "dirtied=%lu written=%lu", __entry->nr_dirty, __entry->nr_writeback, __entry->background_thresh, __entry->dirty_thresh, __entry->dirty_limit, __entry->nr_dirtied, __entry->nr_written ) ); #define KBps(x) ((x) << (PAGE_SHIFT - 10)) TRACE_EVENT(bdi_dirty_ratelimit, TP_PROTO(struct bdi_writeback *wb, unsigned long dirty_rate, unsigned long task_ratelimit), TP_ARGS(wb, dirty_rate, task_ratelimit), TP_STRUCT__entry( __array(char, bdi, 32) __field(unsigned long, write_bw) __field(unsigned long, avg_write_bw) __field(unsigned long, dirty_rate) __field(unsigned long, dirty_ratelimit) __field(unsigned long, task_ratelimit) __field(unsigned long, balanced_dirty_ratelimit) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->write_bw = KBps(wb->write_bandwidth); __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); __entry->dirty_rate = KBps(dirty_rate); __entry->dirty_ratelimit = KBps(wb->dirty_ratelimit); __entry->task_ratelimit = KBps(task_ratelimit); __entry->balanced_dirty_ratelimit = KBps(wb->balanced_dirty_ratelimit); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: " "write_bw=%lu awrite_bw=%lu dirty_rate=%lu " "dirty_ratelimit=%lu task_ratelimit=%lu " "balanced_dirty_ratelimit=%lu cgroup_ino=%lu", __entry->bdi, __entry->write_bw, /* write bandwidth */ __entry->avg_write_bw, /* avg write bandwidth */ __entry->dirty_rate, /* bdi dirty rate */ __entry->dirty_ratelimit, /* base ratelimit */ __entry->task_ratelimit, /* ratelimit with position control */ __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */ (unsigned long)__entry->cgroup_ino ) ); TRACE_EVENT(balance_dirty_pages, TP_PROTO(struct bdi_writeback *wb, unsigned long thresh, unsigned long bg_thresh, unsigned long dirty, unsigned long bdi_thresh, unsigned long bdi_dirty, unsigned long dirty_ratelimit, unsigned long task_ratelimit, unsigned long dirtied, unsigned long period, long pause, unsigned long start_time), TP_ARGS(wb, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty, dirty_ratelimit, task_ratelimit, dirtied, period, pause, start_time), TP_STRUCT__entry( __array( char, bdi, 32) __field(unsigned long, limit) __field(unsigned long, setpoint) __field(unsigned long, dirty) __field(unsigned long, bdi_setpoint) __field(unsigned long, bdi_dirty) __field(unsigned long, dirty_ratelimit) __field(unsigned long, task_ratelimit) __field(unsigned int, dirtied) __field(unsigned int, dirtied_pause) __field(unsigned long, paused) __field( long, pause) __field(unsigned long, period) __field( long, think) __field(ino_t, cgroup_ino) ), TP_fast_assign( unsigned long freerun = (thresh + bg_thresh) / 2; strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->limit = global_wb_domain.dirty_limit; __entry->setpoint = (global_wb_domain.dirty_limit + freerun) / 2; __entry->dirty = dirty; __entry->bdi_setpoint = __entry->setpoint * bdi_thresh / (thresh + 1); __entry->bdi_dirty = bdi_dirty; __entry->dirty_ratelimit = KBps(dirty_ratelimit); __entry->task_ratelimit = KBps(task_ratelimit); __entry->dirtied = dirtied; __entry->dirtied_pause = current->nr_dirtied_pause; __entry->think = current->dirty_paused_when == 0 ? 0 : (long)(jiffies - current->dirty_paused_when) * 1000/HZ; __entry->period = period * 1000 / HZ; __entry->pause = pause * 1000 / HZ; __entry->paused = (jiffies - start_time) * 1000 / HZ; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: " "limit=%lu setpoint=%lu dirty=%lu " "bdi_setpoint=%lu bdi_dirty=%lu " "dirty_ratelimit=%lu task_ratelimit=%lu " "dirtied=%u dirtied_pause=%u " "paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%lu", __entry->bdi, __entry->limit, __entry->setpoint, __entry->dirty, __entry->bdi_setpoint, __entry->bdi_dirty, __entry->dirty_ratelimit, __entry->task_ratelimit, __entry->dirtied, __entry->dirtied_pause, __entry->paused, /* ms */ __entry->pause, /* ms */ __entry->period, /* ms */ __entry->think, /* ms */ (unsigned long)__entry->cgroup_ino ) ); TRACE_EVENT(writeback_sb_inodes_requeue, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, ino) __field(unsigned long, state) __field(unsigned long, dirtied_when) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; __entry->cgroup_ino = __trace_wb_assign_cgroup(inode_to_wb(inode)); ), TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup_ino=%lu", __entry->name, (unsigned long)__entry->ino, show_inode_state(__entry->state), __entry->dirtied_when, (jiffies - __entry->dirtied_when) / HZ, (unsigned long)__entry->cgroup_ino ) ); DECLARE_EVENT_CLASS(writeback_single_inode_template, TP_PROTO(struct inode *inode, struct writeback_control *wbc, unsigned long nr_to_write ), TP_ARGS(inode, wbc, nr_to_write), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, ino) __field(unsigned long, state) __field(unsigned long, dirtied_when) __field(unsigned long, writeback_index) __field(long, nr_to_write) __field(unsigned long, wrote) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; __entry->writeback_index = inode->i_mapping->writeback_index; __entry->nr_to_write = nr_to_write; __entry->wrote = nr_to_write - wbc->nr_to_write; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); ), TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu " "index=%lu to_write=%ld wrote=%lu cgroup_ino=%lu", __entry->name, (unsigned long)__entry->ino, show_inode_state(__entry->state), __entry->dirtied_when, (jiffies - __entry->dirtied_when) / HZ, __entry->writeback_index, __entry->nr_to_write, __entry->wrote, (unsigned long)__entry->cgroup_ino ) ); DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode_start, TP_PROTO(struct inode *inode, struct writeback_control *wbc, unsigned long nr_to_write), TP_ARGS(inode, wbc, nr_to_write) ); DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, TP_PROTO(struct inode *inode, struct writeback_control *wbc, unsigned long nr_to_write), TP_ARGS(inode, wbc, nr_to_write) ); DECLARE_EVENT_CLASS(writeback_inode_template, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field(unsigned long, state ) __field( __u16, mode ) __field(unsigned long, dirtied_when ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->mode = inode->i_mode; __entry->dirtied_when = inode->dirtied_when; ), TP_printk("dev %d,%d ino %lu dirtied %lu state %s mode 0%o", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long)__entry->ino, __entry->dirtied_when, show_inode_state(__entry->state), __entry->mode) ); DEFINE_EVENT(writeback_inode_template, writeback_lazytime, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(writeback_inode_template, writeback_lazytime_iput, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(writeback_inode_template, writeback_dirty_inode_enqueue, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); /* * Inode writeback list tracking. */ DEFINE_EVENT(writeback_inode_template, sb_mark_inode_writeback, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(writeback_inode_template, sb_clear_inode_writeback, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); #endif /* _TRACE_WRITEBACK_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
2 2 2 2 4 2 2 2 2 2 2 2 2 2 2 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/etherdevice.h> #include <linux/if_tap.h> #include <linux/if_vlan.h> #include <linux/interrupt.h> #include <linux/nsproxy.h> #include <linux/compat.h> #include <linux/if_tun.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/cache.h> #include <linux/sched/signal.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/wait.h> #include <linux/cdev.h> #include <linux/idr.h> #include <linux/fs.h> #include <linux/uio.h> #include <net/gso.h> #include <net/net_namespace.h> #include <net/rtnetlink.h> #include <net/sock.h> #include <net/xdp.h> #include <linux/virtio_net.h> #include <linux/skb_array.h> #define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) #define TAP_VNET_LE 0x80000000 #define TAP_VNET_BE 0x40000000 #ifdef CONFIG_TUN_VNET_CROSS_LE static inline bool tap_legacy_is_little_endian(struct tap_queue *q) { return q->flags & TAP_VNET_BE ? false : virtio_legacy_is_little_endian(); } static long tap_get_vnet_be(struct tap_queue *q, int __user *sp) { int s = !!(q->flags & TAP_VNET_BE); if (put_user(s, sp)) return -EFAULT; return 0; } static long tap_set_vnet_be(struct tap_queue *q, int __user *sp) { int s; if (get_user(s, sp)) return -EFAULT; if (s) q->flags |= TAP_VNET_BE; else q->flags &= ~TAP_VNET_BE; return 0; } #else static inline bool tap_legacy_is_little_endian(struct tap_queue *q) { return virtio_legacy_is_little_endian(); } static long tap_get_vnet_be(struct tap_queue *q, int __user *argp) { return -EINVAL; } static long tap_set_vnet_be(struct tap_queue *q, int __user *argp) { return -EINVAL; } #endif /* CONFIG_TUN_VNET_CROSS_LE */ static inline bool tap_is_little_endian(struct tap_queue *q) { return q->flags & TAP_VNET_LE || tap_legacy_is_little_endian(q); } static inline u16 tap16_to_cpu(struct tap_queue *q, __virtio16 val) { return __virtio16_to_cpu(tap_is_little_endian(q), val); } static inline __virtio16 cpu_to_tap16(struct tap_queue *q, u16 val) { return __cpu_to_virtio16(tap_is_little_endian(q), val); } static struct proto tap_proto = { .name = "tap", .owner = THIS_MODULE, .obj_size = sizeof(struct tap_queue), }; #define TAP_NUM_DEVS (1U << MINORBITS) static LIST_HEAD(major_list); struct major_info { struct rcu_head rcu; dev_t major; struct idr minor_idr; spinlock_t minor_lock; const char *device_name; struct list_head next; }; #define GOODCOPY_LEN 128 static const struct proto_ops tap_socket_ops; #define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO) #define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST) static struct tap_dev *tap_dev_get_rcu(const struct net_device *dev) { return rcu_dereference(dev->rx_handler_data); } /* * RCU usage: * The tap_queue and the macvlan_dev are loosely coupled, the * pointers from one to the other can only be read while rcu_read_lock * or rtnl is held. * * Both the file and the macvlan_dev hold a reference on the tap_queue * through sock_hold(&q->sk). When the macvlan_dev goes away first, * q->vlan becomes inaccessible. When the files gets closed, * tap_get_queue() fails. * * There may still be references to the struct sock inside of the * queue from outbound SKBs, but these never reference back to the * file or the dev. The data structure is freed through __sk_free * when both our references and any pending SKBs are gone. */ static int tap_enable_queue(struct tap_dev *tap, struct file *file, struct tap_queue *q) { int err = -EINVAL; ASSERT_RTNL(); if (q->enabled) goto out; err = 0; rcu_assign_pointer(tap->taps[tap->numvtaps], q); q->queue_index = tap->numvtaps; q->enabled = true; tap->numvtaps++; out: return err; } /* Requires RTNL */ static int tap_set_queue(struct tap_dev *tap, struct file *file, struct tap_queue *q) { if (tap->numqueues == MAX_TAP_QUEUES) return -EBUSY; rcu_assign_pointer(q->tap, tap); rcu_assign_pointer(tap->taps[tap->numvtaps], q); sock_hold(&q->sk); q->file = file; q->queue_index = tap->numvtaps; q->enabled = true; file->private_data = q; list_add_tail(&q->next, &tap->queue_list); tap->numvtaps++; tap->numqueues++; return 0; } static int tap_disable_queue(struct tap_queue *q) { struct tap_dev *tap; struct tap_queue *nq; ASSERT_RTNL(); if (!q->enabled) return -EINVAL; tap = rtnl_dereference(q->tap); if (tap) { int index = q->queue_index; BUG_ON(index >= tap->numvtaps); nq = rtnl_dereference(tap->taps[tap->numvtaps - 1]); nq->queue_index = index; rcu_assign_pointer(tap->taps[index], nq); RCU_INIT_POINTER(tap->taps[tap->numvtaps - 1], NULL); q->enabled = false; tap->numvtaps--; } return 0; } /* * The file owning the queue got closed, give up both * the reference that the files holds as well as the * one from the macvlan_dev if that still exists. * * Using the spinlock makes sure that we don't get * to the queue again after destroying it. */ static void tap_put_queue(struct tap_queue *q) { struct tap_dev *tap; rtnl_lock(); tap = rtnl_dereference(q->tap); if (tap) { if (q->enabled) BUG_ON(tap_disable_queue(q)); tap->numqueues--; RCU_INIT_POINTER(q->tap, NULL); sock_put(&q->sk); list_del_init(&q->next); } rtnl_unlock(); synchronize_rcu(); sock_put(&q->sk); } /* * Select a queue based on the rxq of the device on which this packet * arrived. If the incoming device is not mq, calculate a flow hash * to select a queue. If all fails, find the first available queue. * Cache vlan->numvtaps since it can become zero during the execution * of this function. */ static struct tap_queue *tap_get_queue(struct tap_dev *tap, struct sk_buff *skb) { struct tap_queue *queue = NULL; /* Access to taps array is protected by rcu, but access to numvtaps * isn't. Below we use it to lookup a queue, but treat it as a hint * and validate that the result isn't NULL - in case we are * racing against queue removal. */ int numvtaps = READ_ONCE(tap->numvtaps); __u32 rxq; if (!numvtaps) goto out; if (numvtaps == 1) goto single; /* Check if we can use flow to select a queue */ rxq = skb_get_hash(skb); if (rxq) { queue = rcu_dereference(tap->taps[rxq % numvtaps]); goto out; } if (likely(skb_rx_queue_recorded(skb))) { rxq = skb_get_rx_queue(skb); while (unlikely(rxq >= numvtaps)) rxq -= numvtaps; queue = rcu_dereference(tap->taps[rxq]); goto out; } single: queue = rcu_dereference(tap->taps[0]); out: return queue; } /* * The net_device is going away, give up the reference * that it holds on all queues and safely set the pointer * from the queues to NULL. */ void tap_del_queues(struct tap_dev *tap) { struct tap_queue *q, *tmp; ASSERT_RTNL(); list_for_each_entry_safe(q, tmp, &tap->queue_list, next) { list_del_init(&q->next); RCU_INIT_POINTER(q->tap, NULL); if (q->enabled) tap->numvtaps--; tap->numqueues--; sock_put(&q->sk); } BUG_ON(tap->numvtaps); BUG_ON(tap->numqueues); /* guarantee that any future tap_set_queue will fail */ tap->numvtaps = MAX_TAP_QUEUES; } EXPORT_SYMBOL_GPL(tap_del_queues); rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; struct tap_dev *tap; struct tap_queue *q; netdev_features_t features = TAP_FEATURES; enum skb_drop_reason drop_reason; tap = tap_dev_get_rcu(dev); if (!tap) return RX_HANDLER_PASS; q = tap_get_queue(tap, skb); if (!q) return RX_HANDLER_PASS; skb_push(skb, ETH_HLEN); /* Apply the forward feature mask so that we perform segmentation * according to users wishes. This only works if VNET_HDR is * enabled. */ if (q->flags & IFF_VNET_HDR) features |= tap->tap_features; if (netif_needs_gso(skb, features)) { struct sk_buff *segs = __skb_gso_segment(skb, features, false); struct sk_buff *next; if (IS_ERR(segs)) { drop_reason = SKB_DROP_REASON_SKB_GSO_SEG; goto drop; } if (!segs) { if (ptr_ring_produce(&q->ring, skb)) { drop_reason = SKB_DROP_REASON_FULL_RING; goto drop; } goto wake_up; } consume_skb(skb); skb_list_walk_safe(segs, skb, next) { skb_mark_not_on_list(skb); if (ptr_ring_produce(&q->ring, skb)) { drop_reason = SKB_DROP_REASON_FULL_RING; kfree_skb_reason(skb, drop_reason); kfree_skb_list_reason(next, drop_reason); break; } } } else { /* If we receive a partial checksum and the tap side * doesn't support checksum offload, compute the checksum. * Note: it doesn't matter which checksum feature to * check, we either support them all or none. */ if (skb->ip_summed == CHECKSUM_PARTIAL && !(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) { drop_reason = SKB_DROP_REASON_SKB_CSUM; goto drop; } if (ptr_ring_produce(&q->ring, skb)) { drop_reason = SKB_DROP_REASON_FULL_RING; goto drop; } } wake_up: wake_up_interruptible_poll(sk_sleep(&q->sk), EPOLLIN | EPOLLRDNORM | EPOLLRDBAND); return RX_HANDLER_CONSUMED; drop: /* Count errors/drops only here, thus don't care about args. */ if (tap->count_rx_dropped) tap->count_rx_dropped(tap); kfree_skb_reason(skb, drop_reason); return RX_HANDLER_CONSUMED; } EXPORT_SYMBOL_GPL(tap_handle_frame); static struct major_info *tap_get_major(int major) { struct major_info *tap_major; list_for_each_entry_rcu(tap_major, &major_list, next) { if (tap_major->major == major) return tap_major; } return NULL; } int tap_get_minor(dev_t major, struct tap_dev *tap) { int retval = -ENOMEM; struct major_info *tap_major; rcu_read_lock(); tap_major = tap_get_major(MAJOR(major)); if (!tap_major) { retval = -EINVAL; goto unlock; } spin_lock(&tap_major->minor_lock); retval = idr_alloc(&tap_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_ATOMIC); if (retval >= 0) { tap->minor = retval; } else if (retval == -ENOSPC) { netdev_err(tap->dev, "Too many tap devices\n"); retval = -EINVAL; } spin_unlock(&tap_major->minor_lock); unlock: rcu_read_unlock(); return retval < 0 ? retval : 0; } EXPORT_SYMBOL_GPL(tap_get_minor); void tap_free_minor(dev_t major, struct tap_dev *tap) { struct major_info *tap_major; rcu_read_lock(); tap_major = tap_get_major(MAJOR(major)); if (!tap_major) { goto unlock; } spin_lock(&tap_major->minor_lock); if (tap->minor) { idr_remove(&tap_major->minor_idr, tap->minor); tap->minor = 0; } spin_unlock(&tap_major->minor_lock); unlock: rcu_read_unlock(); } EXPORT_SYMBOL_GPL(tap_free_minor); static struct tap_dev *dev_get_by_tap_file(int major, int minor) { struct net_device *dev = NULL; struct tap_dev *tap; struct major_info *tap_major; rcu_read_lock(); tap_major = tap_get_major(major); if (!tap_major) { tap = NULL; goto unlock; } spin_lock(&tap_major->minor_lock); tap = idr_find(&tap_major->minor_idr, minor); if (tap) { dev = tap->dev; dev_hold(dev); } spin_unlock(&tap_major->minor_lock); unlock: rcu_read_unlock(); return tap; } static void tap_sock_write_space(struct sock *sk) { wait_queue_head_t *wqueue; if (!sock_writeable(sk) || !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; wqueue = sk_sleep(sk); if (wqueue && waitqueue_active(wqueue)) wake_up_interruptible_poll(wqueue, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); } static void tap_sock_destruct(struct sock *sk) { struct tap_queue *q = container_of(sk, struct tap_queue, sk); ptr_ring_cleanup(&q->ring, __skb_array_destroy_skb); } static int tap_open(struct inode *inode, struct file *file) { struct net *net = current->nsproxy->net_ns; struct tap_dev *tap; struct tap_queue *q; int err = -ENODEV; rtnl_lock(); tap = dev_get_by_tap_file(imajor(inode), iminor(inode)); if (!tap) goto err; err = -ENOMEM; q = (struct tap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tap_proto, 0); if (!q) goto err; if (ptr_ring_init(&q->ring, tap->dev->tx_queue_len, GFP_KERNEL)) { sk_free(&q->sk); goto err; } init_waitqueue_head(&q->sock.wq.wait); q->sock.type = SOCK_RAW; q->sock.state = SS_CONNECTED; q->sock.file = file; q->sock.ops = &tap_socket_ops; sock_init_data_uid(&q->sock, &q->sk, current_fsuid()); q->sk.sk_write_space = tap_sock_write_space; q->sk.sk_destruct = tap_sock_destruct; q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; q->vnet_hdr_sz = sizeof(struct virtio_net_hdr); /* * so far only KVM virtio_net uses tap, enable zero copy between * guest kernel and host kernel when lower device supports zerocopy * * The macvlan supports zerocopy iff the lower device supports zero * copy so we don't have to look at the lower device directly. */ if ((tap->dev->features & NETIF_F_HIGHDMA) && (tap->dev->features & NETIF_F_SG)) sock_set_flag(&q->sk, SOCK_ZEROCOPY); err = tap_set_queue(tap, file, q); if (err) { /* tap_sock_destruct() will take care of freeing ptr_ring */ goto err_put; } /* tap groks IOCB_NOWAIT just fine, mark it as such */ file->f_mode |= FMODE_NOWAIT; dev_put(tap->dev); rtnl_unlock(); return err; err_put: sock_put(&q->sk); err: if (tap) dev_put(tap->dev); rtnl_unlock(); return err; } static int tap_release(struct inode *inode, struct file *file) { struct tap_queue *q = file->private_data; tap_put_queue(q); return 0; } static __poll_t tap_poll(struct file *file, poll_table *wait) { struct tap_queue *q = file->private_data; __poll_t mask = EPOLLERR; if (!q) goto out; mask = 0; poll_wait(file, &q->sock.wq.wait, wait); if (!ptr_ring_empty(&q->ring)) mask |= EPOLLIN | EPOLLRDNORM; if (sock_writeable(&q->sk) || (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock.flags) && sock_writeable(&q->sk))) mask |= EPOLLOUT | EPOLLWRNORM; out: return mask; } static inline struct sk_buff *tap_alloc_skb(struct sock *sk, size_t prepad, size_t len, size_t linear, int noblock, int *err) { struct sk_buff *skb; /* Under a page? Don't bother with paged skb. */ if (prepad + len < PAGE_SIZE || !linear) linear = len; if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER); skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, err, PAGE_ALLOC_COSTLY_ORDER); if (!skb) return NULL; skb_reserve(skb, prepad); skb_put(skb, linear); skb->data_len = len - linear; skb->len += len - linear; return skb; } /* Neighbour code has some assumptions on HH_DATA_MOD alignment */ #define TAP_RESERVE HH_DATA_OFF(ETH_HLEN) /* Get packet from user space buffer */ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, struct iov_iter *from, int noblock) { int good_linear = SKB_MAX_HEAD(TAP_RESERVE); struct sk_buff *skb; struct tap_dev *tap; unsigned long total_len = iov_iter_count(from); unsigned long len = total_len; int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0; int copylen = 0; int depth; bool zerocopy = false; size_t linear; enum skb_drop_reason drop_reason; if (q->flags & IFF_VNET_HDR) { vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); err = -EINVAL; if (len < vnet_hdr_len) goto err; len -= vnet_hdr_len; err = -EFAULT; if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr), from)) goto err; iov_iter_advance(from, vnet_hdr_len - sizeof(vnet_hdr)); if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && tap16_to_cpu(q, vnet_hdr.csum_start) + tap16_to_cpu(q, vnet_hdr.csum_offset) + 2 > tap16_to_cpu(q, vnet_hdr.hdr_len)) vnet_hdr.hdr_len = cpu_to_tap16(q, tap16_to_cpu(q, vnet_hdr.csum_start) + tap16_to_cpu(q, vnet_hdr.csum_offset) + 2); err = -EINVAL; if (tap16_to_cpu(q, vnet_hdr.hdr_len) > len) goto err; } err = -EINVAL; if (unlikely(len < ETH_HLEN)) goto err; if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { struct iov_iter i; copylen = vnet_hdr.hdr_len ? tap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN; if (copylen > good_linear) copylen = good_linear; else if (copylen < ETH_HLEN) copylen = ETH_HLEN; linear = copylen; i = *from; iov_iter_advance(&i, copylen); if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS) zerocopy = true; } if (!zerocopy) { copylen = len; linear = tap16_to_cpu(q, vnet_hdr.hdr_len); if (linear > good_linear) linear = good_linear; else if (linear < ETH_HLEN) linear = ETH_HLEN; } skb = tap_alloc_skb(&q->sk, TAP_RESERVE, copylen, linear, noblock, &err); if (!skb) goto err; if (zerocopy) err = zerocopy_sg_from_iter(skb, from); else err = skb_copy_datagram_from_iter(skb, 0, from, len); if (err) { drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; goto err_kfree; } skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_header(skb); skb->protocol = eth_hdr(skb)->h_proto; rcu_read_lock(); tap = rcu_dereference(q->tap); if (!tap) { kfree_skb(skb); rcu_read_unlock(); return total_len; } skb->dev = tap->dev; if (vnet_hdr_len) { err = virtio_net_hdr_to_skb(skb, &vnet_hdr, tap_is_little_endian(q)); if (err) { rcu_read_unlock(); drop_reason = SKB_DROP_REASON_DEV_HDR; goto err_kfree; } } skb_probe_transport_header(skb); /* Move network header to the right position for VLAN tagged packets */ if (eth_type_vlan(skb->protocol) && vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) skb_set_network_header(skb, depth); /* copy skb_ubuf_info for callback when skb has no error */ if (zerocopy) { skb_zcopy_init(skb, msg_control); } else if (msg_control) { struct ubuf_info *uarg = msg_control; uarg->ops->complete(NULL, uarg, false); } dev_queue_xmit(skb); rcu_read_unlock(); return total_len; err_kfree: kfree_skb_reason(skb, drop_reason); err: rcu_read_lock(); tap = rcu_dereference(q->tap); if (tap && tap->count_tx_dropped) tap->count_tx_dropped(tap); rcu_read_unlock(); return err; } static ssize_t tap_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct tap_queue *q = file->private_data; int noblock = 0; if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) noblock = 1; return tap_get_user(q, NULL, from, noblock); } /* Put packet to the user space buffer */ static ssize_t tap_put_user(struct tap_queue *q, const struct sk_buff *skb, struct iov_iter *iter) { int ret; int vnet_hdr_len = 0; int vlan_offset = 0; int total; if (q->flags & IFF_VNET_HDR) { int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; struct virtio_net_hdr vnet_hdr; vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); if (iov_iter_count(iter) < vnet_hdr_len) return -EINVAL; if (virtio_net_hdr_from_skb(skb, &vnet_hdr, tap_is_little_endian(q), true, vlan_hlen)) BUG(); if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) != sizeof(vnet_hdr)) return -EFAULT; iov_iter_advance(iter, vnet_hdr_len - sizeof(vnet_hdr)); } total = vnet_hdr_len; total += skb->len; if (skb_vlan_tag_present(skb)) { struct { __be16 h_vlan_proto; __be16 h_vlan_TCI; } veth; veth.h_vlan_proto = skb->vlan_proto; veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); total += VLAN_HLEN; ret = skb_copy_datagram_iter(skb, 0, iter, vlan_offset); if (ret || !iov_iter_count(iter)) goto done; ret = copy_to_iter(&veth, sizeof(veth), iter); if (ret != sizeof(veth) || !iov_iter_count(iter)) goto done; } ret = skb_copy_datagram_iter(skb, vlan_offset, iter, skb->len - vlan_offset); done: return ret ? ret : total; } static ssize_t tap_do_read(struct tap_queue *q, struct iov_iter *to, int noblock, struct sk_buff *skb) { DEFINE_WAIT(wait); ssize_t ret = 0; if (!iov_iter_count(to)) { kfree_skb(skb); return 0; } if (skb) goto put; while (1) { if (!noblock) prepare_to_wait(sk_sleep(&q->sk), &wait, TASK_INTERRUPTIBLE); /* Read frames from the queue */ skb = ptr_ring_consume(&q->ring); if (skb) break; if (noblock) { ret = -EAGAIN; break; } if (signal_pending(current)) { ret = -ERESTARTSYS; break; } /* Nothing to read, let's sleep */ schedule(); } if (!noblock) finish_wait(sk_sleep(&q->sk), &wait); put: if (skb) { ret = tap_put_user(q, skb, to); if (unlikely(ret < 0)) kfree_skb(skb); else consume_skb(skb); } return ret; } static ssize_t tap_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct tap_queue *q = file->private_data; ssize_t len = iov_iter_count(to), ret; int noblock = 0; if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) noblock = 1; ret = tap_do_read(q, to, noblock, NULL); ret = min_t(ssize_t, ret, len); if (ret > 0) iocb->ki_pos = ret; return ret; } static struct tap_dev *tap_get_tap_dev(struct tap_queue *q) { struct tap_dev *tap; ASSERT_RTNL(); tap = rtnl_dereference(q->tap); if (tap) dev_hold(tap->dev); return tap; } static void tap_put_tap_dev(struct tap_dev *tap) { dev_put(tap->dev); } static int tap_ioctl_set_queue(struct file *file, unsigned int flags) { struct tap_queue *q = file->private_data; struct tap_dev *tap; int ret; tap = tap_get_tap_dev(q); if (!tap) return -EINVAL; if (flags & IFF_ATTACH_QUEUE) ret = tap_enable_queue(tap, file, q); else if (flags & IFF_DETACH_QUEUE) ret = tap_disable_queue(q); else ret = -EINVAL; tap_put_tap_dev(tap); return ret; } static int set_offload(struct tap_queue *q, unsigned long arg) { struct tap_dev *tap; netdev_features_t features; netdev_features_t feature_mask = 0; tap = rtnl_dereference(q->tap); if (!tap) return -ENOLINK; features = tap->dev->features; if (arg & TUN_F_CSUM) { feature_mask = NETIF_F_HW_CSUM; if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) { if (arg & TUN_F_TSO_ECN) feature_mask |= NETIF_F_TSO_ECN; if (arg & TUN_F_TSO4) feature_mask |= NETIF_F_TSO; if (arg & TUN_F_TSO6) feature_mask |= NETIF_F_TSO6; } /* TODO: for now USO4 and USO6 should work simultaneously */ if ((arg & (TUN_F_USO4 | TUN_F_USO6)) == (TUN_F_USO4 | TUN_F_USO6)) features |= NETIF_F_GSO_UDP_L4; } /* tun/tap driver inverts the usage for TSO offloads, where * setting the TSO bit means that the userspace wants to * accept TSO frames and turning it off means that user space * does not support TSO. * For tap, we have to invert it to mean the same thing. * When user space turns off TSO, we turn off GSO/LRO so that * user-space will not receive TSO frames. */ if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6) || (feature_mask & (TUN_F_USO4 | TUN_F_USO6)) == (TUN_F_USO4 | TUN_F_USO6)) features |= RX_OFFLOADS; else features &= ~RX_OFFLOADS; /* tap_features are the same as features on tun/tap and * reflect user expectations. */ tap->tap_features = feature_mask; if (tap->update_features) tap->update_features(tap, features); return 0; } /* * provide compatibility with generic tun/tap interface */ static long tap_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct tap_queue *q = file->private_data; struct tap_dev *tap; void __user *argp = (void __user *)arg; struct ifreq __user *ifr = argp; unsigned int __user *up = argp; unsigned short u; int __user *sp = argp; struct sockaddr sa; int s; int ret; switch (cmd) { case TUNSETIFF: /* ignore the name, just look at flags */ if (get_user(u, &ifr->ifr_flags)) return -EFAULT; ret = 0; if ((u & ~TAP_IFFEATURES) != (IFF_NO_PI | IFF_TAP)) ret = -EINVAL; else q->flags = (q->flags & ~TAP_IFFEATURES) | u; return ret; case TUNGETIFF: rtnl_lock(); tap = tap_get_tap_dev(q); if (!tap) { rtnl_unlock(); return -ENOLINK; } ret = 0; u = q->flags; if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) || put_user(u, &ifr->ifr_flags)) ret = -EFAULT; tap_put_tap_dev(tap); rtnl_unlock(); return ret; case TUNSETQUEUE: if (get_user(u, &ifr->ifr_flags)) return -EFAULT; rtnl_lock(); ret = tap_ioctl_set_queue(file, u); rtnl_unlock(); return ret; case TUNGETFEATURES: if (put_user(IFF_TAP | IFF_NO_PI | TAP_IFFEATURES, up)) return -EFAULT; return 0; case TUNSETSNDBUF: if (get_user(s, sp)) return -EFAULT; if (s <= 0) return -EINVAL; q->sk.sk_sndbuf = s; return 0; case TUNGETVNETHDRSZ: s = q->vnet_hdr_sz; if (put_user(s, sp)) return -EFAULT; return 0; case TUNSETVNETHDRSZ: if (get_user(s, sp)) return -EFAULT; if (s < (int)sizeof(struct virtio_net_hdr)) return -EINVAL; q->vnet_hdr_sz = s; return 0; case TUNGETVNETLE: s = !!(q->flags & TAP_VNET_LE); if (put_user(s, sp)) return -EFAULT; return 0; case TUNSETVNETLE: if (get_user(s, sp)) return -EFAULT; if (s) q->flags |= TAP_VNET_LE; else q->flags &= ~TAP_VNET_LE; return 0; case TUNGETVNETBE: return tap_get_vnet_be(q, sp); case TUNSETVNETBE: return tap_set_vnet_be(q, sp); case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN | TUN_F_UFO | TUN_F_USO4 | TUN_F_USO6)) return -EINVAL; rtnl_lock(); ret = set_offload(q, arg); rtnl_unlock(); return ret; case SIOCGIFHWADDR: rtnl_lock(); tap = tap_get_tap_dev(q); if (!tap) { rtnl_unlock(); return -ENOLINK; } ret = 0; dev_get_mac_address(&sa, dev_net(tap->dev), tap->dev->name); if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) || copy_to_user(&ifr->ifr_hwaddr, &sa, sizeof(sa))) ret = -EFAULT; tap_put_tap_dev(tap); rtnl_unlock(); return ret; case SIOCSIFHWADDR: if (copy_from_user(&sa, &ifr->ifr_hwaddr, sizeof(sa))) return -EFAULT; rtnl_lock(); tap = tap_get_tap_dev(q); if (!tap) { rtnl_unlock(); return -ENOLINK; } ret = dev_set_mac_address_user(tap->dev, &sa, NULL); tap_put_tap_dev(tap); rtnl_unlock(); return ret; default: return -EINVAL; } } static const struct file_operations tap_fops = { .owner = THIS_MODULE, .open = tap_open, .release = tap_release, .read_iter = tap_read_iter, .write_iter = tap_write_iter, .poll = tap_poll, .llseek = no_llseek, .unlocked_ioctl = tap_ioctl, .compat_ioctl = compat_ptr_ioctl, }; static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) { struct tun_xdp_hdr *hdr = xdp->data_hard_start; struct virtio_net_hdr *gso = &hdr->gso; int buflen = hdr->buflen; int vnet_hdr_len = 0; struct tap_dev *tap; struct sk_buff *skb; int err, depth; if (unlikely(xdp->data_end - xdp->data < ETH_HLEN)) { err = -EINVAL; goto err; } if (q->flags & IFF_VNET_HDR) vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); skb = build_skb(xdp->data_hard_start, buflen); if (!skb) { err = -ENOMEM; goto err; } skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_header(skb); skb->protocol = eth_hdr(skb)->h_proto; if (vnet_hdr_len) { err = virtio_net_hdr_to_skb(skb, gso, tap_is_little_endian(q)); if (err) goto err_kfree; } /* Move network header to the right position for VLAN tagged packets */ if (eth_type_vlan(skb->protocol) && vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) skb_set_network_header(skb, depth); rcu_read_lock(); tap = rcu_dereference(q->tap); if (tap) { skb->dev = tap->dev; skb_probe_transport_header(skb); dev_queue_xmit(skb); } else { kfree_skb(skb); } rcu_read_unlock(); return 0; err_kfree: kfree_skb(skb); err: rcu_read_lock(); tap = rcu_dereference(q->tap); if (tap && tap->count_tx_dropped) tap->count_tx_dropped(tap); rcu_read_unlock(); return err; } static int tap_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { struct tap_queue *q = container_of(sock, struct tap_queue, sock); struct tun_msg_ctl *ctl = m->msg_control; struct xdp_buff *xdp; int i; if (m->msg_controllen == sizeof(struct tun_msg_ctl) && ctl && ctl->type == TUN_MSG_PTR) { for (i = 0; i < ctl->num; i++) { xdp = &((struct xdp_buff *)ctl->ptr)[i]; tap_get_user_xdp(q, xdp); } return 0; } return tap_get_user(q, ctl ? ctl->ptr : NULL, &m->msg_iter, m->msg_flags & MSG_DONTWAIT); } static int tap_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct tap_queue *q = container_of(sock, struct tap_queue, sock); struct sk_buff *skb = m->msg_control; int ret; if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) { kfree_skb(skb); return -EINVAL; } ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb); if (ret > total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? ret : total_len; } return ret; } static int tap_peek_len(struct socket *sock) { struct tap_queue *q = container_of(sock, struct tap_queue, sock); return PTR_RING_PEEK_CALL(&q->ring, __skb_array_len_with_tag); } /* Ops structure to mimic raw sockets with tun */ static const struct proto_ops tap_socket_ops = { .sendmsg = tap_sendmsg, .recvmsg = tap_recvmsg, .peek_len = tap_peek_len, }; /* Get an underlying socket object from tun file. Returns error unless file is * attached to a device. The returned object works like a packet socket, it * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for * holding a reference to the file for as long as the socket is in use. */ struct socket *tap_get_socket(struct file *file) { struct tap_queue *q; if (file->f_op != &tap_fops) return ERR_PTR(-EINVAL); q = file->private_data; if (!q) return ERR_PTR(-EBADFD); return &q->sock; } EXPORT_SYMBOL_GPL(tap_get_socket); struct ptr_ring *tap_get_ptr_ring(struct file *file) { struct tap_queue *q; if (file->f_op != &tap_fops) return ERR_PTR(-EINVAL); q = file->private_data; if (!q) return ERR_PTR(-EBADFD); return &q->ring; } EXPORT_SYMBOL_GPL(tap_get_ptr_ring); int tap_queue_resize(struct tap_dev *tap) { struct net_device *dev = tap->dev; struct tap_queue *q; struct ptr_ring **rings; int n = tap->numqueues; int ret, i = 0; rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL); if (!rings) return -ENOMEM; list_for_each_entry(q, &tap->queue_list, next) rings[i++] = &q->ring; ret = ptr_ring_resize_multiple(rings, n, dev->tx_queue_len, GFP_KERNEL, __skb_array_destroy_skb); kfree(rings); return ret; } EXPORT_SYMBOL_GPL(tap_queue_resize); static int tap_list_add(dev_t major, const char *device_name) { struct major_info *tap_major; tap_major = kzalloc(sizeof(*tap_major), GFP_ATOMIC); if (!tap_major) return -ENOMEM; tap_major->major = MAJOR(major); idr_init(&tap_major->minor_idr); spin_lock_init(&tap_major->minor_lock); tap_major->device_name = device_name; list_add_tail_rcu(&tap_major->next, &major_list); return 0; } int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, const char *device_name, struct module *module) { int err; err = alloc_chrdev_region(tap_major, 0, TAP_NUM_DEVS, device_name); if (err) goto out1; cdev_init(tap_cdev, &tap_fops); tap_cdev->owner = module; err = cdev_add(tap_cdev, *tap_major, TAP_NUM_DEVS); if (err) goto out2; err = tap_list_add(*tap_major, device_name); if (err) goto out3; return 0; out3: cdev_del(tap_cdev); out2: unregister_chrdev_region(*tap_major, TAP_NUM_DEVS); out1: return err; } EXPORT_SYMBOL_GPL(tap_create_cdev); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) { struct major_info *tap_major, *tmp; cdev_del(tap_cdev); unregister_chrdev_region(major, TAP_NUM_DEVS); list_for_each_entry_safe(tap_major, tmp, &major_list, next) { if (tap_major->major == MAJOR(major)) { idr_destroy(&tap_major->minor_idr); list_del_rcu(&tap_major->next); kfree_rcu(tap_major, rcu); } } } EXPORT_SYMBOL_GPL(tap_destroy_cdev); MODULE_DESCRIPTION("Common library for drivers implementing the TAP interface"); MODULE_AUTHOR("Arnd Bergmann <arnd@arndb.de>"); MODULE_AUTHOR("Sainath Grandhi <sainath.grandhi@intel.com>"); MODULE_LICENSE("GPL"); |
89 90 148 1 147 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 4 |